Batch-process a folder of .hwp files

Open → transform → save, for every file in a directory

μžλ™ν™” μš”μ²­ 1μˆœμœ„ β€” 폴더에 μžˆλŠ” λͺ¨λ“  .hwp λ₯Ό μ°¨λ‘€λ‘œ μ—΄κ³ , μ–΄λ–€ λ³€ν™˜μ„ μ μš©ν•œ λ’€, λ‹€λ₯Έ 폴더에 μ €μž₯. ν•œ 개의 App μΈμŠ€ν„΄μŠ€λ₯Ό μž¬μ‚¬μš©ν•˜κΈ° λ•Œλ¬Έμ— HWP 엔진을 N번 λ„μš°μ§€ μ•Šμ•„λ„ λ˜μ–΄ λΉ λ¦…λ‹ˆλ‹€.

1. 가장 단순한 형태 — 모든 .hwp → .pdf

from pathlib import Path
from hwpapi import App

src_dir = Path("contracts")
dst_dir = Path("contracts_pdf")
dst_dir.mkdir(exist_ok=True)

# A single App instance is shared by every file in the batch.
with App() as app:
    # Sorted so the files are processed in a stable, predictable order.
    for hwp in sorted(src_dir.glob("*.hwp")):
        # Absolute paths are handed to the app for both open and save.
        app.open(str(hwp.absolute()))
        # Same file name with a .pdf suffix, placed in the destination folder.
        out = dst_dir / hwp.with_suffix(".pdf").name
        app.save(str(out.absolute()))
        app.close()
        # Progress line; the check mark and arrow were mis-encoded before.
        print(f"✓ {hwp.name} → {out.name}")

2. 일괄 find/replace 후 저장

from pathlib import Path
from hwpapi import App

# Placeholder text -> replacement text, applied to every template.
substitutions = {
    "[[YEAR]]": "2026",
    "[[QUARTER]]": "Q1",
    "[[CONTACT]]": "ops@example.com",
}

out_dir = Path("rendered")
# Created once up front rather than once per file inside the loop.
out_dir.mkdir(exist_ok=True)

with App() as app:
    # Sorted so the processing order is the same on every run.
    for hwp in sorted(Path("templates").glob("*.hwp")):
        app.open(str(hwp.absolute()))

        # Apply every substitution to the currently open document.
        for needle, replacement in substitutions.items():
            app.doc.find_replace(needle, replacement, all=True)

        # Keep the original file name; only the folder changes.
        out = out_dir / hwp.name
        app.save(str(out.absolute()))
        app.close()

3. 데이터 한 행 → 문서 한 개 (메일머지 형 워크플로)

CSV/JSON 의 행 단위로 템플릿을 채워 N개의 결과물을 만듭니다. fill-fields 레시피와 같은 패턴이지만, 여기서는 출력을 PDF 로 바로 떨굽니다.

import csv
from pathlib import Path
from hwpapi import App

template = "templates/invoice.hwp"
# Resolved to an absolute path once, outside the per-row loop.
template_path = str(Path(template).absolute())
out_dir = Path("out_invoices")
out_dir.mkdir(exist_ok=True)

# newline="" is what the csv module asks for when it is given a file object,
# so quoted fields that contain line breaks are read back intact.
with App() as app, open("clients.csv", encoding="utf-8-sig", newline="") as f:
    reader = csv.DictReader(f)
    for row in reader:
        # Re-open the template for every row so each output starts clean.
        app.open(template_path)

        # Columns with no matching field in the document are skipped.
        for field, value in row.items():
            if field in app.doc.fields:
                app.doc.fields[field] = value

        # One output per row, named after the row's client_id column.
        pdf = out_dir / f"invoice-{row['client_id']}.pdf"
        app.save(str(pdf.absolute()))
        app.close()
        print(f"  → {pdf.name}")

4. 견고한 에러 처리 — 한 파일 실패해도 진행

배치 작업에서는 한 파일이 깨졌다고 멈추면 안 됩니다. hwpapi.errors.HwpApiError 계열을 잡아 로그하고 다음 파일로 넘어가세요.

import logging
from pathlib import Path
from hwpapi import App
from hwpapi.errors import HwpApiError

logging.basicConfig(level=logging.INFO)
log = logging.getLogger("batch")

# Running tally: count of successes and the names of files that failed.
results = {"ok": 0, "fail": []}

out_dir = Path("output")
# Make sure the destination exists before the first save.
out_dir.mkdir(exist_ok=True)

with App() as app:
    for hwp in Path("input").glob("*.hwp"):
        out = out_dir / f"{hwp.stem}.pdf"
        try:
            app.open(str(hwp.absolute()))
            # Absolute path, matching how the input path is passed to open().
            app.save(str(out.absolute()))
            app.close()
            results["ok"] += 1
        except HwpApiError as exc:
            # Record the failure and carry on with the next file.
            log.warning("skip %s: %s", hwp.name, exc)
            results["fail"].append(hwp.name)
            try:
                # Best effort: discard whatever is still open without saving.
                app.close(save=False)
            except Exception:
                # Cleanup must never abort the batch, but leave a trace.
                log.debug(
                    "could not close %s after failure", hwp.name, exc_info=True
                )

log.info("converted %d, failed %d", results["ok"], len(results["fail"]))

5. 진행률 표시 — tqdm 통합

from pathlib import Path
from hwpapi import App
from tqdm import tqdm

# Collected up front so the whole batch is known before the loop starts.
files = sorted(Path("input").glob("*.hwp"))

out_dir = Path("output")
# Make sure the destination exists before the first save.
out_dir.mkdir(exist_ok=True)

with App() as app:
    for hwp in tqdm(files, desc="HWP→PDF"):
        app.open(str(hwp.absolute()))
        # Absolute path, matching how the input path is passed to open().
        out = out_dir / f"{hwp.stem}.pdf"
        app.save(str(out.absolute()))
        app.close()
Warning

병렬 처리 주의 — HWP COM 서버는 단일 스레드를 가정합니다. multiprocessing 으로 프로세스를 분리하지 않는 한 threading / asyncio 로 병렬화해도 빨라지지 않습니다 (오히려 dead-lock 위험). 프로세스 단위로 나눌 때도 각 프로세스가 자체 App() 을 들어야 하며, HWP 인스턴스 수만큼 메모리를 먹습니다.

See also

Back to top