def render_pdf(d, path):
    """Render the invoice record ``d`` as a Letter-size PDF written to ``path``.

    The page is assembled top to bottom as a platypus story: vendor header,
    invoice metadata, bill-to / ship-to parties, line-item grid, totals,
    payment summary, a status line and free-form notes.

    Args:
        d: Invoice record (mapping). Keys read here: ``vendor_name``,
            ``vendor_address``, ``invoice_number``, ``invoice_date``,
            ``due_date``, ``currency_code``, ``currency_symbol``,
            ``bill_to_name``, ``bill_to_address``, ``tax_rate`` (multiplied
            by 100 for display) and ``notes``. ``po_number`` and
            ``ship_to_name`` are optional; ``ship_to_address`` is read only
            when ``ship_to_name`` is truthy.
        path: Output destination handed to ``SimpleDocTemplate``.

    Derived amounts come from ``compute(d)``; its result is indexed for
    ``items`` (4-tuples of description, quantity, unit price, amount),
    ``subtotal``, ``discount``, ``tax``, ``total``, ``amount_paid``,
    ``balance`` and ``is_paid``.

    NOTE(review): record strings are passed to ``Paragraph`` as-is, and
    Paragraph interprets inline markup. Presumably the records hold plain
    text -- confirm, or escape, if values may contain ``<`` or ``&``.
    """
    # ReportLab is imported lazily, inside the function, as in the original.
    from reportlab.lib import colors
    from reportlab.lib.pagesizes import LETTER
    from reportlab.lib.styles import ParagraphStyle, getSampleStyleSheet
    from reportlab.lib.units import inch
    from reportlab.platypus import (
        Paragraph,
        SimpleDocTemplate,
        Spacer,
        Table,
        TableStyle,
    )

    amounts = compute(d)
    symbol = d["currency_symbol"]

    def money(value):
        """Format ``value`` as symbol + thousands-separated number, 2 decimals."""
        return f"{symbol}{value:,.2f}"

    def styled_table(data, widths, commands, **table_kwargs):
        """Create a Table with the given column widths and style commands."""
        table = Table(data, colWidths=widths, **table_kwargs)
        table.setStyle(TableStyle(commands))
        return table

    # Dark blue used for labels, the item header row and the TOTAL row.
    accent = colors.HexColor("#2b3a67")

    # --- paragraph styles ---------------------------------------------------
    sheet = getSampleStyleSheet()
    normal = sheet["Normal"]
    title_style = ParagraphStyle(
        "H1", parent=sheet["Title"], fontSize=18, leading=22, spaceAfter=2
    )
    small_style = ParagraphStyle(
        "SM", parent=normal, fontSize=8.5, textColor=colors.grey, leading=11
    )
    label_style = ParagraphStyle(
        "LBL",
        parent=normal,
        fontSize=8.5,
        textColor=accent,
        spaceAfter=1,
        fontName="Helvetica-Bold",
    )
    body_style = ParagraphStyle("BODY", parent=normal, fontSize=9.5, leading=13)
    # alignment=2 is ReportLab's right-alignment value (TA_RIGHT).
    banner_style = ParagraphStyle(
        "R",
        parent=normal,
        fontSize=16,
        leading=18,
        alignment=2,
        textColor=accent,
        fontName="Helvetica-Bold",
    )
    number_style = ParagraphStyle(
        "n", parent=small_style, alignment=2, fontSize=9.5
    )

    story = []

    # --- header: vendor on the left, "INVOICE" + number on the right --------
    vendor_cell = [
        Paragraph(d["vendor_name"], title_style),
        Paragraph(d["vendor_address"], small_style),
    ]
    invoice_cell = [
        Paragraph("INVOICE", banner_style),
        Paragraph(f"{d['invoice_number']}", number_style),
    ]
    header = styled_table(
        [[vendor_cell, invoice_cell]],
        [4.2 * inch, 2.8 * inch],
        [("VALIGN", (0, 0), (-1, -1), "TOP")],
    )
    story.extend([header, Spacer(1, 10)])

    # --- metadata: dates, then PO number / currency --------------------------
    date_row = ["Invoice date", d["invoice_date"], "Due date", d["due_date"]]
    if d.get("po_number"):
        second_row = ["PO number", d["po_number"], "Currency", d["currency_code"]]
    else:
        # Without a PO number the currency moves into the first label slot.
        second_row = ["Currency", d["currency_code"], "", ""]
    meta = styled_table(
        [date_row, second_row],
        [1.3 * inch, 2.2 * inch, 1.3 * inch, 2.2 * inch],
        [
            ("FONTSIZE", (0, 0), (-1, -1), 9),
            # Columns 0 and 2 hold the labels: accent colour, bold.
            ("TEXTCOLOR", (0, 0), (0, -1), accent),
            ("TEXTCOLOR", (2, 0), (2, -1), accent),
            ("FONTNAME", (0, 0), (0, -1), "Helvetica-Bold"),
            ("FONTNAME", (2, 0), (2, -1), "Helvetica-Bold"),
            ("BOTTOMPADDING", (0, 0), (-1, -1), 3),
            ("TOPPADDING", (0, 0), (-1, -1), 3),
        ],
    )
    story.extend([meta, Spacer(1, 12)])

    # --- parties: bill-to beside ship-to -------------------------------------
    bill_cell = [
        Paragraph("BILL TO", label_style),
        Paragraph(d["bill_to_name"], body_style),
        Paragraph(d["bill_to_address"], small_style),
    ]
    ship_cell = [Paragraph("SHIP TO", label_style)]
    if d.get("ship_to_name"):
        ship_cell.append(Paragraph(d["ship_to_name"], body_style))
        ship_cell.append(Paragraph(d["ship_to_address"], small_style))
    else:
        ship_cell.append(Paragraph("Same as billing address", small_style))
    parties = styled_table(
        [[bill_cell, ship_cell]],
        [3.5 * inch, 3.5 * inch],
        [
            ("VALIGN", (0, 0), (-1, -1), "TOP"),
            ("LEFTPADDING", (0, 0), (-1, -1), 0),
        ],
    )
    story.extend([parties, Spacer(1, 14)])

    # --- line items -----------------------------------------------------------
    item_rows = [["Description", "Qty", "Unit price", "Amount"]]
    item_rows.extend(
        [description, str(quantity), money(unit_price), money(amount)]
        for description, quantity, unit_price, amount in amounts["items"]
    )
    items_table = styled_table(
        item_rows,
        [3.5 * inch, 0.7 * inch, 1.4 * inch, 1.4 * inch],
        [
            ("BACKGROUND", (0, 0), (-1, 0), accent),
            ("TEXTCOLOR", (0, 0), (-1, 0), colors.white),
            ("FONTSIZE", (0, 0), (-1, -1), 9.5),
            # Every column except the description is right-aligned.
            ("ALIGN", (1, 0), (-1, -1), "RIGHT"),
            ("GRID", (0, 0), (-1, -1), 0.4, colors.HexColor("#cdd3e6")),
            # Alternating backgrounds start below the header row.
            (
                "ROWBACKGROUNDS",
                (0, 1),
                (-1, -1),
                [colors.white, colors.HexColor("#eef1f8")],
            ),
            ("LEFTPADDING", (0, 0), (-1, -1), 8),
            ("TOPPADDING", (0, 0), (-1, -1), 5),
            ("BOTTOMPADDING", (0, 0), (-1, -1), 5),
        ],
    )
    story.extend([items_table, Spacer(1, 10)])

    # --- totals (right-aligned block) ----------------------------------------
    total_rows = [["Subtotal", money(amounts["subtotal"])]]
    if amounts["discount"]:
        # The discount row only appears for a non-zero discount.
        total_rows.append(["Discount", "-" + money(amounts["discount"])])
    total_rows.append([f"Tax ({d['tax_rate']*100:.1f}%)", money(amounts["tax"])])
    total_rows.append(["TOTAL", money(amounts["total"])])
    totals = styled_table(
        total_rows,
        [1.6 * inch, 1.4 * inch],
        [
            ("FONTSIZE", (0, 0), (-1, -1), 10),
            ("ALIGN", (0, 0), (-1, -1), "RIGHT"),
            # The last row (TOTAL) gets a rule above it plus bold accent text.
            ("LINEABOVE", (0, -1), (-1, -1), 1.0, accent),
            ("FONTNAME", (0, -1), (-1, -1), "Helvetica-Bold"),
            ("TEXTCOLOR", (0, -1), (-1, -1), accent),
            ("TOPPADDING", (0, 0), (-1, -1), 3),
            ("BOTTOMPADDING", (0, 0), (-1, -1), 3),
        ],
        hAlign="RIGHT",
    )
    story.extend([totals, Spacer(1, 8)])

    # --- payment summary and status ------------------------------------------
    payment_rows = [
        ["Amount paid", money(amounts["amount_paid"])],
        ["Balance due", money(amounts["balance"])],
    ]
    if amounts["is_paid"]:
        due_color = colors.HexColor("#1b7a3d")  # green when settled
        status = "PAID IN FULL"
    else:
        due_color = colors.HexColor("#7a2e2e")  # dark red while outstanding
        status = "BALANCE DUE"
    payment = styled_table(
        payment_rows,
        [1.6 * inch, 1.4 * inch],
        [
            ("FONTSIZE", (0, 0), (-1, -1), 10),
            ("ALIGN", (0, 0), (-1, -1), "RIGHT"),
            # Row 1 is "Balance due": bold, coloured by payment state.
            ("FONTNAME", (0, 1), (-1, 1), "Helvetica-Bold"),
            ("TEXTCOLOR", (0, 1), (-1, 1), due_color),
            ("TOPPADDING", (0, 0), (-1, -1), 2),
            ("BOTTOMPADDING", (0, 0), (-1, -1), 2),
        ],
        hAlign="RIGHT",
    )
    story.extend(
        [
            payment,
            Spacer(1, 6),
            Paragraph(f"Status: {status}", body_style),
            Spacer(1, 16),
            Paragraph("Notes", label_style),
            Paragraph(d["notes"], body_style),
        ]
    )

    document = SimpleDocTemplate(
        path,
        pagesize=LETTER,
        topMargin=0.7 * inch,
        bottomMargin=0.7 * inch,
        leftMargin=0.8 * inch,
        rightMargin=0.8 * inch,
    )
    document.build(story)
# Step 3: render every record in DOCS to its own PDF and collect
# (record, ground truth, pdf path) triples in CORPUS.
print("STEP 3/7 · Generating synthetic invoice PDFs…")

CORPUS = []
for i, d in enumerate(DOCS):
    # Write under /content when that directory exists (presumably a hosted
    # notebook environment -- confirm); otherwise use the working directory.
    if os.path.isdir("/content"):
        path = f"/content/invoice_{i}.pdf"
    else:
        path = f"invoice_{i}.pdf"
    render_pdf(d, path)
    CORPUS.append((d, ground_truth(d), path))
    print(f" ✓ {os.path.basename(path)} — {d['vendor_name']} → {d['bill_to_name']}")
print()

if SHOW_FIRST_PAGE:
    # Optional preview of the first generated PDF. This is best-effort: any
    # failure (missing optional packages, empty corpus, rendering error) is
    # reported and skipped instead of aborting the run.
    try:
        import pypdfium2 as pdfium
        import matplotlib.pyplot as plt

        pg = pdfium.PdfDocument(CORPUS[0][2])[0]
        img = pg.render(scale=2.0).to_pil()
        plt.figure(figsize=(6.4, 8.3))
        plt.imshow(img)
        plt.axis("off")
        plt.title("What lift reads — page 1 of invoice_0.pdf", fontsize=10)
        plt.show()
    except Exception as e:
        print(" page preview skipped:", e, "\n")