Skip to main content

Advanced Usage

Learn advanced patterns for working with PDFDancer, including custom fonts, batch processing, complex workflows, and performance optimization.


Context Managers and Resource Management

Python Context Manager Pattern

from pdfdancer import PDFDancer

# Preferred: the `with` block closes the session on exit, including when
# an exception is raised inside it.
with PDFDancer.open("document.pdf") as pdf:
    first_paragraph = pdf.page(1).select_paragraphs()[0]
    first_paragraph.delete()
    pdf.save("output.pdf")

# Discouraged: managing the session lifetime by hand.
pdf = PDFDancer.open("document.pdf")
try:
    first_paragraph = pdf.page(1).select_paragraphs()[0]
    first_paragraph.delete()
    pdf.save("output.pdf")
finally:
    # Without the context manager, close() has to be called explicitly.
    pdf.close()

Using the context manager ensures sessions are properly cleaned up even if exceptions occur.

Thread Safety Warning

PDFDancer sessions are not thread-safe. Each session instance must be accessed from only one thread at a time. Never share session objects across threads or use them in concurrent operations.

For parallel processing, create a separate session instance for each thread. See the Thread Safety section for detailed examples.


Custom Font Management

Finding and Using Service Fonts

from pdfdancer import PDFDancer

with PDFDancer.open("document.pdf") as pdf:
    # Ask the service which fonts match the requested name and size.
    matching_fonts = pdf.find_fonts("Roboto", 12)

    if matching_fonts:
        # Take the first hit and reuse its name/size for the new paragraph.
        chosen = matching_fonts[0]
        print(f"Using: {chosen.name} at {chosen.size}pt")

        (
            pdf.new_paragraph()
            .text("Text with service font")
            .font(chosen.name, chosen.size)
            .at(page_number=1, x=100, y=500)
            .add()
        )

    # NOTE(review): the original indentation was lost; the save is assumed
    # to run whether or not a font matched -- confirm against the docs.
    pdf.save("output.pdf")

Uploading Custom Fonts

from pathlib import Path
from pdfdancer import PDFDancer

with PDFDancer.open("document.pdf") as pdf:
    # Register the TTF file with the session before referring to it.
    font_file = Path("fonts/CustomFont.ttf")
    pdf.register_font(str(font_file))

    # The registered font is then referenced by name.
    (
        pdf.new_paragraph()
        .text("Text with custom font")
        .font("CustomFont", 14)
        .at(page_number=1, x=100, y=500)
        .add()
    )

    pdf.save("output.pdf")

Batch Processing Multiple PDFs

from pathlib import Path
from pdfdancer import PDFDancer, Color

def process_invoice(input_path: Path, output_dir: Path) -> None:
    """Add a "PAID" paragraph to one invoice PDF and save it in *output_dir*.

    The saved file keeps the input's file name.
    """
    with PDFDancer.open(input_path) as pdf:
        # Build the "PAID" paragraph, then add it to page 1.
        stamp = (
            pdf.new_paragraph()
            .text("PAID")
            .font("Helvetica-Bold", 72)
            .color(Color(0, 200, 0))
            .at(page_number=1, x=200, y=400)
        )
        stamp.add()

        # Same file name, different directory.
        destination = output_dir / input_path.name
        pdf.save(str(destination))


def batch_process_invoices(input_dir: Path, output_dir: Path) -> None:
    """Process every PDF file directly inside *input_dir*.

    Creates *output_dir* (including parents) if needed, then calls
    ``process_invoice`` on each ``*.pdf`` file in name order. A failure on
    one file is reported and does not stop the remaining files.

    Args:
        input_dir: Directory scanned (non-recursively) for ``*.pdf`` files.
        output_dir: Directory the processed files are written to.
    """
    output_dir.mkdir(parents=True, exist_ok=True)

    # glob() yields entries in arbitrary order and also matches directories
    # whose name ends in ".pdf"; sort and keep regular files only so the
    # progress counter and the output are reproducible between runs.
    pdf_files = sorted(p for p in input_dir.glob("*.pdf") if p.is_file())
    total = len(pdf_files)

    print(f"Processing {total} invoices...")

    for i, pdf_file in enumerate(pdf_files, 1):
        print(f"[{i}/{total}] Processing {pdf_file.name}...")
        try:
            process_invoice(pdf_file, output_dir)
        except Exception as e:
            # Deliberate best-effort boundary: report the failure and keep
            # going so one bad file does not abort the whole batch.
            print(f" ✗ Error: {e}")
        else:
            print(f" ✓ Saved to {output_dir / pdf_file.name}")

    print("Batch processing complete!")


# Usage: process everything in the pending folder into the processed folder.
pending_dir = Path("invoices/pending")
processed_dir = Path("invoices/processed")
batch_process_invoices(input_dir=pending_dir, output_dir=processed_dir)

Complex Editing Workflows

Conditional Content Replacement

from pdfdancer import PDFDancer, Color

def redact_sensitive_info(pdf_path: str, output_path: str) -> None:
    """Replace paragraphs that mention sensitive keywords with "[REDACTED]".

    Every paragraph of the PDF at *pdf_path* is checked. The first keyword
    (in list order) found in a paragraph's text triggers the replacement of
    that paragraph. The document is then saved to *output_path*.
    """
    sensitive_keywords = ["SSN:", "Credit Card:", "Password:"]

    with PDFDancer.open(pdf_path) as pdf:
        for paragraph in pdf.select_paragraphs():
            # First keyword contained in this paragraph, or None if clean.
            hit = next(
                (kw for kw in sensitive_keywords if kw in paragraph.text),
                None,
            )
            if hit is None:
                continue

            # Swap the paragraph's text for the redaction marker.
            (
                paragraph.edit()
                .replace("[REDACTED]")
                .color(Color(0, 0, 0))
                .apply()
            )
            print(f"Redacted: {hit} in paragraph")

        pdf.save(output_path)


# Example run: read document.pdf and write the redacted copy to redacted.pdf.
source_pdf, redacted_pdf = "document.pdf", "redacted.pdf"
redact_sensitive_info(source_pdf, redacted_pdf)

Template-Based Document Generation

from pdfdancer import PDFDancer, Color
from datetime import datetime

def generate_certificate(
    template_path: str,
    output_path: str,
    student_name: str,
    course_name: str,
    completion_date: str,
) -> None:
    """Fill a certificate template's placeholders and save the result.

    Paragraphs starting with ``{{STUDENT_NAME}}``, ``{{COURSE_NAME}}`` or
    ``{{DATE}}`` have their text replaced by the matching argument, a
    signature image is added, and the document is saved to *output_path*.
    """
    with PDFDancer.open(template_path) as pdf:
        # (placeholder, replacement text) pairs, applied in this order.
        replacements = (
            ("{{STUDENT_NAME}}", student_name),
            ("{{COURSE_NAME}}", course_name),
            ("{{DATE}}", completion_date),
        )

        for token, text in replacements:
            for paragraph in pdf.select_paragraphs_starting_with(token):
                paragraph.edit().replace(text).apply()

        # Add the signature image once, after all placeholders are filled.
        signature = pdf.new_image().from_file("signature.png")
        signature.at(page=1, x=400, y=100).add()

        pdf.save(output_path)


# Generate certificates for multiple students
from pathlib import Path

students = [
    ("Alice Johnson", "Python Programming", "2024-01-15"),
    ("Bob Smith", "Python Programming", "2024-01-15"),
    ("Carol Davis", "Python Programming", "2024-01-15"),
]

# Make sure the target directory exists before anything is saved into it,
# mirroring what the batch-processing example does for its output directory.
Path("certificates").mkdir(parents=True, exist_ok=True)

for name, course, date in students:
    # One file per student; spaces in the name become underscores.
    output = f"certificates/{name.replace(' ', '_')}.pdf"
    generate_certificate("template.pdf", output, name, course, date)
    print(f"Generated certificate for {name}")

Performance Optimization

Minimizing API Calls

from pdfdancer import PDFDancer, Color

# Less efficient: the paragraph list is re-fetched on every iteration
with PDFDancer.open("document.pdf") as pdf:
    for _ in range(10):
        remaining = pdf.page(1).select_paragraphs()  # one fetch per pass
        if not remaining:
            break  # the page had fewer than 10 paragraphs
        # Always delete index 0. Presumably a fresh select no longer contains
        # the paragraph just deleted, so indexing by the loop counter would
        # skip every other paragraph and could run past the end of the list.
        remaining[0].delete()

# More efficient: fetch the list once, then delete from that result
with PDFDancer.open("document.pdf") as pdf:
    paragraphs = pdf.page(1).select_paragraphs()[:10]  # Single fetch
    for para in paragraphs:
        para.delete()

Reusing Sessions

from pdfdancer import PDFDancer

# One session carries all three edits, followed by a single save
with PDFDancer.open("document.pdf") as pdf:
    # Edit 1: replace the text of the first paragraph starting with "Invoice"
    invoice_paragraphs = pdf.select_paragraphs_starting_with("Invoice")
    if invoice_paragraphs:
        first_match = invoice_paragraphs[0]
        first_match.edit().replace("PAID").apply()

    # Edit 2: add the watermark paragraph
    (
        pdf.new_paragraph()
        .text("CONFIDENTIAL")
        .at(page_number=1, x=200, y=400)
        .add()
    )

    # Edit 3: add the logo image
    (
        pdf.new_image()
        .from_file("logo.png")
        .at(page=1, x=50, y=750)
        .add()
    )

    # Everything above is written out by this one save
    pdf.save("output.pdf")

Next Steps