Skip to main content

Working with Text

PDFDancer provides comprehensive tools for working with text in PDFs. You can select paragraphs and text lines, inspect their properties, add new paragraphs, and edit, move, or delete existing text.


Selecting Paragraphs

All Paragraphs

from pdfdancer import PDFDancer

with PDFDancer.open("document.pdf") as pdf:
# Get all paragraphs across the entire document
all_paragraphs = pdf.select_paragraphs()
print(f"Total paragraphs: {len(all_paragraphs)}")

# Get all paragraphs on a specific page
page_paragraphs = pdf.page(1).select_paragraphs()
print(f"Page 1 paragraphs: {len(page_paragraphs)}")

for para in page_paragraphs:
print(f"Paragraph: {para.text[:50]}...")

Paragraphs by Text Prefix

with PDFDancer.open("invoice.pdf") as pdf:
# Find paragraphs starting with specific text
headers = pdf.select_paragraphs_starting_with("Invoice #")

# On a specific page
page_headers = pdf.page(1).select_paragraphs_starting_with("The Complete")

if page_headers:
para = page_headers[0]
print(f"Found: {para.text}")
print(f"Position: ({para.position.x()}, {para.position.y()})")

Paragraphs at Coordinates

from pdfdancer import PDFDancer

with PDFDancer.open("document.pdf") as pdf:
# Find paragraphs at specific coordinates
paragraphs = pdf.page(1).select_paragraphs_at(x=150, y=320)

for para in paragraphs:
print(f"Paragraph at position: {para.text}")

Text Properties

Once you've selected a paragraph or text line, you can access its properties including text content, font information, position, color, and status information.

Accessing Text Properties

from pdfdancer import PDFDancer

with PDFDancer.open("document.pdf") as pdf:
paragraphs = pdf.page(1).select_paragraphs()

for para in paragraphs:
# Access text content
print(f"Text: {para.text}")

# Access font information
print(f"Font: {para.font_name} at {para.font_size}pt")

# Access position
print(f"Position: ({para.position.x()}, {para.position.y()})")

# Access color (if available)
if para.color:
print(f"Color: RGB({para.color.r}, {para.color.g}, {para.color.b})")

Working with Text Line Properties

Text lines also expose color and other properties:

with PDFDancer.open("document.pdf") as pdf:
lines = pdf.page(1).select_text_lines()

for line in lines:
print(f"Line text: {line.text}")
if line.color:
print(f" Color: RGB({line.color.r}, {line.color.g}, {line.color.b})")
if line.font_name:
print(f" Font: {line.font_name} {line.font_size}pt")

Text Status Information

Text objects include status information that indicates whether they have been modified and whether their text is encodable with the current font. It also reports the font type and a recommended font with a similarity score:

from pdfdancer import PDFDancer

with PDFDancer.open("document.pdf") as pdf:
paragraphs = pdf.page(1).select_paragraphs()

for para in paragraphs:
if para.status:
# Check if text was modified
print(f"Modified: {para.status.is_modified()}")

# Check if text is encodable with current font
print(f"Encodable: {para.status.is_encodable()}")

# Get font type classification
font_type = para.status.get_font_type()
print(f"Font type: {font_type.value}") # SYSTEM, STANDARD, or EMBEDDED

# Get font recommendation with similarity score
recommendation = para.status.get_font_recommendation()
print(f"Recommended font: {recommendation.get_font_name()}")
print(f"Similarity score: {recommendation.get_similarity_score()}")

Font Type Classifications:

  • SYSTEM: Font available from the operating system
  • STANDARD: One of the 14 standard PDF fonts (Helvetica, Times-Roman, Courier, etc.)
  • EMBEDDED: Font embedded in the PDF file

Adding Paragraphs

Basic Paragraph with Standard Font

from pdfdancer import PDFDancer, StandardFonts, Color

with PDFDancer.open("document.pdf") as pdf:
# Add a simple paragraph with standard font
pdf.new_paragraph() \
.text("Standard Font Test\nHelvetica Bold") \
.font(StandardFonts.HELVETICA_BOLD.value, 16) \
.line_spacing(1.2) \
.color(Color(255, 0, 0)) \
.at(1, 100, 100) \
.add()

pdf.save("output.pdf")

Adding to Specific Page

You can add a paragraph to a specific page in either of two ways:

from pdfdancer import PDFDancer, StandardFonts

with PDFDancer.open("document.pdf") as pdf:
# Method 1: Specify page index in at()
pdf.new_paragraph() \
.text("Times Roman Test") \
.font(StandardFonts.TIMES_ROMAN.value, 14) \
.at(1, 150, 150) \
.add()

# Method 2: Use page() to scope the builder
pdf.page(1).new_paragraph() \
.text("Awesomely\nObvious!") \
.font("Roboto-Regular", 14) \
.line_spacing(0.7) \
.at(300.1, 500) \
.add()

pdf.save("output.pdf")

Multi-line Paragraph with Courier

Monospace fonts like Courier are perfect for code examples:

from pdfdancer import PDFDancer, StandardFonts

with PDFDancer.open("document.pdf") as pdf:
# Add multi-line code example with Courier
pdf.new_paragraph() \
.text("Courier Monospace\nCode Example") \
.font(StandardFonts.COURIER_BOLD.value, 12) \
.line_spacing(1.5) \
.at(1, 200, 200) \
.add()

pdf.save("output.pdf")

Using Custom TTF Fonts

You can use custom TrueType fonts directly without pre-registration:

from pathlib import Path
from pdfdancer import PDFDancer, Color

with PDFDancer.open("document.pdf") as pdf:
# Use custom font file directly
ttf_path = Path("fonts/DancingScript-Regular.ttf")

pdf.new_paragraph() \
.text("Awesomely\nObvious!") \
.font_file(ttf_path, 24) \
.line_spacing(1.8) \
.color(Color(0, 0, 255)) \
.at(1, 300.1, 500) \
.add()

pdf.save("output.pdf")

Using Service Fonts with find_fonts()

You can search for fonts available on the PDFDancer service:

from pdfdancer import PDFDancer

with PDFDancer.open("document.pdf") as pdf:
# Find fonts matching "Roboto" at size 14
fonts = pdf.find_fonts("Roboto", 14)

if fonts:
# Use the first match (e.g., "Roboto-Regular")
roboto = fonts[0]
print(f"Using: {roboto.name} at {roboto.size}pt")

pdf.new_paragraph() \
.text("Awesomely\nObvious!") \
.font(roboto.name, roboto.size) \
.line_spacing(0.7) \
.at(1, 300.1, 500) \
.add()

pdf.save("output.pdf")

Editing Paragraphs

Basic Text Replacement

The simplest way to edit a paragraph is to replace its text content. Edit operations return a CommandResult object that provides detailed information about the operation:

from pdfdancer import PDFDancer

with PDFDancer.open("document.pdf") as pdf:
# Find a paragraph by text prefix
paragraph = pdf.page(1).select_paragraphs_starting_with("The Complete")[0]

# Method 1: Explicit apply() call
result = paragraph.edit().replace("Awesomely\nObvious!").apply()

# Method 2: Context manager (recommended - auto-applies on success)
with paragraph.edit() as editor:
editor.replace("Awesomely\nObvious!")
# Changes are automatically applied when context exits

# Check the result returned by Method 1's apply() call
print(f"Success: {result.success}")
print(f"Command: {result.command_name}")
if result.warning:
print(f"Warning: {result.warning}")

pdf.save("output.pdf")

CommandResult Properties:

  • success (boolean): Whether the operation succeeded
  • commandName (string): Name of the operation performed (e.g., "ModifyParagraph", "ModifyTextLine"); accessed as command_name in Python
  • elementId (string | null): ID of the modified element
  • message (string | null): Optional informational message
  • warning (string | null): Optional warning message (e.g., when modifying text with embedded fonts)

Python Context Manager Pattern (Recommended)

The context manager pattern (with paragraph.edit() as editor:) is the recommended approach in Python because:

  • Automatic application: Changes are applied when the context exits successfully
  • Error safety: Changes are discarded if an exception occurs
  • Cleaner code: No need to explicitly call apply()
  • Multiple operations: Apply several edits in one context

# Multiple edits in one context
with paragraph.edit() as editor:
editor.replace("New text")
editor.font("Helvetica", 12)
editor.color(Color(255, 0, 0)) # Red text (Color is imported from pdfdancer)
# All changes applied automatically here

Editing Text Without Changing Position

When you edit text without specifying a new position, the paragraph stays in its original location:

from pdfdancer import PDFDancer

with PDFDancer.open("document.pdf") as pdf:
paragraph = pdf.page(1).select_paragraphs_starting_with("The Complete")[0]
original_x = paragraph.position.x()
original_y = paragraph.position.y()

# Edit text and font, keeping original position
paragraph.edit() \
.replace("Awesomely\nObvious!") \
.font("Helvetica", 12) \
.line_spacing(0.7) \
.apply()

# Verify position unchanged
new_para = pdf.page(1).select_paragraphs_starting_with("Awesomely")[0]
assert new_para.position.x() == original_x
assert new_para.position.y() == original_y

pdf.save("output.pdf")

Chaining Multiple Edits

You can chain multiple edits together, including text, font, color, spacing, and position:

from pdfdancer import PDFDancer

with PDFDancer.open("document.pdf") as pdf:
paragraph = pdf.page(1).select_paragraphs_starting_with("The Complete")[0]

# Chain multiple edits: text, font, spacing, AND position
paragraph.edit() \
.replace("Awesomely\nObvious!") \
.font("Helvetica", 12) \
.line_spacing(0.7) \
.move_to(300.1, 500) \
.apply()

pdf.save("output.pdf")

Changing Only Font

You can change just the font without modifying the text content:

from pdfdancer import PDFDancer

with PDFDancer.open("document.pdf") as pdf:
paragraph = pdf.page(1).select_paragraphs_starting_with("The Complete")[0]

# Change only the font, keep everything else
paragraph.edit() \
.font("Helvetica", 28) \
.apply()

# Verify font changed
line = pdf.page(1).select_text_lines_starting_with("The Complete")[0]
assert line.object_ref().font_name == "Helvetica"
assert line.object_ref().font_size == 28

pdf.save("output.pdf")

Moving Paragraphs

Move to New Coordinates

from pdfdancer import PDFDancer

with PDFDancer.open("document.pdf") as pdf:
paragraph = pdf.page(1).select_paragraphs_starting_with("The Complete")[0]

# Move to new coordinates
paragraph.move_to(0.1, 300)

# Verify new position
moved = pdf.page(1).select_paragraphs_at(0.1, 300)[0]
assert moved is not None

pdf.save("output.pdf")

Move Only (Using Edit)

from pdfdancer import PDFDancer

with PDFDancer.open("document.pdf") as pdf:
paragraph = pdf.page(1).select_paragraphs_starting_with("The Complete")[0]

# Move using edit builder
paragraph.edit() \
.move_to(1, 1) \
.apply()

# Verify new position
new_para = pdf.page(1).select_paragraphs_starting_with("The Complete")[0]
assert new_para.position.x() == 1
assert new_para.position.y() == 1

pdf.save("output.pdf")

Editing Text Lines

Text lines can be edited just like paragraphs. This is useful when you need to modify individual lines within a paragraph or work with single-line text elements.

Basic Text Line Replacement

from pdfdancer import PDFDancer

with PDFDancer.open("document.pdf") as pdf:
# Find and edit a text line
text_line = pdf.page(1).select_text_lines_starting_with("Invoice Number:")[0]

# Replace the text
text_line.edit().replace("Invoice Number: INV-2024-001").apply()

pdf.save("output.pdf")

Using Context Manager (Recommended):

from pdfdancer import PDFDancer

with PDFDancer.open("document.pdf") as pdf:
text_line = pdf.page(1).select_text_lines_starting_with("Invoice Number:")[0]

# Context manager automatically applies changes
with text_line.edit() as editor:
editor.replace("Invoice Number: INV-2024-001")

pdf.save("output.pdf")

Editing Text Line Font and Style

from pdfdancer import PDFDancer, Color

with PDFDancer.open("document.pdf") as pdf:
text_line = pdf.page(1).select_text_lines_starting_with("Total:")[0]

# Change text, font, and color
text_line.edit() \
.replace("Total: $1,234.56") \
.font("Helvetica-Bold", 14) \
.color(Color(255, 0, 0)) \
.apply()

pdf.save("output.pdf")

Moving Text Lines

from pdfdancer import PDFDancer

with PDFDancer.open("document.pdf") as pdf:
text_line = pdf.page(1).select_text_lines_starting_with("Footer")[0]

# Move text line to new position
text_line.edit() \
.move_to(72, 50) \
.apply()

pdf.save("output.pdf")

Text Lines vs Paragraphs
  • Text lines are individual lines of text, useful for precise line-by-line editing
  • Paragraphs are multi-line text blocks with line spacing control
  • Both support the same editing operations: replace(), font(), color(), move_to()
  • Use text lines when you need to edit specific lines within a larger text block

Deleting Text Lines

Text lines can be deleted just like paragraphs:

from pdfdancer import PDFDancer

with PDFDancer.open("document.pdf") as pdf:
# Find and delete a text line
text_line = pdf.page(1).select_text_lines_starting_with("Footer")[0]
text_line.delete()

# Verify deletion
remaining = pdf.page(1).select_text_lines_starting_with("Footer")
assert remaining == []

pdf.save("output.pdf")

Deleting Paragraphs

from pdfdancer import PDFDancer

with PDFDancer.open("document.pdf") as pdf:
# Find and delete a paragraph
paragraph = pdf.page(1).select_paragraphs_starting_with("The Complete")[0]
paragraph.delete()

# Verify deletion
remaining = pdf.page(1).select_paragraphs_starting_with("The Complete")
assert remaining == []

pdf.save("output.pdf")

Selecting Text Lines

Text lines provide finer-grained control than paragraphs.

All Text Lines

with PDFDancer.open("document.pdf") as pdf:
# Get all text lines across the document
all_lines = pdf.select_text_lines()

# Get all text lines on a specific page
page_lines = pdf.page(1).select_text_lines()

for line in page_lines:
print(f"Line: {line.text}")

Text Lines by Prefix

with PDFDancer.open("document.pdf") as pdf:
# Find text lines starting with specific text
lines = pdf.page(1).select_text_lines_starting_with("Date:")

if lines:
print(f"Found date line: {lines[0].text}")

Next Steps