markup: support rectangle and circle annotations

This commit is contained in:
2020-02-28 17:15:22 -08:00
parent dc85d784ab
commit b625b4f16d
2 changed files with 53 additions and 5 deletions

View File

@ -9,6 +9,8 @@ import dumper
import random as rng
from pathlib import Path
from pdfminer.psparser import LIT
from .utils import cv2_rect, ensure_dir, set_file_perms, WORKDIR
# https://www.pyimagesearch.com/2014/10/20/finding-shapes-images-using-python-opencv/
@ -112,10 +114,10 @@ def write_debug_image(workdir, page_num, prods, scribbles):
set_file_perms(path)
def write_inklist(obj, path):
def write_inklist(obj, mediabox, path):
"""Draw an image of the inklist."""
pagew = int(11*72)
pageh = int(8.5*72)
pagew = mediabox[2] - mediabox[0]
pageh = mediabox[3] - mediabox[1]
img = Image.new('RGBA', (pagew, pageh), (0, 0, 0, 0))
draw = ImageDraw.Draw(img, 'RGBA')
@ -129,3 +131,24 @@ def write_inklist(obj, path):
img.save(path)
set_file_perms(path)
def write_square_or_circle(obj, mediabox, path):
    """Draw a Square or Circle annotation onto a transparent page-sized image.

    Args:
        obj: annotation dictionary. ``obj['Subtype']`` selects the shape
            (``LIT('Square')`` draws a rectangle, anything else an ellipse)
            and ``obj['Rect']`` is the bounding box ``[x0, y0, x1, y1]``.
        mediabox: page box ``[x0, y0, x1, y1]``; only its width and height
            are used, to size the image.
        path: destination the image is saved to.
    """
    # Image.new() requires integer pixel dimensions, and the mediabox
    # entries are not guaranteed to be ints.
    pagew = int(mediabox[2] - mediabox[0])
    pageh = int(mediabox[3] - mediabox[1])
    img = Image.new('RGBA', (pagew, pageh), (0, 0, 0, 0))
    draw = ImageDraw.Draw(img, 'RGBA')

    # Both ImageDraw shape methods take the bounding box as ONE sequence
    # argument. Unpacking it (``*rect``) would spill the coordinates into
    # the fill/outline/width positional slots and raise TypeError.
    # NOTE(review): the rect is drawn without subtracting the mediabox
    # origin -- presumably the origin is (0, 0); confirm against callers.
    bbox = obj['Rect']
    if obj["Subtype"] == LIT('Square'):
        draw.rectangle(bbox, fill=None, outline='black', width=3)
    else:
        draw.ellipse(bbox, fill=None, outline='black', width=3)

    # account for the difference in coordinate systems
    # between pdf and images.
    img = img.transpose(Image.FLIP_TOP_BOTTOM)
    img.save(path)
    set_file_perms(path)

View File

@ -10,7 +10,7 @@ from pdfminer.pdfdocument import PDFDocument
from pdfminer.pdftypes import PDFObjRef, resolve1
from .utils import Rect, pdf_rect, ensure_dir, set_file_perms
from .img import write_inklist
from .img import write_inklist, write_square_or_circle
def make_product_box(obj, pagenum, mediabox):
@ -44,7 +44,19 @@ def make_ink_scribble(obj, pagenum, mediabox, workdir):
oid = obj['NM'].decode('utf-8')
png_path = os.path.join(workdir, f"export-page{pagenum:03d}-nm{oid}.png")
write_inklist(obj, png_path)
write_inklist(obj, mediabox, png_path)
return { 'page': pagenum,
'rect': Rect(*mediabox),
'objid': oid,
'image': png_path }
def make_square_or_circle_scribble(obj, pagenum, mediabox, workdir):
oid = obj['NM'].decode('utf-8')
png_path = os.path.join(workdir, f"export-page{pagenum:03d}-nm{oid}.png")
write_square_or_circle(obj, mediabox, png_path)
return { 'page': pagenum,
'rect': Rect(*mediabox),
@ -160,6 +172,13 @@ def is_inklist_annotation(anno):
return 'Subtype' in anno and anno["Subtype"] == LIT('Ink')
def is_square_or_circle_annotation(anno):
    """Return True when *anno* has a 'Subtype' of Square or Circle."""
    # Annotations without a Subtype entry can never match.
    if 'Subtype' not in anno:
        return False
    subtype = anno["Subtype"]
    return subtype == LIT('Square') or subtype == LIT('Circle')
def parse_pdf(fname, workdir, debug=0):
PDFDocument.debug = debug
PDFParser.debug = debug
@ -192,10 +211,16 @@ def parse_pdf(fname, workdir, debug=0):
anno = resolve1(anno)
if is_inklist_annotation(anno):
scribbles.append(make_ink_scribble(anno, pagenum, mediabox, workdir))
elif is_square_or_circle_annotation(anno):
scribbles.append(make_square_or_circle_scribble(anno, pagenum, mediabox, workdir))
elif 'AAPL:AKExtras' in anno:
scribbles.append(make_aapl_scribble(anno, pagenum, mediabox, workdir))
elif 'ProCatName' in anno:
prod_boxes.append(make_product_box(anno, pagenum, mediabox))
elif anno['Subtype'] == LIT('FreeText'):
print('ignoring FreeText annotation')
elif anno['Subtype'] == LIT('Highlight'):
print('ignoring Highlight annotation')
else:
print('ignoring other annotation:')
print(anno)