From b625b4f16d8b2341897c519c5049204c3870303c Mon Sep 17 00:00:00 2001
From: Seth Ladygo
Date: Fri, 28 Feb 2020 17:15:22 -0800
Subject: [PATCH] markup: support rectangle and circle annotations

Render Square and Circle annotations to per-annotation PNG overlays, the
same way Ink annotations are already exported, and collect them as
scribbles in parse_pdf().

The overlay canvas size is now derived from the page MediaBox instead of
a hard-coded 11in x 8.5in page, so write_inklist() takes the mediabox as
a new argument.

FreeText and Highlight annotations are recognised and skipped with a
one-line message instead of dumping the whole annotation dict.
---
Review notes (text in this section is ignored by git am):

 - draw.ellipse() now receives the Rect as a single sequence, like the
   draw.rectangle() call. Unpacking it with * passed four positional
   arguments and then `fill` again by keyword, which raises TypeError
   for every Circle annotation.
 - Page width/height are wrapped in int() again (the pre-patch code used
   int(11*72)); Image.new() needs an integer size and MediaBox entries
   can be real numbers.
 - The FreeText/Highlight branches test `'Subtype' in anno` first, as
   is_inklist_annotation() does, so an annotation without a Subtype
   still reaches the final `else` instead of raising KeyError.
 - write_square_or_circle() had the write_inklist() docstring.
 - Line breaks and indentation were reconstructed from the hunk headers
   and the diffstat. The nesting depth of the parse_pdf() context lines
   and the alignment of the return-dict continuation lines are assumed;
   confirm them against the tree or apply with --ignore-whitespace.
   The post-image blob ids on the index lines predate these fixes.

 markup/img.py | 29 ++++++++++++++++++++++++++---
 markup/pdf.py | 29 +++++++++++++++++++++++++++--
 2 files changed, 53 insertions(+), 5 deletions(-)

diff --git a/markup/img.py b/markup/img.py
index fd5993d..e6c6a14 100644
--- a/markup/img.py
+++ b/markup/img.py
@@ -9,6 +9,8 @@ import dumper
 import random as rng
 from pathlib import Path
 
+from pdfminer.psparser import LIT
+
 from .utils import cv2_rect, ensure_dir, set_file_perms, WORKDIR
 
 # https://www.pyimagesearch.com/2014/10/20/finding-shapes-images-using-python-opencv/
@@ -112,10 +114,10 @@ def write_debug_image(workdir, page_num, prods, scribbles):
     set_file_perms(path)
 
 
-def write_inklist(obj, path):
+def write_inklist(obj, mediabox, path):
     """Draw an image of the inklist."""
-    pagew = int(11*72)
-    pageh = int(8.5*72)
+    pagew = int(mediabox[2] - mediabox[0])
+    pageh = int(mediabox[3] - mediabox[1])
 
     img = Image.new('RGBA', (pagew, pageh), (0, 0, 0, 0))
     draw = ImageDraw.Draw(img, 'RGBA')
@@ -129,3 +131,24 @@ def write_inklist(obj, path):
 
     img.save(path)
     set_file_perms(path)
+
+
+def write_square_or_circle(obj, mediabox, path):
+    """Draw an image of a Square (rectangle) or Circle (ellipse) annotation."""
+    pagew = int(mediabox[2] - mediabox[0])
+    pageh = int(mediabox[3] - mediabox[1])
+
+    img = Image.new('RGBA', (pagew, pageh), (0, 0, 0, 0))
+    draw = ImageDraw.Draw(img, 'RGBA')
+
+    if obj["Subtype"] == LIT('Square'):
+        draw.rectangle(obj['Rect'], fill=None, outline='black', width=3)
+    else:
+        draw.ellipse(obj['Rect'], fill=None, outline='black', width=3)
+
+    # account for the difference in coordinate systems
+    # between pdf and images.
+    img = img.transpose(Image.FLIP_TOP_BOTTOM)
+
+    img.save(path)
+    set_file_perms(path)
diff --git a/markup/pdf.py b/markup/pdf.py
index 56047e6..3f064e8 100644
--- a/markup/pdf.py
+++ b/markup/pdf.py
@@ -10,7 +10,7 @@ from pdfminer.pdfdocument import PDFDocument
 from pdfminer.pdftypes import PDFObjRef, resolve1
 
 from .utils import Rect, pdf_rect, ensure_dir, set_file_perms
-from .img import write_inklist
+from .img import write_inklist, write_square_or_circle
 
 
 def make_product_box(obj, pagenum, mediabox):
@@ -44,7 +44,19 @@ def make_ink_scribble(obj, pagenum, mediabox, workdir):
     oid = obj['NM'].decode('utf-8')
     png_path = os.path.join(workdir, f"export-page{pagenum:03d}-nm{oid}.png")
 
-    write_inklist(obj, png_path)
+    write_inklist(obj, mediabox, png_path)
+
+    return { 'page': pagenum,
+             'rect': Rect(*mediabox),
+             'objid': oid,
+             'image': png_path }
+
+
+def make_square_or_circle_scribble(obj, pagenum, mediabox, workdir):
+    oid = obj['NM'].decode('utf-8')
+    png_path = os.path.join(workdir, f"export-page{pagenum:03d}-nm{oid}.png")
+
+    write_square_or_circle(obj, mediabox, png_path)
     return { 'page': pagenum,
              'rect': Rect(*mediabox),
              'objid': oid,
@@ -160,6 +172,13 @@ def is_inklist_annotation(anno):
     return 'Subtype' in anno and anno["Subtype"] == LIT('Ink')
 
 
+def is_square_or_circle_annotation(anno):
+    if 'Subtype' in anno:
+        if anno["Subtype"] == LIT('Square') or anno["Subtype"] == LIT('Circle'):
+            return True
+    return False
+
+
 def parse_pdf(fname, workdir, debug=0):
     PDFDocument.debug = debug
     PDFParser.debug = debug
@@ -192,10 +211,16 @@ def parse_pdf(fname, workdir, debug=0):
             anno = resolve1(anno)
             if is_inklist_annotation(anno):
                 scribbles.append(make_ink_scribble(anno, pagenum, mediabox, workdir))
+            elif is_square_or_circle_annotation(anno):
+                scribbles.append(make_square_or_circle_scribble(anno, pagenum, mediabox, workdir))
             elif 'AAPL:AKExtras' in anno:
                 scribbles.append(make_aapl_scribble(anno, pagenum, mediabox, workdir))
             elif 'ProCatName' in anno:
                 prod_boxes.append(make_product_box(anno, pagenum, mediabox))
+            elif 'Subtype' in anno and anno['Subtype'] == LIT('FreeText'):
+                print('ignoring FreeText annotation')
+            elif 'Subtype' in anno and anno['Subtype'] == LIT('Highlight'):
+                print('ignoring Highlight annotation')
             else:
                 print('ignoring other annotation:')
                 print(anno)