markup: support rectangle and circle annotations
This commit is contained in:
@ -9,6 +9,8 @@ import dumper
|
||||
import random as rng
|
||||
from pathlib import Path
|
||||
|
||||
from pdfminer.psparser import LIT
|
||||
|
||||
from .utils import cv2_rect, ensure_dir, set_file_perms, WORKDIR
|
||||
|
||||
# https://www.pyimagesearch.com/2014/10/20/finding-shapes-images-using-python-opencv/
|
||||
@ -112,10 +114,10 @@ def write_debug_image(workdir, page_num, prods, scribbles):
|
||||
set_file_perms(path)
|
||||
|
||||
|
||||
def write_inklist(obj, path):
|
||||
def write_inklist(obj, mediabox, path):
|
||||
"""Draw an image of the inklist."""
|
||||
pagew = int(11*72)
|
||||
pageh = int(8.5*72)
|
||||
pagew = mediabox[2] - mediabox[0]
|
||||
pageh = mediabox[3] - mediabox[1]
|
||||
|
||||
img = Image.new('RGBA', (pagew, pageh), (0, 0, 0, 0))
|
||||
draw = ImageDraw.Draw(img, 'RGBA')
|
||||
@ -129,3 +131,24 @@ def write_inklist(obj, path):
|
||||
|
||||
img.save(path)
|
||||
set_file_perms(path)
|
||||
|
||||
|
||||
def write_square_or_circle(obj, mediabox, path):
    """Draw a Square or Circle annotation and save it as an image.

    A transparent RGBA canvas the size of the page is created, the outline
    of the annotation's ``Rect`` is drawn on it in black, the canvas is
    flipped vertically, and the result is written to ``path``.

    Args:
        obj: annotation dictionary; ``obj["Subtype"]`` selects the shape
            (``LIT('Square')`` draws a rectangle, anything else an ellipse)
            and ``obj["Rect"]`` holds the four bounding-box coordinates.
        mediabox: page box as ``[x0, y0, x1, y1]``; only its width and
            height are used, to size the canvas.
        path: destination file for the rendered image.
    """
    # Image.new() only accepts integer pixel sizes, while a page box may
    # carry fractional values, so truncate like the fixed-size code did.
    pagew = int(mediabox[2] - mediabox[0])
    pageh = int(mediabox[3] - mediabox[1])

    img = Image.new('RGBA', (pagew, pageh), (0, 0, 0, 0))
    draw = ImageDraw.Draw(img, 'RGBA')

    # Pillow takes the bounding box as ONE sequence [x0, y0, x1, y1] and
    # expects x0 <= x1 and y0 <= y1, so order the corners before drawing.
    # NOTE(review): Rect is used as-is, i.e. the page box origin is assumed
    # to be (0, 0) -- confirm for pages with an offset mediabox.
    xa, ya, xb, yb = obj['Rect']
    box = [min(xa, xb), min(ya, yb), max(xa, xb), max(ya, yb)]

    if obj["Subtype"] == LIT('Square'):
        draw.rectangle(box, fill=None, outline='black', width=3)
    else:
        # Pass the box as a single argument: unpacking it with * would bind
        # the coordinates to ellipse()'s fill/outline/width parameters and
        # clash with the keywords below (TypeError).
        draw.ellipse(box, fill=None, outline='black', width=3)

    # account for the difference in coordinate systems
    # between pdf and images.
    img = img.transpose(Image.FLIP_TOP_BOTTOM)

    img.save(path)
    set_file_perms(path)
|
||||
|
||||
@ -10,7 +10,7 @@ from pdfminer.pdfdocument import PDFDocument
|
||||
from pdfminer.pdftypes import PDFObjRef, resolve1
|
||||
|
||||
from .utils import Rect, pdf_rect, ensure_dir, set_file_perms
|
||||
from .img import write_inklist
|
||||
from .img import write_inklist, write_square_or_circle
|
||||
|
||||
|
||||
def make_product_box(obj, pagenum, mediabox):
|
||||
@ -44,7 +44,19 @@ def make_ink_scribble(obj, pagenum, mediabox, workdir):
|
||||
oid = obj['NM'].decode('utf-8')
|
||||
png_path = os.path.join(workdir, f"export-page{pagenum:03d}-nm{oid}.png")
|
||||
|
||||
write_inklist(obj, png_path)
|
||||
write_inklist(obj, mediabox, png_path)
|
||||
|
||||
return { 'page': pagenum,
|
||||
'rect': Rect(*mediabox),
|
||||
'objid': oid,
|
||||
'image': png_path }
|
||||
|
||||
|
||||
def make_square_or_circle_scribble(obj, pagenum, mediabox, workdir):
|
||||
oid = obj['NM'].decode('utf-8')
|
||||
png_path = os.path.join(workdir, f"export-page{pagenum:03d}-nm{oid}.png")
|
||||
|
||||
write_square_or_circle(obj, mediabox, png_path)
|
||||
|
||||
return { 'page': pagenum,
|
||||
'rect': Rect(*mediabox),
|
||||
@ -160,6 +172,13 @@ def is_inklist_annotation(anno):
|
||||
return 'Subtype' in anno and anno["Subtype"] == LIT('Ink')
|
||||
|
||||
|
||||
def is_square_or_circle_annotation(anno):
    """Tell whether an annotation is a Square or a Circle.

    Returns True when ``anno`` has a 'Subtype' entry equal to either
    ``LIT('Square')`` or ``LIT('Circle')``, and False otherwise
    (including when there is no 'Subtype' entry at all).
    """
    # Annotations without a subtype can never match.
    if 'Subtype' not in anno:
        return False

    subtype = anno["Subtype"]
    return subtype in (LIT('Square'), LIT('Circle'))
|
||||
|
||||
|
||||
def parse_pdf(fname, workdir, debug=0):
|
||||
PDFDocument.debug = debug
|
||||
PDFParser.debug = debug
|
||||
@ -192,10 +211,16 @@ def parse_pdf(fname, workdir, debug=0):
|
||||
anno = resolve1(anno)
|
||||
if is_inklist_annotation(anno):
|
||||
scribbles.append(make_ink_scribble(anno, pagenum, mediabox, workdir))
|
||||
elif is_square_or_circle_annotation(anno):
|
||||
scribbles.append(make_square_or_circle_scribble(anno, pagenum, mediabox, workdir))
|
||||
elif 'AAPL:AKExtras' in anno:
|
||||
scribbles.append(make_aapl_scribble(anno, pagenum, mediabox, workdir))
|
||||
elif 'ProCatName' in anno:
|
||||
prod_boxes.append(make_product_box(anno, pagenum, mediabox))
|
||||
elif anno['Subtype'] == LIT('FreeText'):
|
||||
print('ignoring FreeText annotation')
|
||||
elif anno['Subtype'] == LIT('Highlight'):
|
||||
print('ignoring Highlight annotation')
|
||||
else:
|
||||
print('ignoring other annotation:')
|
||||
print(anno)
|
||||
|
||||
Reference in New Issue
Block a user