markup: process Documents inklist annotations

This commit is contained in:
2020-02-28 16:23:33 -08:00
parent 8a1b94b4dc
commit 97ef16e47a
3 changed files with 76 additions and 9 deletions

View File

@ -9,7 +9,8 @@ from pdfminer.pdfparser import PDFParser
from pdfminer.pdfdocument import PDFDocument
from pdfminer.pdftypes import PDFObjRef, resolve1
from .utils import pdf_rect, ensure_dir, set_file_perms
from .utils import Rect, pdf_rect, ensure_dir, set_file_perms
from .img import write_inklist
def make_product_box(obj, pagenum, mediabox):
@ -39,7 +40,19 @@ def make_product_box(obj, pagenum, mediabox):
return None
def make_scribble(obj, pagenum, mediabox, workdir):
def make_ink_scribble(obj, pagenum, mediabox, workdir):
    """Export an ink annotation via write_inklist() and return its record.

    The annotation's 'NM' entry is decoded as UTF-8 and serves both as the
    record's 'objid' and as part of the output file name, which is placed
    inside *workdir* and also encodes the zero-padded page number.

    Returns a dict with the keys 'page', 'rect', 'objid' and 'image'.
    Note that 'rect' is built from *mediabox*, not from an entry of the
    annotation itself.
    """
    annotation_id = obj['NM'].decode('utf-8')
    image_name = f"export-page{pagenum:03d}-nm{annotation_id}.png"
    image_path = os.path.join(workdir, image_name)

    # Produce the image file before assembling the record that points at it.
    write_inklist(obj, image_path)

    return {
        'page': pagenum,
        'rect': Rect(*mediabox),
        'objid': annotation_id,
        'image': image_path,
    }
def make_aapl_scribble(obj, pagenum, mediabox, workdir):
rect = obj['Rect'] # position on page
# walk the object tree down to the image
@ -143,6 +156,10 @@ def write_pbm(obj, base_path):
return path
def is_inklist_annotation(anno):
    """Return True when *anno* has a 'Subtype' entry equal to LIT('Ink')."""
    # Guard first: annotations without a 'Subtype' entry are never ink lists.
    if 'Subtype' not in anno:
        return False
    return anno["Subtype"] == LIT('Ink')
def parse_pdf(fname, workdir, debug=0):
PDFDocument.debug = debug
PDFParser.debug = debug
@ -173,8 +190,10 @@ def parse_pdf(fname, workdir, debug=0):
for anno in annots:
anno = resolve1(anno)
if 'AAPL:AKExtras' in anno:
scribbles.append(make_scribble(anno, pagenum, mediabox, workdir))
if is_inklist_annotation(anno):
scribbles.append(make_ink_scribble(anno, pagenum, mediabox, workdir))
elif 'AAPL:AKExtras' in anno:
scribbles.append(make_aapl_scribble(anno, pagenum, mediabox, workdir))
elif 'ProCatName' in anno:
prod_boxes.append(make_product_box(anno, pagenum, mediabox))
else: