markup: process Documents inklist annotations
This commit is contained in:
@ -9,7 +9,8 @@ from pdfminer.pdfparser import PDFParser
|
||||
from pdfminer.pdfdocument import PDFDocument
|
||||
from pdfminer.pdftypes import PDFObjRef, resolve1
|
||||
|
||||
from .utils import pdf_rect, ensure_dir, set_file_perms
|
||||
from .utils import Rect, pdf_rect, ensure_dir, set_file_perms
|
||||
from .img import write_inklist
|
||||
|
||||
|
||||
def make_product_box(obj, pagenum, mediabox):
|
||||
@ -39,7 +40,19 @@ def make_product_box(obj, pagenum, mediabox):
|
||||
return None
|
||||
|
||||
|
||||
def make_scribble(obj, pagenum, mediabox, workdir):
|
||||
def make_ink_scribble(obj, pagenum, mediabox, workdir):
    """Build the scribble record for an Ink annotation.

    The annotation's 'NM' entry (bytes, decoded as UTF-8) is used as the
    object id and is embedded, together with the zero-padded page number,
    in the name of an image file located in *workdir*.  The annotation and
    that path are handed to ``write_inklist`` (imported from ``.img``),
    which is presumably what renders the ink strokes to the PNG -- confirm
    against that module.

    Note that the returned 'rect' is built from *mediabox*, not from the
    annotation's own rectangle.

    Returns a dict with the keys 'page', 'rect', 'objid' and 'image'.
    Raises KeyError if the annotation has no 'NM' entry.
    """
    annotation_id = obj['NM'].decode('utf-8')
    image_name = f"export-page{pagenum:03d}-nm{annotation_id}.png"
    image_path = os.path.join(workdir, image_name)

    write_inklist(obj, image_path)

    scribble = {}
    scribble['page'] = pagenum
    scribble['rect'] = Rect(*mediabox)
    scribble['objid'] = annotation_id
    scribble['image'] = image_path
    return scribble
|
||||
|
||||
|
||||
def make_aapl_scribble(obj, pagenum, mediabox, workdir):
|
||||
rect = obj['Rect'] # position on page
|
||||
|
||||
# walk the object tree down to the image
|
||||
@ -143,6 +156,10 @@ def write_pbm(obj, base_path):
|
||||
return path
|
||||
|
||||
|
||||
def is_inklist_annotation(anno):
    """Tell whether *anno* is an Ink annotation.

    An annotation qualifies when it has a 'Subtype' entry and that entry
    compares equal to the literal ``LIT('Ink')``.  An annotation without a
    'Subtype' entry yields False.
    """
    # Guard first so the subscript below can never fail on a missing key.
    if 'Subtype' not in anno:
        return False
    return anno["Subtype"] == LIT('Ink')
|
||||
|
||||
|
||||
def parse_pdf(fname, workdir, debug=0):
|
||||
PDFDocument.debug = debug
|
||||
PDFParser.debug = debug
|
||||
@ -173,8 +190,10 @@ def parse_pdf(fname, workdir, debug=0):
|
||||
|
||||
for anno in annots:
|
||||
anno = resolve1(anno)
|
||||
if 'AAPL:AKExtras' in anno:
|
||||
scribbles.append(make_scribble(anno, pagenum, mediabox, workdir))
|
||||
if is_inklist_annotation(anno):
|
||||
scribbles.append(make_ink_scribble(anno, pagenum, mediabox, workdir))
|
||||
elif 'AAPL:AKExtras' in anno:
|
||||
scribbles.append(make_aapl_scribble(anno, pagenum, mediabox, workdir))
|
||||
elif 'ProCatName' in anno:
|
||||
prod_boxes.append(make_product_box(anno, pagenum, mediabox))
|
||||
else:
|
||||
|
||||
Reference in New Issue
Block a user