markup: handle netpbm encoded annotations
This commit is contained in:
@ -20,12 +20,16 @@ def find_shapes(image_path):
|
|||||||
"""
|
"""
|
||||||
path = Path(image_path)
|
path = Path(image_path)
|
||||||
|
|
||||||
img = Image.open(image_path, 'r')
|
print('finding shapes in {}'.format(image_path))
|
||||||
if not img.mode in ('RGBA', 'LA'):
|
|
||||||
print('no alpha channel: {}'.format(img.mode))
|
|
||||||
return None
|
|
||||||
|
|
||||||
alpha_layer = img.convert('RGBA').split()[-1]
|
img = Image.open(image_path, 'r')
|
||||||
|
if img.mode == 'RGBA':
|
||||||
|
alpha_layer = img.convert('RGBA').split()[-1]
|
||||||
|
elif img.mode == 'L':
|
||||||
|
alpha_layer = img
|
||||||
|
else:
|
||||||
|
print('unhandled image mode: {}'.format(img.mode))
|
||||||
|
return None
|
||||||
|
|
||||||
alpha_layer = alpha_layer.filter(ImageFilter.GaussianBlur(5))
|
alpha_layer = alpha_layer.filter(ImageFilter.GaussianBlur(5))
|
||||||
|
|
||||||
|
|||||||
@ -2,7 +2,9 @@ import os
|
|||||||
import sys
|
import sys
|
||||||
import subprocess
|
import subprocess
|
||||||
import shutil
|
import shutil
|
||||||
|
import dumper
|
||||||
|
|
||||||
|
from pdfminer.psparser import LIT
|
||||||
from pdfminer.pdfparser import PDFParser
|
from pdfminer.pdfparser import PDFParser
|
||||||
from pdfminer.pdfdocument import PDFDocument
|
from pdfminer.pdfdocument import PDFDocument
|
||||||
from pdfminer.pdftypes import PDFObjRef, resolve1
|
from pdfminer.pdftypes import PDFObjRef, resolve1
|
||||||
@ -59,8 +61,15 @@ def make_scribble(obj, pagenum, mediabox, workdir):
|
|||||||
'rect': pdf_rect(rect, mediabox[3]),
|
'rect': pdf_rect(rect, mediabox[3]),
|
||||||
'objid': im1.objid,
|
'objid': im1.objid,
|
||||||
'image': path }
|
'image': path }
|
||||||
|
elif flter.name == 'FlateDecode':
|
||||||
|
path = export_netpbm(im1, workdir, pagenum)
|
||||||
|
return { 'page': pagenum,
|
||||||
|
'rect': pdf_rect(rect, mediabox[3]),
|
||||||
|
'objid': im1.objid,
|
||||||
|
'image': path }
|
||||||
else:
|
else:
|
||||||
print('skipping non-jp2 image')
|
print('skipping unrecognized image')
|
||||||
|
# print(dumper.dump(im1))
|
||||||
return None
|
return None
|
||||||
|
|
||||||
|
|
||||||
@ -85,6 +94,55 @@ def export_jp2(obj, workdir, pagenum):
|
|||||||
return png_path
|
return png_path
|
||||||
|
|
||||||
|
|
||||||
|
def export_netpbm(obj, workdir, pagenum):
    """Export a PDF image stream to a netpbm file inside *workdir*.

    The image itself is always written via ``write_pbm``.  If the stream's
    attributes carry an ``SMask`` entry, that mask is written as well (with
    a ``-mask`` suffix on the base name) and its path is returned instead
    of the image's path.

    Args:
        obj: image stream object; must expose ``objid`` and ``attrs``.
        workdir: directory the exported file(s) are written to; created
            through ``ensure_dir`` if needed.
        pagenum: page number, used only to build the file name.

    Returns:
        Path of the mask file when an ``SMask`` is present, otherwise the
        path of the exported image file.
    """
    oid = obj.objid
    ensure_dir(workdir)

    pbm_base = os.path.join(workdir, f"export-page{pagenum:03d}-obj{oid:05d}")
    pbm_path = write_pbm(obj, pbm_base)

    # SMask is optional: look it up without raising so images that have no
    # mask fall through and return the plain image path.
    smask = obj.attrs.get('SMask')
    if smask:
        print('extracting pbm mask')
        mask_base = os.path.join(workdir, f"export-page{pagenum:03d}-obj{oid:05d}-mask")
        # write_pbm resolves indirect references itself, so the (possibly
        # indirect) SMask entry can be handed over as-is.
        pbm_path = write_pbm(smask, mask_base)

    return pbm_path
|
||||||
|
|
||||||
|
|
||||||
|
def write_pbm(obj, base_path):
    """Dump an image stream to a binary netpbm file and return its path.

    *obj* is passed through ``resolve1`` first, so it may be an indirect
    reference.  The file extension is appended to *base_path*: ``.pgm``
    when the stream's ``ColorSpace`` equals ``LIT('DeviceGray')``, ``.ppm``
    for anything else.  The header (magic number, width/height, maximum
    sample value) is followed by the bytes returned by ``obj.get_data()``.
    ``set_file_perms`` is applied to the finished file.

    NOTE(review): every ``BitsPerComponent`` other than 8 is declared with a
    maximum value of 65535; presumably only 8- and 16-bit images are
    expected here -- confirm before feeding 1/2/4-bit streams.
    """
    obj = resolve1(obj)
    attrs = obj.attrs

    is_gray = resolve1(attrs['ColorSpace']) == LIT('DeviceGray')
    path = base_path + ('.pgm' if is_gray else '.ppm')

    print(f'writing pbm: {path}')

    data = obj.get_data()
    with open(path, 'wb') as out:
        # Magic number: P5 = binary graymap, P6 = binary pixmap.
        out.write(b"P5\n" if is_gray else b"P6\n")

        # Image dimensions line.
        out.write(f"{attrs['Width']} {attrs['Height']}\n".encode())

        # Maximum sample value line.
        maxval_line = b"255\n" if attrs['BitsPerComponent'] == 8 else b"65535\n"
        out.write(maxval_line)

        # Raw sample data straight from the decoded stream.
        out.write(data)

    set_file_perms(path)

    return path
|
||||||
|
|
||||||
|
|
||||||
def parse_pdf(fname, workdir, debug=0):
|
def parse_pdf(fname, workdir, debug=0):
|
||||||
PDFDocument.debug = debug
|
PDFDocument.debug = debug
|
||||||
PDFParser.debug = debug
|
PDFParser.debug = debug
|
||||||
|
|||||||
Reference in New Issue
Block a user