markup: move functionality to library

This commit is contained in:
2019-10-18 15:14:05 -07:00
parent 5282f7cb2f
commit 94c1a419dc
5 changed files with 104 additions and 24 deletions

View File

@@ -9,7 +9,7 @@ from pdfminer.pdftypes import PDFObjRef, resolve1
from django.conf import settings
from .utils import pdf_rect
from .utils import pdf_rect, ensure_dir, set_file_perms
WORKDIR = os.path.join(settings.ASSET_DIR, 'markup', 'work')
@@ -34,7 +34,7 @@ def make_product_box(obj, pagenum, mediabox):
return None
def make_scribble(obj, pagenum, mediabox):
def make_scribble(obj, pagenum, mediabox, subdir, name):
rect = obj['Rect'] # position on page
# walk the object tree down to the image
@@ -51,7 +51,7 @@ def make_scribble(obj, pagenum, mediabox):
flter = im1['Filter']
if flter.name == 'JPXDecode':
path = export_jp2(im1)
path = export_jp2(im1, subdir, name, pagenum)
return { 'page': pagenum,
'rect': pdf_rect(rect, mediabox[3]),
'objid': im1.objid,
@@ -61,33 +61,29 @@ def make_scribble(obj, pagenum, mediabox):
return None
def export_jp2(obj):
jp2_path = os.path.join(WORKDIR, "export-{}.jp2".format(obj.objid))
png_path = os.path.join(WORKDIR, "export-{}.png".format(obj.objid))
def export_jp2(obj, subdir, name, pagenum):
oid = obj.objid
jp2_path = os.path.join(WORKDIR, subdir, f"{name}-export-page{pagenum:03d}-{oid}.jp2")
png_path = os.path.join(WORKDIR, subdir, f"{name}-export-page{pagenum:03d}-{oid}.png")
if not os.path.exists(WORKDIR):
os.makedirs(WORKDIR)
os.chmod(WORKDIR, 0o775)
shutil.chown(WORKDIR, group='procat')
ensure_dir(os.path.join(WORKDIR, subdir))
data = obj.get_rawdata()
print('extracting jp2: {}'.format(jp2_path))
with open(jp2_path, 'wb') as out:
out.write(data)
os.chmod(jp2_path, 0o664)
shutil.chown(jp2_path, group='procat')
set_file_perms(jp2_path)
result = subprocess.run(['opj_decompress', '-i', jp2_path, '-o', png_path], capture_output=True)
if result.returncode != 0:
print('ERROR converting {}:\n{}\n{}'.format(jp2_path, result.stdout.decode(), result.stderr.decode()))
else:
os.chmod(png_path, 0o664)
shutil.chown(png_path, group='procat')
set_file_perms(png_path)
return png_path
def parse_pdf(fname, debug=0):
def parse_pdf(fname, subdir, name, debug=0):
PDFDocument.debug = debug
PDFParser.debug = debug
@@ -118,7 +114,7 @@ def parse_pdf(fname, debug=0):
for anno in annots:
anno = resolve1(anno)
if 'AAPL:AKExtras' in anno:
scribbles.append(make_scribble(anno, pagenum, mediabox))
scribbles.append(make_scribble(anno, pagenum, mediabox, subdir, name))
elif 'ProCatName' in anno:
prod_boxes.append(make_product_box(anno, pagenum, mediabox))
else: