NOTE(review): the line breaks and indentation of this patch were rebuilt from
the hunk line counts. Leading whitespace on context lines (especially inside
parse_pdf) is a best guess - verify against the target files before applying.
Changes made to the added lines: draw.ellipse() now receives the bounding box
as a single argument, write_square_or_circle has its own docstring, page sizes
are cast to int for Image.new, pdf.py imports LIT explicitly (harmless if it is
already imported above the visible context), the FreeText/Highlight checks use
anno.get('Subtype') like the guarded helper predicates, and a placeholder-free
f-string prefix was dropped.

diff --git a/markup/email.py b/markup/email.py
index 34bd80f..2150ddf 100644
--- a/markup/email.py
+++ b/markup/email.py
@@ -85,6 +85,10 @@ def send_error_email(subj, einfo):
 
 
 def send(frm, subj, msg):
+    if not EMAIL_HOST:
+        log.info('not sending email')
+        return
+
     msg['From'] = 'Keen ProCatalog Markup Bot '
     msg['Reply-To'] = 'Keen ProCatalog Support '
     msg['To'] = frm
diff --git a/markup/img.py b/markup/img.py
index ebeb5f8..e6c6a14 100644
--- a/markup/img.py
+++ b/markup/img.py
@@ -9,6 +9,8 @@ import dumper
 import random as rng
 from pathlib import Path
 
+from pdfminer.psparser import LIT
+
 from .utils import cv2_rect, ensure_dir, set_file_perms, WORKDIR
 
 # https://www.pyimagesearch.com/2014/10/20/finding-shapes-images-using-python-opencv/
@@ -110,3 +112,43 @@ def write_debug_image(workdir, page_num, prods, scribbles):
 
     img.save(path)
     set_file_perms(path)
+
+
+def write_inklist(obj, mediabox, path):
+    """Draw an image of the inklist."""
+    pagew = int(mediabox[2] - mediabox[0])
+    pageh = int(mediabox[3] - mediabox[1])
+
+    img = Image.new('RGBA', (pagew, pageh), (0, 0, 0, 0))
+    draw = ImageDraw.Draw(img, 'RGBA')
+
+    for segment in obj['InkList']:
+        draw.line(segment, 'black', 3)
+
+    # account for the difference in coordinate systems
+    # between pdf and images.
+    img = img.transpose(Image.FLIP_TOP_BOTTOM)
+
+    img.save(path)
+    set_file_perms(path)
+
+
+def write_square_or_circle(obj, mediabox, path):
+    """Draw an image of a Square or Circle annotation's outline."""
+    pagew = int(mediabox[2] - mediabox[0])
+    pageh = int(mediabox[3] - mediabox[1])
+
+    img = Image.new('RGBA', (pagew, pageh), (0, 0, 0, 0))
+    draw = ImageDraw.Draw(img, 'RGBA')
+
+    if obj["Subtype"] == LIT('Square'):
+        draw.rectangle(obj['Rect'], fill=None, outline='black', width=3)
+    else:
+        draw.ellipse(obj['Rect'], fill=None, outline='black', width=3)
+
+    # account for the difference in coordinate systems
+    # between pdf and images.
+    img = img.transpose(Image.FLIP_TOP_BOTTOM)
+
+    img.save(path)
+    set_file_perms(path)
diff --git a/markup/pdf.py b/markup/pdf.py
index dd1b7d1..3f064e8 100644
--- a/markup/pdf.py
+++ b/markup/pdf.py
@@ -9,7 +9,9 @@ from pdfminer.pdfparser import PDFParser
 from pdfminer.pdfdocument import PDFDocument
 from pdfminer.pdftypes import PDFObjRef, resolve1
+from pdfminer.psparser import LIT
 
-from .utils import pdf_rect, ensure_dir, set_file_perms
+from .utils import Rect, pdf_rect, ensure_dir, set_file_perms
+from .img import write_inklist, write_square_or_circle
 
 
 def make_product_box(obj, pagenum, mediabox):
@@ -39,7 +41,31 @@ def make_product_box(obj, pagenum, mediabox):
     return None
 
 
-def make_scribble(obj, pagenum, mediabox, workdir):
+def make_ink_scribble(obj, pagenum, mediabox, workdir):
+    oid = obj['NM'].decode('utf-8')
+    png_path = os.path.join(workdir, f"export-page{pagenum:03d}-nm{oid}.png")
+
+    write_inklist(obj, mediabox, png_path)
+
+    return { 'page': pagenum,
+             'rect': Rect(*mediabox),
+             'objid': oid,
+             'image': png_path }
+
+
+def make_square_or_circle_scribble(obj, pagenum, mediabox, workdir):
+    oid = obj['NM'].decode('utf-8')
+    png_path = os.path.join(workdir, f"export-page{pagenum:03d}-nm{oid}.png")
+
+    write_square_or_circle(obj, mediabox, png_path)
+
+    return { 'page': pagenum,
+             'rect': Rect(*mediabox),
+             'objid': oid,
+             'image': png_path }
+
+
+def make_aapl_scribble(obj, pagenum, mediabox, workdir):
     rect = obj['Rect'] # position on page
 
     # walk the object tree down to the image
@@ -143,6 +169,17 @@ def write_pbm(obj, base_path):
     return path
 
 
+def is_inklist_annotation(anno):
+    return 'Subtype' in anno and anno["Subtype"] == LIT('Ink')
+
+
+def is_square_or_circle_annotation(anno):
+    if 'Subtype' in anno:
+        if anno["Subtype"] == LIT('Square') or anno["Subtype"] == LIT('Circle'):
+            return True
+    return False
+
+
 def parse_pdf(fname, workdir, debug=0):
     PDFDocument.debug = debug
     PDFParser.debug = debug
@@ -173,10 +210,18 @@ def parse_pdf(fname, workdir, debug=0):
 
         for anno in annots:
             anno = resolve1(anno)
-            if 'AAPL:AKExtras' in anno:
-                scribbles.append(make_scribble(anno, pagenum, mediabox, workdir))
+            if is_inklist_annotation(anno):
+                scribbles.append(make_ink_scribble(anno, pagenum, mediabox, workdir))
+            elif is_square_or_circle_annotation(anno):
+                scribbles.append(make_square_or_circle_scribble(anno, pagenum, mediabox, workdir))
+            elif 'AAPL:AKExtras' in anno:
+                scribbles.append(make_aapl_scribble(anno, pagenum, mediabox, workdir))
             elif 'ProCatName' in anno:
                 prod_boxes.append(make_product_box(anno, pagenum, mediabox))
+            elif anno.get('Subtype') == LIT('FreeText'):
+                print('ignoring FreeText annotation')
+            elif anno.get('Subtype') == LIT('Highlight'):
+                print('ignoring Highlight annotation')
             else:
                 print('ignoring other annotation:')
                 print(anno)
diff --git a/markup/tasks.py b/markup/tasks.py
index c4b9d3b..0452e6c 100644
--- a/markup/tasks.py
+++ b/markup/tasks.py
@@ -2,13 +2,16 @@ from __future__ import absolute_import, unicode_literals
 from celery import task, shared_task
 from celery.utils.log import get_task_logger
 
-import os
-import re
-import sys
 import datetime
 import fileinput
+import os
+import re
+import shutil
 import smtplib
+import sys
+
 from pathlib import Path
+from os.path import basename, dirname, isfile
 
 from email.feedparser import FeedParser
 from email.message import EmailMessage
@@ -75,7 +78,6 @@ def process_attachment(from_address, subject, attachment):
     print(f'Using pdf name: {pdf_name}')
 
     pdf_base = Path(pdf_name).stem
-
     workdir = os.path.join(WORKDIR, clean_path(from_address), pdf_base)
     ensure_dir(workdir)
     pdf_path = os.path.join(workdir, pdf_name)
@@ -84,6 +86,32 @@ def process_attachment(from_address, subject, attachment):
         att.write(attachment.get_payload(decode=True))
     set_file_perms(pdf_path)
 
+    process_pdf(pdf_path, from_address, subject, workdir)
+
+
+@shared_task(on_failure=on_fail_handler)
+def process_markup_pdf(pdf_path, user):
+    if not Path(pdf_path).is_file():
+        print(f'No pdf - exiting ({pdf_path})')
+        return
+
+    pdf_stem = Path(pdf_path).stem
+    workdir = os.path.join(WORKDIR, clean_path(user.username), clean_path(pdf_stem))
+    ensure_dir(workdir)
+
+    pdf_name = Path(pdf_path).name
+    dest_path = os.path.join(workdir, pdf_name)
+    print(f'copying pdf to {dest_path}')
+    shutil.copy(pdf_path, dest_path)
+    set_file_perms(dest_path)
+
+    frm = str(make_header(decode_header(f'{user.get_full_name()} <{user.email}>')))
+    subject = str(make_header(decode_header(pdf_name)))
+
+    process_pdf(dest_path, frm, subject, workdir)
+
+
+def process_pdf(pdf_path, from_address, subject, workdir):
     # find matches
     matches = find_marked_products(pdf_path, workdir, debug=0)
     if not matches:
@@ -94,7 +122,8 @@ def process_attachment(from_address, subject, attachment):
     print(f'{len(matches)} product matches')
 
     # write spreadsheet
-    xls_path = write_spreadsheet(matches, workdir, pdf_base)
+    pdf_stem = Path(pdf_path).stem
+    xls_path = write_spreadsheet(matches, workdir, pdf_stem)
     if xls_path:
 
         # send reply