From 97ef16e47a63cd8c670cf256c7d6d5a5cca34d8b Mon Sep 17 00:00:00 2001
From: Seth Ladygo <alx-git@arclight.industries>
Date: Fri, 28 Feb 2020 16:23:33 -0800
Subject: [PATCH 1/3] markup: process Documents inklist annotations

---
 markup/img.py   | 19 +++++++++++++++++++
 markup/pdf.py   | 27 +++++++++++++++++++++++----
 markup/tasks.py | 39 ++++++++++++++++++++++++++++++++++-----
 3 files changed, 76 insertions(+), 9 deletions(-)

diff --git a/markup/img.py b/markup/img.py
index ebeb5f8..fd5993d 100644
--- a/markup/img.py
+++ b/markup/img.py
@@ -110,3 +110,22 @@ def write_debug_image(workdir, page_num, prods, scribbles):
 
     img.save(path)
     set_file_perms(path)
+
+
+def write_inklist(obj, path):
+    """Draw an image of the inklist."""
+    pagew = int(11*72)
+    pageh = int(8.5*72)
+
+    img = Image.new('RGBA', (pagew, pageh), (0, 0, 0, 0))
+    draw = ImageDraw.Draw(img, 'RGBA')
+
+    for segment in obj['InkList']:
+        draw.line(segment, 'black', 3)
+
+    # account for the difference in coordinate systems
+    # between pdf and images.
+    img = img.transpose(Image.FLIP_TOP_BOTTOM)
+
+    img.save(path)
+    set_file_perms(path)
diff --git a/markup/pdf.py b/markup/pdf.py
index dd1b7d1..56047e6 100644
--- a/markup/pdf.py
+++ b/markup/pdf.py
@@ -9,7 +9,8 @@ from pdfminer.pdfparser import PDFParser
 from pdfminer.pdfdocument import PDFDocument
 from pdfminer.pdftypes import PDFObjRef, resolve1
 
-from .utils import pdf_rect, ensure_dir, set_file_perms
+from .utils import Rect, pdf_rect, ensure_dir, set_file_perms
+from .img import write_inklist
 
 
 def make_product_box(obj, pagenum, mediabox):
@@ -39,7 +40,19 @@ def make_product_box(obj, pagenum, mediabox):
         return None
 
 
-def make_scribble(obj, pagenum, mediabox, workdir):
+def make_ink_scribble(obj, pagenum, mediabox, workdir):
+    oid = obj['NM'].decode('utf-8')
+    png_path = os.path.join(workdir, f"export-page{pagenum:03d}-nm{oid}.png")
+
+    write_inklist(obj, png_path)
+
+    return { 'page': pagenum,
+             'rect': Rect(*mediabox),
+             'objid': oid,
+             'image': png_path }
+
+
+def make_aapl_scribble(obj, pagenum, mediabox, workdir):
     rect = obj['Rect'] # position on page
 
     # walk the object tree down to the image
@@ -143,6 +156,10 @@ def write_pbm(obj, base_path):
     return path
 
 
+def is_inklist_annotation(anno):
+    return 'Subtype' in anno and anno["Subtype"] == LIT('Ink')
+
+
 def parse_pdf(fname, workdir, debug=0):
     PDFDocument.debug = debug
     PDFParser.debug = debug
@@ -173,8 +190,10 @@ def parse_pdf(fname, workdir, debug=0):
 
         for anno in annots:
             anno = resolve1(anno)
-            if 'AAPL:AKExtras' in anno:
-                scribbles.append(make_scribble(anno, pagenum, mediabox, workdir))
+            if is_inklist_annotation(anno):
+                scribbles.append(make_ink_scribble(anno, pagenum, mediabox, workdir))
+            elif 'AAPL:AKExtras' in anno:
+                scribbles.append(make_aapl_scribble(anno, pagenum, mediabox, workdir))
             elif 'ProCatName' in anno:
                 prod_boxes.append(make_product_box(anno, pagenum, mediabox))
             else:
diff --git a/markup/tasks.py b/markup/tasks.py
index c4b9d3b..0452e6c 100644
--- a/markup/tasks.py
+++ b/markup/tasks.py
@@ -2,13 +2,16 @@ from __future__ import absolute_import, unicode_literals
 from celery import task, shared_task
 from celery.utils.log import get_task_logger
 
-import os
-import re
-import sys
 import datetime
 import fileinput
+import os
+import re
+import shutil
 import smtplib
+import sys
+
 from pathlib import Path
+from os.path import basename, dirname, isfile
 
 from email.feedparser import FeedParser
 from email.message import EmailMessage
@@ -75,7 +78,6 @@ def process_attachment(from_address, subject, attachment):
     print(f'Using pdf name: {pdf_name}')
 
     pdf_base = Path(pdf_name).stem
-
     workdir = os.path.join(WORKDIR, clean_path(from_address), pdf_base)
     ensure_dir(workdir)
     pdf_path = os.path.join(workdir, pdf_name)
@@ -84,6 +86,32 @@ def process_attachment(from_address, subject, attachment):
         att.write(attachment.get_payload(decode=True))
     set_file_perms(pdf_path)
 
+    process_pdf(pdf_path, from_address, subject, workdir)
+
+
+@shared_task(on_failure=on_fail_handler)
+def process_markup_pdf(pdf_path, user):
+    if not Path(pdf_path).is_file():
+        print(f'No pdf - exiting ({pdf_path})')
+        return
+
+    pdf_stem = Path(pdf_path).stem
+    workdir = os.path.join(WORKDIR, clean_path(user.username), clean_path(pdf_stem))
+    ensure_dir(workdir)
+
+    pdf_name = Path(pdf_path).name
+    dest_path = os.path.join(workdir, pdf_name)
+    print(f'copying pdf to {dest_path}')
+    shutil.copy(pdf_path, dest_path)
+    set_file_perms(dest_path)
+
+    frm = str(make_header(decode_header(f'{user.get_full_name()} <{user.email}>')))
+    subject = str(make_header(decode_header(pdf_name)))
+
+    process_pdf(dest_path, frm, subject, workdir)
+
+
+def process_pdf(pdf_path, from_address, subject, workdir):
     # find matches
     matches = find_marked_products(pdf_path, workdir, debug=0)
     if not matches:
@@ -94,7 +122,8 @@ def process_attachment(from_address, subject, attachment):
     print(f'{len(matches)} product matches')
 
     # write spreadsheet
-    xls_path = write_spreadsheet(matches, workdir, pdf_base)
+    pdf_stem = Path(pdf_path).stem
+    xls_path = write_spreadsheet(matches, workdir, pdf_stem)
 
     if xls_path:
         # send reply

From dc85d784ab8245347751a70ed06e5d4e20184a3b Mon Sep 17 00:00:00 2001
From: Seth Ladygo <alx-git@arclight.industries>
Date: Fri, 28 Feb 2020 17:11:23 -0800
Subject: [PATCH 2/3] markup/email.py: don't send email if EMAIL_HOST setting
 is None

---
 markup/email.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/markup/email.py b/markup/email.py
index 34bd80f..2150ddf 100644
--- a/markup/email.py
+++ b/markup/email.py
@@ -85,6 +85,10 @@ def send_error_email(subj, einfo):
 
 
 def send(frm, subj, msg):
+    if not EMAIL_HOST:  # no mail host configured: skip sending entirely
+        log.info('not sending email')
+        return
+
     msg['From'] = 'Keen ProCatalog Markup Bot <markup@procatalog.io>'
     msg['Reply-To'] = 'Keen ProCatalog Support <support@procatalog.io>'
     msg['To'] = frm

From b625b4f16d8b2341897c519c5049204c3870303c Mon Sep 17 00:00:00 2001
From: Seth Ladygo <alx-git@arclight.industries>
Date: Fri, 28 Feb 2020 17:15:22 -0800
Subject: [PATCH 3/3] markup: support rectangle and circle annotations

---
 markup/img.py | 29 ++++++++++++++++++++++++++---
 markup/pdf.py | 29 +++++++++++++++++++++++++++--
 2 files changed, 53 insertions(+), 5 deletions(-)

diff --git a/markup/img.py b/markup/img.py
index fd5993d..e6c6a14 100644
--- a/markup/img.py
+++ b/markup/img.py
@@ -9,6 +9,8 @@ import dumper
 import random as rng
 from pathlib import Path
 
+from pdfminer.psparser import LIT
+
 from .utils import cv2_rect, ensure_dir, set_file_perms, WORKDIR
 
 # https://www.pyimagesearch.com/2014/10/20/finding-shapes-images-using-python-opencv/
@@ -112,10 +114,10 @@ def write_debug_image(workdir, page_num, prods, scribbles):
     set_file_perms(path)
 
 
-def write_inklist(obj, path):
+def write_inklist(obj, mediabox, path):
     """Draw an image of the inklist."""
-    pagew = int(11*72)
-    pageh = int(8.5*72)
+    pagew = int(mediabox[2] - mediabox[0])  # Image.new() needs int sizes
+    pageh = int(mediabox[3] - mediabox[1])
 
     img = Image.new('RGBA', (pagew, pageh), (0, 0, 0, 0))
     draw = ImageDraw.Draw(img, 'RGBA')
@@ -129,3 +131,24 @@ def write_inklist(obj, path):
 
     img.save(path)
     set_file_perms(path)
+
+
+def write_square_or_circle(obj, mediabox, path):
+    """Draw the Square/Circle annotation's outline on a page-sized image."""
+    pagew = int(mediabox[2] - mediabox[0])  # Image.new() needs int sizes
+    pageh = int(mediabox[3] - mediabox[1])
+
+    img = Image.new('RGBA', (pagew, pageh), (0, 0, 0, 0))
+    draw = ImageDraw.Draw(img, 'RGBA')
+
+    if obj["Subtype"] == LIT('Square'):
+        draw.rectangle(obj['Rect'], fill=None, outline='black', width=3)
+    else:
+        draw.ellipse(obj['Rect'], fill=None, outline='black', width=3)
+
+    # account for the difference in coordinate systems
+    # between pdf and images.
+    img = img.transpose(Image.FLIP_TOP_BOTTOM)
+
+    img.save(path)
+    set_file_perms(path)
diff --git a/markup/pdf.py b/markup/pdf.py
index 56047e6..3f064e8 100644
--- a/markup/pdf.py
+++ b/markup/pdf.py
@@ -10,7 +10,7 @@ from pdfminer.pdfdocument import PDFDocument
 from pdfminer.pdftypes import PDFObjRef, resolve1
 
 from .utils import Rect, pdf_rect, ensure_dir, set_file_perms
-from .img import write_inklist
+from .img import write_inklist, write_square_or_circle
 
 
 def make_product_box(obj, pagenum, mediabox):
@@ -44,7 +44,19 @@ def make_ink_scribble(obj, pagenum, mediabox, workdir):
     oid = obj['NM'].decode('utf-8')
     png_path = os.path.join(workdir, f"export-page{pagenum:03d}-nm{oid}.png")
 
-    write_inklist(obj, png_path)
+    write_inklist(obj, mediabox, png_path)
+
+    return { 'page': pagenum,
+             'rect': Rect(*mediabox),
+             'objid': oid,
+             'image': png_path }
+
+
+def make_square_or_circle_scribble(obj, pagenum, mediabox, workdir):
+    oid = obj['NM'].decode('utf-8')
+    png_path = os.path.join(workdir, f"export-page{pagenum:03d}-nm{oid}.png")
+
+    write_square_or_circle(obj, mediabox, png_path)
 
     return { 'page': pagenum,
              'rect': Rect(*mediabox),
@@ -160,6 +172,13 @@ def is_inklist_annotation(anno):
     return 'Subtype' in anno and anno["Subtype"] == LIT('Ink')
 
 
+def is_square_or_circle_annotation(anno):
+    if 'Subtype' in anno:
+        if anno["Subtype"] == LIT('Square') or anno["Subtype"] == LIT('Circle'):
+            return True
+    return False
+
+
 def parse_pdf(fname, workdir, debug=0):
     PDFDocument.debug = debug
     PDFParser.debug = debug
@@ -192,10 +211,16 @@ def parse_pdf(fname, workdir, debug=0):
             anno = resolve1(anno)
             if is_inklist_annotation(anno):
                 scribbles.append(make_ink_scribble(anno, pagenum, mediabox, workdir))
+            elif is_square_or_circle_annotation(anno):
+                scribbles.append(make_square_or_circle_scribble(anno, pagenum, mediabox, workdir))
             elif 'AAPL:AKExtras' in anno:
                 scribbles.append(make_aapl_scribble(anno, pagenum, mediabox, workdir))
             elif 'ProCatName' in anno:
                 prod_boxes.append(make_product_box(anno, pagenum, mediabox))
+            elif anno.get('Subtype') == LIT('FreeText'):  # Subtype may be absent
+                print('ignoring FreeText annotation')
+            elif anno.get('Subtype') == LIT('Highlight'):
+                print('ignoring Highlight annotation')
             else:
                 print('ignoring other annotation:')
                 print(anno)