Merge branch 'markup_tool'

2019-11-20 17:04:16 -08:00
parent fdf0187718 deaad1a272
commit 68b7f837ed
28 changed files with 1073 additions and 12 deletions
--- a/catalogedit/views.py
+++ b/catalogedit/views.py
@ -24,8 +24,8 @@ def catalogedit(request, id=0):
    seasons = Season.objects.order_by('ordering')
    context = {
        'catalogID': id,
-        'regions': [r.serialize() for r in regions],
-        'seasons': [s.serialize() for s in seasons],
+        'regions': [r.serialize() for r in regions if r.visible],
+        'seasons': [s.serialize() for s in seasons][::-1], # reversed
    }
    return render(request, 'catalogedit/catalogedit.html', context)

--- a/cataloglist/views.py
+++ b/cataloglist/views.py
@ -27,6 +27,6 @@ def my_catalogs(request):

@login_required
 def public_catalogs(request):
-    cats = Catalog.objects.filter(public=True).exclude(owner=request.user).order_by('-updated')
+    cats = Catalog.objects.filter(public=True).order_by('-updated')
    data = [c.summary() for c in cats]
    return JsonResponse({'catalogs': data})
--- a/markup/init.py
+++ b/markup/init.py
--- a/markup/email.py
+++ b/markup/email.py
@ -0,0 +1,109 @@
+import sys
+import string
+import random
+import smtplib
+from pathlib import Path
+from mailbox import Maildir
+from email.message import EmailMessage
+from email.utils import formatdate
+from email.header import Header, make_header
+
+from procat2.settings import EMAIL_HOST, EMAIL_HOST_USER, EMAIL_HOST_PASSWORD
+
+import logging
+log = logging.getLogger(__name__)
+
+
+# Plain-text reply bodies, one per outcome of processing a submission.
+
+# Sent by reply() together with the spreadsheet and pdf attachments.
+body_ok = """Hi,
+
+Attached is a copy of your marked up catalog and a spreadsheet with
+the articles you selected.
+
+Enjoy,
+ProCatalog Markup Bot
+"""
+
+# Sent by reply_missing().
+body_missing = """Hi,
+
+I couldn't find a pdf attached to your message.  I can't do much
+without a marked up catalog pdf, so please include that when you try
+again.
+
+Thanks,
+ProCatalog Markup Bot
+"""
+
+# Sent by reply_no_matches().
+body_no_matches = """Hi,
+
+I couldn't find any products marked in your pdf.  Make sure you're
+using a ProCatalog pdf and that you've circled or otherwise scribbled
+over some material images or SKUs before submitting.
+
+Thanks,
+ProCatalog Markup Bot
+"""
+
+
+def reply(frm, subj, xls_path, pdf_path):
+    """Send the success reply with the spreadsheet and the pdf attached.
+
+    frm is the address the reply is sent to, subj is the original
+    subject (sent back prefixed with 'Re: '), and xls_path / pdf_path
+    are the files to attach, in that order.
+    """
+    attachments = (
+        (xls_path, 'vnd.openxmlformats-officedocument.spreadsheetml.sheet'),
+        (pdf_path, 'pdf'),
+    )
+
+    msg = EmailMessage()
+    msg.set_content(body_ok)
+
+    for attachment_path, subtype in attachments:
+        with open(attachment_path, 'rb') as fp:
+            msg.add_attachment(fp.read(),
+                               maintype='application',
+                               subtype=subtype,
+                               filename=Path(attachment_path).name)
+
+    send(frm, f'Re: {subj}', msg)
+
+
+def reply_missing(frm, subj):
+    """Reply to frm with the 'no pdf attached' message body."""
+    notice = EmailMessage()
+    notice.set_content(body_missing)
+    send(frm, f'Re: {subj}', notice)
+
+
+def reply_no_matches(frm, subj):
+    """Reply to frm with the 'no marked products found' message body."""
+    notice = EmailMessage()
+    notice.set_content(body_no_matches)
+    send(frm, f'Re: {subj}', notice)
+
+
+def send_error_email(subj, einfo):
+    """Mail the error text einfo, with subject subj, to the error mailbox."""
+    report = EmailMessage()
+    report.set_content(einfo)
+    send('error@procatalog.io', subj, report)
+
+
+def send(frm, subj, msg):
+    """Fill in the headers of msg, archive a copy, and send it over SMTP.
+
+    frm is the recipient (the address the original submission came
+    from), subj the subject line, and msg an EmailMessage whose content
+    is already set.  A copy of every outgoing message is added to the
+    Maildir at /tmp/markup_submit_mail before it is sent.
+    """
+    headers = (
+        ('From', 'Keen ProCatalog Markup Bot <markup@procatalog.io>'),
+        ('Reply-To', 'Keen ProCatalog Support <support@procatalog.io>'),
+        ('To', frm),
+        ('Bcc', 'alx-markup@procatalog.io'),
+        ('Subject', Header(subj).encode()),
+        ('Message-ID', msgid()),
+        ('Date', formatdate()),
+    )
+    for header_name, header_value in headers:
+        msg[header_name] = header_value
+
+    # keep a local copy of what was sent
+    archive = Maildir('/tmp/markup_submit_mail')
+    archive.add(msg.as_bytes())
+
+    log.info(f'sending email to "{frm}": {subj}')
+
+    with smtplib.SMTP(EMAIL_HOST) as smtp:
+        smtp.starttls()
+        smtp.login(EMAIL_HOST_USER, EMAIL_HOST_PASSWORD)
+        smtp.send_message(msg)
+
+
+def msgid():
+    """Return a Message-ID whose local part is 16 random letters and digits."""
+    alphabet = string.ascii_uppercase + string.ascii_lowercase + string.digits
+    rand = ''.join(random.choices(alphabet, k=16))
+    return f'<{rand}@markup.procatalog.io>'
--- a/markup/img.py
+++ b/markup/img.py
@ -0,0 +1,108 @@
+import os
+import shutil
+
+from PIL import Image, ImageFilter, ImageDraw, ImageFont
+import numpy
+import imutils
+import cv2
+import dumper
+import random as rng
+from pathlib import Path
+
+from .utils import cv2_rect, ensure_dir, set_file_perms, WORKDIR
+
+# https://www.pyimagesearch.com/2014/10/20/finding-shapes-images-using-python-opencv/
+
+
+def find_shapes(image_path):
+    """Find shapes in the image, returning bounding boxes around each.
+
+    Only the alpha channel is examined: it is blurred, thresholded into
+    a binary mask, and the external contours of that mask are boxed.
+    Writes debug images (``.thresh.png`` and ``.contour.png``) next to
+    the input image.
+
+    Returns ``(width, height, bboxes)`` with the image size in pixels
+    and one rect per contour, or ``None`` when the image mode is
+    neither 'RGBA' nor 'LA'.
+    """
+    path = Path(image_path)
+
+    # the context manager releases the underlying file once the alpha
+    # channel has been copied out
+    with Image.open(image_path, 'r') as img:
+        if img.mode not in ('RGBA', 'LA'):
+            print('no alpha channel: {}'.format(img.mode))
+            return None
+        width, height = img.size
+        alpha_layer = img.convert('RGBA').split()[-1]
+
+    alpha_layer = alpha_layer.filter(ImageFilter.GaussianBlur(5))
+
+    # anything more opaque than the cutoff becomes solid white
+    cutoff = 5
+    alpha_layer = alpha_layer.point(lambda p: 255 if p > cutoff else 0)
+    mask = numpy.array(alpha_layer)
+
+    thresh_path = str(path.with_suffix('.thresh.png'))
+    cv2.imwrite(thresh_path, mask)
+    set_file_perms(thresh_path)
+
+    contours = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
+    contours = imutils.grab_contours(contours)
+
+    # bounding rect of each contour (NOT its center of mass)
+    bboxes = [cv2_rect(*cv2.boundingRect(c)) for c in contours]
+
+    # draw contours
+    contour_image = numpy.zeros((mask.shape[0], mask.shape[1], 3), dtype=numpy.uint8)
+    for i, rect in enumerate(bboxes):
+        # channel values have to fit the uint8 canvas
+        color = (rng.randint(0, 255), rng.randint(0, 255), rng.randint(0, 255))
+        cv2.drawContours(contour_image, contours, i, color)
+        cv2.rectangle(contour_image, (rect.left, rect.top), (rect.right, rect.bottom), color, 1)
+
+    contour_path = str(path.with_suffix('.contour.png'))
+    cv2.imwrite(contour_path, contour_image)
+    set_file_perms(contour_path)
+
+    return width, height, bboxes
+
+
+def write_debug_image(workdir, page_num, prods, scribbles):
+    """Render one page's product and scribble rectangles into a debug png.
+
+    Only entries of prods and scribbles whose 'page' equals page_num
+    are drawn.  Products are outlined (and filled when they carry a
+    'matched' key) and labelled with their material; each scribble gets
+    its own outline plus one outline per entry in its 'bboxes'.  The
+    result is saved as ``debug-pageNNN.png`` inside workdir.
+    """
+    ensure_dir(workdir)
+    path = os.path.join(workdir, f"debug-page{page_num:03d}.png")
+
+    # canvas size used for every page
+    pagew = int(11*72)
+    pageh = int(8.5*72)
+
+    img = Image.new('RGBA', (pagew, pageh), 'white')
+    draw = ImageDraw.Draw(img, 'RGBA')
+    fnt = ImageFont.truetype('/usr/share/fonts/truetype/lato/Lato-Regular.ttf', 10)
+
+    for prod in prods:
+        if prod['page'] != page_num:
+            continue
+        rect = prod['rect']
+        if 'matched' in prod:
+            fill_color = "hsv(120, 22%, 100%)"
+        else:
+            fill_color = None
+        draw.rectangle((rect.p1(pageh), rect.p2(pageh)),
+                       fill=fill_color, outline="hsv(120, 50%, 100%)", width=2)
+        corner = rect.p1(pageh)
+        draw.text((corner[0] + 3, corner[1] + 3), prod['material'],
+                  font=fnt, fill="hsv(120, 22%, 50%)")
+
+    for scribble in scribbles:
+        if scribble['page'] != page_num:
+            continue
+        outer = scribble['rect']
+        draw.rectangle((outer.p1(pageh), outer.p2(pageh)), outline="hsv(210, 22%, 100%)", width=2)
+        for box in scribble['bboxes']:
+            draw.rectangle((box.p1(pageh), box.p2(pageh)), outline="hsv(0, 22%, 100%)", width=2)
+
+    img.save(path)
+    set_file_perms(path)
--- a/markup/matching.py
+++ b/markup/matching.py
@ -0,0 +1,62 @@
+from .img import find_shapes, write_debug_image
+from .pdf import parse_pdf
+from .utils import overlaps
+
+
+def find_marked_products(pdf, workdir, debug=0):
+    """Main entry point.  Give a pdf, get matches.
+
+    Parses the pdf for product boxes and scribbles, locates the shapes
+    inside each scribble image, and returns the products overlapped by
+    a shape.  Returns None when the pdf has no product boxes at all.
+    Intermediate and debug files are written under workdir.
+    """
+    (prods, scribbles) = parse_pdf(pdf, workdir, debug)
+
+    if not prods:
+        print('no product placement markers found')
+        return None
+
+    find_scribbles_shapes(scribbles)
+    matches = find_matches(prods, scribbles, 0.10)
+
+    # One debug image per page: several scribbles on the same page would
+    # otherwise rewrite the identical file once each.
+    for page in sorted({s['page'] for s in scribbles}):
+        write_debug_image(workdir, page, prods, scribbles)
+
+    return matches
+
+
+def find_scribbles_shapes(scribbles):
+    """Attach a 'bboxes' list (shape rects in page coords) to each scribble.
+
+    A scribble whose image yields no result from find_shapes (it returns
+    None for images without an alpha channel) gets an empty list instead
+    of aborting the whole run.
+    """
+    for scribble in scribbles:
+        found = find_shapes(scribble['image'])
+        if found is None:
+            scribble['bboxes'] = []
+            continue
+        imgw, imgh, shapes = found
+        scribble['bboxes'] = [transform(scribble['rect'], imgw, imgh, s) for s in shapes]
+
+
+def transform(pdf_rect, imgw, imgh, shape):
+    """Convert scribble from image coords to pdf coords
+
+    The shape is scaled by the ratio between the size of pdf_rect and
+    the image size (imgw x imgh), then moved to pdf_rect's top-left
+    corner.  Returns whatever shape.scale(...).translate(...) returns.
+    """
+    scalew = (pdf_rect.right - pdf_rect.left) / imgw
+    scaleh = (pdf_rect.bottom - pdf_rect.top) / imgh
+    scaled = shape.scale(scalew, scaleh)
+    return scaled.translate(pdf_rect.left, pdf_rect.top)
+
+
+def find_matches(all_prods, scribbles, overlap_threshold):
+    """Return the products overlapped by at least one scribble shape.
+
+    Products and scribbles are only compared when they share a 'page'.
+    A product counts as matched when overlaps() accepts any of the
+    scribble's 'bboxes' against its 'rect'; the product then gets a
+    'matched' key pointing at that scribble (the last matching scribble
+    wins).  Each product appears at most once in the returned list, no
+    matter how many boxes or scribbles cover it.
+    """
+    # segment by page
+    page_prods = {}
+    for p in all_prods:
+        page_prods.setdefault(p['page'], []).append(p)
+
+    matches = []
+    already_matched = set()  # id() of products already in matches
+    for s in scribbles:
+        for p in page_prods.get(s['page'], ()):
+            if not any(overlaps(p['rect'], box, overlap_threshold) for box in s['bboxes']):
+                continue
+            p['matched'] = s
+            if id(p) not in already_matched:
+                already_matched.add(id(p))
+                matches.append(p)
+
+    return matches
--- a/markup/pdf.py
+++ b/markup/pdf.py
@ -0,0 +1,127 @@
+import os
+import sys
+import subprocess
+import shutil
+
+from pdfminer.pdfparser import PDFParser
+from pdfminer.pdfdocument import PDFDocument
+from pdfminer.pdftypes import PDFObjRef, resolve1
+
+from .utils import pdf_rect, ensure_dir, set_file_perms
+
+
+def make_product_box(obj, pagenum, mediabox):
+    """Build a product dict from a ProCat annotation.
+
+    obj is the resolved annotation dictionary, pagenum the 1-based page
+    it was found on, and mediabox the page's MediaBox (its fourth entry
+    is used as the page height when converting the rect).
+
+    Returns a dict with the decoded ProCat* fields ('' when a field is
+    absent), the converted 'rect' and the 'page', or None when the
+    annotation has no usable 'Rect'.
+    """
+    rect = obj.get('Rect')
+    if not rect:
+        print('Annotation without rect:')
+        print(repr(obj))
+        return None
+
+    def text(key):
+        # ProCat values are stored as bytes
+        return obj[key].decode() if key in obj else ''
+
+    return { 'material': text('ProCatMaterialNumber'),
+             'name': text('ProCatName'),
+             'color': text('ProCatColor'),
+             'gender': text('ProCatGender'),
+             'season': text('ProCatSeason'),
+             'size': text('ProCatSize'),
+             'category': text('ProCatCategory'),
+             'rect': pdf_rect(rect, mediabox[3]),
+             'page': pagenum }
+
+
+def make_scribble(obj, pagenum, mediabox, workdir):
+    """Build a scribble dict from an annotation that carries an image.
+
+    Follows AP -> N -> Resources -> XObject -> Im1 down to the image
+    stream.  When that stream's filter is JPXDecode the image is
+    exported via export_jp2 and a dict with 'page', 'rect', 'objid' and
+    'image' (the exported path) is returned; otherwise a message is
+    printed and None is returned.
+    """
+    rect = obj['Rect'] # position on page
+
+    # walk the object tree down to the image
+    normal_appearance = resolve1(obj['AP'])['N']
+    if not normal_appearance or normal_appearance.objid <= 0:
+        print('skipping scribble - no normal appearance')
+        return
+
+    resources = resolve1(resolve1(normal_appearance)['Resources'])
+    xobj = resolve1(resources['XObject'])
+    im1 = resolve1(xobj['Im1']) # PDFStream of the image
+
+    if im1['Filter'].name != 'JPXDecode':
+        print('skipping non-jp2 image')
+        return None
+
+    path = export_jp2(im1, workdir, pagenum)
+    return { 'page': pagenum,
+             'rect': pdf_rect(rect, mediabox[3]),
+             'objid': im1.objid,
+             'image': path }
+
+
+def export_jp2(obj, workdir, pagenum):
+    """Dump a JPEG 2000 stream to workdir and convert it to png.
+
+    The raw stream data is written to ``export-pageNNN-objNNNNN.jp2``
+    and converted with the external ``opj_decompress`` tool.  The png
+    path is returned whether or not the conversion succeeded; a failure
+    is only reported on stdout.
+    """
+    oid = obj.objid
+    ensure_dir(workdir)
+    jp2_path = os.path.join(workdir, f"export-page{pagenum:03d}-obj{oid:05d}.jp2")
+    png_path = os.path.join(workdir, f"export-page{pagenum:03d}-obj{oid:05d}.png")
+
+    data = obj.get_rawdata()
+    print('extracting jp2: {}'.format(jp2_path))
+    with open(jp2_path, 'wb') as out:
+        out.write(data)
+        set_file_perms(jp2_path)
+
+    command = ['opj_decompress', '-i', jp2_path, '-o', png_path]
+    result = subprocess.run(command, capture_output=True)
+    if result.returncode == 0:
+        set_file_perms(png_path)
+    else:
+        print('ERROR converting {}:\n{}\n{}'.format(jp2_path, result.stdout.decode(), result.stderr.decode()))
+
+    return png_path
+
+
+def parse_pdf(fname, workdir, debug=0):
+    """Collect product boxes and scribbles from the annotations of a pdf.
+
+    fname is the pdf to read, workdir the directory scribble images are
+    exported into, and debug the value assigned to the pdfminer
+    ``debug`` attributes.  Returns ``[prod_boxes, scribbles]`` with any
+    None entries (annotations that could not be used) removed.
+    """
+    PDFDocument.debug = debug
+    PDFParser.debug = debug
+
+    prod_boxes = []
+    scribbles = []
+
+    # the context manager closes the file even when parsing raises
+    with open(fname, 'rb') as fp:
+        doc = PDFDocument(PDFParser(fp))
+
+        # NOTE(review): only the direct Kids of the root Pages node are
+        # visited; nested page trees are not descended into.
+        page_dict = resolve1(doc.catalog['Pages'])
+        pages = resolve1(page_dict['Kids'])
+        for pagenum, page in enumerate(pages, start=1):
+            page = resolve1(page)
+            if 'Annots' not in page:
+                continue
+
+            mediabox = page['MediaBox']
+
+            annots = page['Annots']
+            if isinstance(annots, PDFObjRef):
+                annots = resolve1(annots)
+
+            for anno in annots:
+                anno = resolve1(anno)
+                if 'AAPL:AKExtras' in anno:
+                    scribbles.append(make_scribble(anno, pagenum, mediabox, workdir))
+                elif 'ProCatName' in anno:
+                    prod_boxes.append(make_product_box(anno, pagenum, mediabox))
+                else:
+                    print('ignoring other annotation')
+
+    return [list(filter(None, prod_boxes)), list(filter(None, scribbles))]
--- a/markup/spreadsheet.py
+++ b/markup/spreadsheet.py
@ -0,0 +1,86 @@
+import os
+from itertools import zip_longest
+
+from openpyxl import Workbook
+from openpyxl.styles import PatternFill, Border, Side, Alignment, Protection, Font
+
+from .utils import ensure_dir, set_file_perms, WORKDIR
+
+
+def format_season(s):
+    """Shorten a season code by dropping its second character.
+
+    'FW20' -> 'F20'.  Empty values and codes shorter than four
+    characters are returned unchanged.
+    """
+    if s and len(s) >= 4:
+        return s[:1] + s[2:]
+    return s
+
+
+def format_name(name, gender):
+    """Return name followed by '-' and the first character of gender."""
+    initial = gender[:1]
+    return '{}-{}'.format(name, initial)
+
+
+def write_spreadsheet(matches, workdir, file_base):
+    """Write the matched products to ``<workdir>/<file_base>.xlsx``.
+
+    matches is a list of product dicts with 'season', 'gender', 'name',
+    'material', 'color', 'size' and 'category' strings.  Each field may
+    hold several newline-separated values; those are split into one row
+    per product.  Values are lowercased and a material is only written
+    the first time it is seen.
+
+    Returns the path of the saved workbook, or None when matches is
+    empty.
+    """
+    if not matches:
+        print('write_spreadsheet: no matches.  skipping.')
+        return None
+
+    header_font = Font(name='Calibri', size=12, bold=True)
+    body_font = Font(name='Calibri', size=12)
+    header_fill = PatternFill(start_color="cccccc", end_color="cccccc", fill_type="solid")
+    body_fill = PatternFill(start_color="eeeeee", end_color="eeeeee", fill_type="solid")
+    thin_side = Side(border_style='thin', color='000000')
+    border = Border(bottom=thin_side)
+
+    wb = Workbook()
+    ws = wb.active
+
+    # header row
+    ws.append(['style number', 'product name', 'season', 'color', 'category', 'size range'])
+
+    # style the header row
+    ws.column_dimensions['A'].width = 15
+    ws.column_dimensions['B'].width = 30
+    ws.column_dimensions['C'].width = 10
+    ws.column_dimensions['D'].width = 30
+    ws.column_dimensions['E'].width = 15
+    ws.column_dimensions['F'].width = 35
+
+    for f in ('A1', 'B1', 'C1', 'D1', 'E1', 'F1'):
+        ws[f].font = header_font
+        ws[f].fill = header_fill
+        ws[f].border = border
+
+    # TODO: sort matches
+
+    seen = set()  # materials already written
+
+    for match in matches:
+        # in the case of kids,
+        # we might have multiple products in a match
+        seasons    = match['season'].lower().split('\n')
+        genders    = match['gender'].lower().split('\n')
+        names      = match['name'].lower().split('\n')
+        materials  = match['material'].lower().split('\n')
+        colors     = match['color'].lower().split('\n')
+        sizes      = match['size'].lower().split('\n')
+        categories = match['category'].lower().split('\n')
+
+        rows = zip_longest(seasons, genders, names, materials, colors, sizes, categories, fillvalue='')
+        for season, gender, name, material, color, size, category in rows:
+            if material in seen:
+                continue
+            ws.append([material, format_name(name, gender), format_season(season), color, category, size])
+            seen.add(material)
+
+    # style body
+    for row in ws.iter_rows(min_row=2, max_row=None, max_col=None):
+        for cell in row:
+            cell.font = body_font
+            cell.fill = body_fill
+            cell.border = border
+
+    # save
+    ensure_dir(workdir)
+    path = os.path.join(workdir, f"{file_base}.xlsx")
+    wb.save(path)
+    set_file_perms(path)
+
+    return path
--- a/markup/tasks.py
+++ b/markup/tasks.py
@ -0,0 +1,105 @@
+from __future__ import absolute_import, unicode_literals
+from celery import task, shared_task
+from celery.utils.log import get_task_logger
+
+import os
+import re
+import sys
+import datetime
+import fileinput
+import smtplib
+from pathlib import Path
+
+from email.feedparser import FeedParser
+from email.message import EmailMessage
+from email.header import decode_header, make_header
+
+import django
+from django.conf import settings
+os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'procat2.settings')
+django.setup()
+
+from .utils import clean_path, ensure_dir, set_file_perms, WORKDIR
+from .email import reply, reply_missing, reply_no_matches, send_error_email
+from .matching import find_marked_products
+from .spreadsheet import write_spreadsheet
+from procat2.settings import TREE_NAME
+
+logger = get_task_logger(__name__)
+
+
+def on_fail_handler(self, exc, task_id, args, kwargs, einfo):
+    """Print the failure info and mail it to the error address."""
+    details = str(einfo)
+    print(details)
+    send_error_email(f'ERROR: {TREE_NAME} celery task {task_id}', details)
+
+
+# @shared_task(on_failure=on_fail_handler)
+# def test_fail(x, y):
+#     test_fail_internal()
+
+# def test_fail_internal():
+#     raise KeyError()
+
+
+@shared_task(on_failure=on_fail_handler)
+def process_message(path):
+    """Parse the raw email stored at path and process every pdf attachment.
+
+    Each part with content type application/pdf is handed to
+    process_attachment together with the decoded From and Subject
+    headers.  When the message has no pdf part at all the sender gets
+    the "missing pdf" reply.
+    """
+    # Local import: only this function needs the bytes parser.
+    from email.feedparser import BytesFeedParser
+
+    # Read the message as bytes: a raw email is not guaranteed to be
+    # valid text in the locale's encoding.
+    parser = BytesFeedParser()
+    with open(path, 'rb') as f:
+        for line in f:
+            parser.feed(line)
+    msg = parser.close()
+
+    frm = str(make_header(decode_header(msg['From'])))
+    # a message without a Subject header yields None here
+    subject = str(make_header(decode_header(msg['Subject'] or '')))
+
+    found_pdf = False
+    for attach in msg.walk():
+        if attach.get_content_type() == 'application/pdf':
+            process_attachment(frm, subject, attach)
+            found_pdf = True
+
+    if not found_pdf:
+        reply_missing(frm, subject)
+
+
+def process_attachment(from_address, subject, attachment):
+    """Save one pdf attachment, find its marked products, and reply.
+
+    The pdf is written below WORKDIR in a directory derived from the
+    sender and the pdf name.  When no products match, the sender gets
+    the "no matches" reply; otherwise a spreadsheet is written and
+    mailed back together with the pdf.
+    """
+    # write out pdf
+    raw_name = attachment.get_filename()
+    pdf_name = str(make_header(decode_header(raw_name))) if raw_name else ''
+
+    # if pdf name is missing or in UUID format, use email subject
+    if not pdf_name or re.match(r'[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}\.pdf', pdf_name):
+        pdf_name = f'{subject}.pdf'
+
+    # The name is taken from the email, i.e. it is untrusted: drop any
+    # directory components so the file cannot land outside workdir.
+    pdf_name = os.path.basename(pdf_name.replace('\\', '/'))
+    if pdf_name in ('', '.', '..'):
+        pdf_name = 'markup.pdf'
+
+    print(f'Using pdf name: {pdf_name}')
+
+    pdf_base = Path(pdf_name).stem
+
+    workdir = os.path.join(WORKDIR, clean_path(from_address), pdf_base)
+    ensure_dir(workdir)
+    pdf_path = os.path.join(workdir, pdf_name)
+    print(f'saving pdf to {pdf_path}')
+    with open(pdf_path, 'wb') as att:
+        att.write(attachment.get_payload(decode=True))
+    set_file_perms(pdf_path)
+
+    # find matches
+    matches = find_marked_products(pdf_path, workdir, debug=0)
+    if not matches:
+        print('no product matches')
+        reply_no_matches(from_address, subject)
+        return
+
+    print(f'{len(matches)} product matches')
+
+    # write spreadsheet
+    xls_path = write_spreadsheet(matches, workdir, pdf_base)
+
+    if xls_path:
+        # send reply
+        print(f'wrote spreadsheet: {xls_path}')
+        reply(from_address, subject, xls_path, pdf_path)
+    else:
+        # NOTE(review): the sender gets no reply on this path
+        print(f'error creating spreadsheet')
--- a/markup/urls.py
+++ b/markup/urls.py
@ -0,0 +1,8 @@
+from django.urls import path
+
+from . import views
+
+# URL routes of the markup app.
+urlpatterns = [
+    # endpoint that receives an uploaded email message (see views.submit)
+    path('submit', views.submit, name='markup_submit'),
+    #path('fail', views.fail, name='markup_fail'),
+]
--- a/markup/utils.py
+++ b/markup/utils.py
@ -0,0 +1,94 @@
+import os
+import re
+import shutil
+
+from django.conf import settings
+
+# Root directory for markup working files, located under settings.ASSET_DIR.
+WORKDIR = os.path.join(settings.ASSET_DIR, 'markup', 'work')
+
+
+def pdf_rect(rect, container_height):
+    """Build a Rect from a pdf rectangle, flipping the y axis.
+
+    rect holds two corner points as [x, y, x, y] in either order.  The
+    y values are subtracted from container_height so that the larger
+    pdf y becomes the Rect's top.
+    """
+    xa, ya, xb, yb = rect[0], rect[1], rect[2], rect[3]
+    left, right = min(xa, xb), max(xa, xb)
+    upper, lower = max(ya, yb), min(ya, yb)
+    # and convert from pdf to image coords
+    return Rect(left, container_height - upper, right, container_height - lower)
+
+
+def cv2_rect(l, t, w, h):
+    """Build a Rect from a left/top corner plus a width and height."""
+    right = l + w
+    bottom = t + h
+    return Rect(l, t, right, bottom)
+
+
+def overlaps(r1, r2, threshold):
+    """Return True when r1 and r2 overlap by more than threshold.
+
+    The overlap is intersection area divided by union area, using the
+    dicts returned by each rect's to_dict().  Degenerate input whose
+    union area is not positive (e.g. a zero-sized rect) is reported as
+    not overlapping instead of dividing by zero.
+    """
+    A = r1.to_dict()
+    B = r2.to_dict()
+
+    # https://stackoverflow.com/questions/9324339/how-much-do-two-rectangles-overlap
+    SA = A['w'] * A['h']
+    SB = B['w'] * B['h']
+    # NOTE(review): the "1 +" treats edges as inclusive, unlike SA and SB
+    # above; kept as-is so existing matching results do not shift.
+    inter_w = max(0, 1 + min(A['x2'], B['x2']) - max(A['x1'], B['x1']))
+    inter_h = max(0, 1 + min(A['y2'], B['y2']) - max(A['y1'], B['y1']))
+    SI = inter_w * inter_h
+    SU = SA + SB - SI
+    if SU <= 0:
+        return False
+
+    overlap = float(SI) / float(SU)
+    return overlap > threshold
+
+
+class Rect(object):
+    """Axis-aligned rectangle stored as left / top / right / bottom.
+
+    translate() and scale() modify the rectangle in place and return it
+    so calls can be chained.
+    """
+
+    def __init__(self, l, t, r, b):
+        self.left, self.top = l, t
+        self.right, self.bottom = r, b
+
+    def translate(self, x, y):
+        """Shift the rectangle by (x, y) in place and return self."""
+        self.left, self.right = self.left + x, self.right + x
+        self.top, self.bottom = self.top + y, self.bottom + y
+        return self
+
+    def scale(self, x, y):
+        """Multiply the x edges by x and the y edges by y; return self."""
+        self.left, self.right = self.left * x, self.right * x
+        self.top, self.bottom = self.top * y, self.bottom * y
+        return self
+
+    def p1(self, page_height):
+        """Return the (left, top) corner; page_height is not used."""
+        corner = (self.left, self.top)
+        return corner
+
+    def p2(self, page_height):
+        """Return the (right, bottom) corner; page_height is not used."""
+        corner = (self.right, self.bottom)
+        return corner
+
+    def to_dict(self):
+        """Return the corners plus width 'w' and height 'h' as a dict."""
+        return dict(x1=self.left,
+                    y1=self.top,
+                    x2=self.right,
+                    y2=self.bottom,
+                    w=self.right - self.left,
+                    h=self.bottom - self.top)
+
+    def __repr__(self):
+        edges = (self.left, self.top, self.right, self.bottom)
+        return 'Rect[l={}, t={}, r={}, b={}]'.format(*map(int, edges))
+
+
+def ensure_dir(dir):
+    """Create dir (and missing parents), then set its mode and group.
+
+    The directory ends up with mode 0o775 and group 'procat' whether or
+    not it already existed.
+    """
+    # exist_ok avoids the race between an existence check and makedirs
+    os.makedirs(dir, exist_ok=True)
+    os.chmod(dir, 0o775)
+    shutil.chown(dir, group='procat')
+
+
+def set_file_perms(file):
+    """Set mode 0o664 and group 'procat' on file."""
+    mode = 0o664
+    os.chmod(file, mode)
+    shutil.chown(file, group='procat')
+
+
+def clean_path(path):
+    """Replace filesystem-hostile characters
+
+    Angle brackets are removed; every other character that is neither
+    a word character nor '@' becomes an underscore.
+    """
+    without_brackets = re.sub(r'[<>]', '', path)
+    return re.sub(r'[^\w@]', '_', without_brackets)
--- a/markup/views.py
+++ b/markup/views.py
@ -0,0 +1,51 @@
+import os
+import logging
+import humanize
+from tempfile import mkstemp
+from shutil import copyfile
+
+from django.core import serializers
+from django.http import HttpResponseRedirect, HttpResponse, JsonResponse
+from django.shortcuts import render, get_object_or_404
+from django.views.decorators.csrf import csrf_exempt
+from django.views.decorators.http import require_http_methods
+from django.core.files.uploadhandler import TemporaryFileUploadHandler
+
+from .tasks import process_message
+#from .tasks import test_fail
+
+log = logging.getLogger(__name__)
+
+
+
+# @csrf_exempt
+# def fail(request):
+#     test_fail.delay(1, 2)
+#     return JsonResponse({'success': True}, safe=False)
+
+
+@csrf_exempt
+@require_http_methods(["POST"])
+def submit(request):
+    """Accept an uploaded email message and queue it for processing.
+
+    Expects a POST with the message in the 'file' upload field.  The
+    upload is copied to a fresh temporary .eml file whose path is
+    handed to the process_message task.  Responds with 400 when the
+    request has no body or no 'file' field, otherwise with
+    ``{'success': True}``.
+    """
+    # always upload into a file
+    request.upload_handlers = [TemporaryFileUploadHandler(request)]
+
+    body = request.body
+    if not body:
+        return HttpResponse('Bad request: no data', status=400)
+
+    # .get() so a missing field reaches the 400 below instead of raising
+    msg_file = request.FILES.get('file')
+    if not msg_file:
+        return HttpResponse('Bad request: no file', status=400)
+
+    msg_size = humanize.naturalsize(msg_file.size, gnu=True)
+    log.debug('message file size: {}'.format(msg_size))
+
+    # mkstemp returns an open descriptor; close it, only the path is needed
+    fd, tmpfile = mkstemp(suffix='.eml', prefix='markup_', dir=None, text=False)
+    os.close(fd)
+    log.debug('copy message file from {} to {}'.format(msg_file.temporary_file_path(), tmpfile))
+    copyfile(msg_file.temporary_file_path(), tmpfile)
+    # NOTE(review): world-writable; presumably so the task worker can
+    # access the file under a different user -- confirm.
+    os.chmod(tmpfile, 0o666)
+
+    process_message.delay(tmpfile)
+
+    return JsonResponse({'success': True}, safe=False)
--- a/markup/work/init.py
+++ b/markup/work/init.py
--- a/markup/work/test_all.py
+++ b/markup/work/test_all.py
@ -0,0 +1,51 @@
+#!/usr/bin/env python3
+
+import sys
+import os
+import re
+import inspect
+from pathlib import Path
+
+currentdir = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe())))
+parentdir = os.path.dirname(currentdir)
+parentparentdir = os.path.dirname(parentdir)
+sys.path.insert(0, parentparentdir)
+
+import dumper
+import getopt
+import django
+from django.conf import settings
+
+os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'procat2.settings')
+django.setup()
+
+from markup.utils import WORKDIR, clean_path
+from markup.matching import find_marked_products
+from markup.spreadsheet import write_spreadsheet
+
+
+def main(argv):
+    """Run the matching pipeline on one pdf and write its spreadsheet.
+
+    Options: -d raises the debug level (repeatable), -s names the
+    subdirectory of WORKDIR to work in (default 'test').  Returns 100
+    after printing usage when the arguments are invalid.
+    """
+    def usage():
+        print('usage: %s [-s subdir] [-d] file.pdf' % argv[0])
+        return 100
+    try:
+        # 's:' so that -s takes a value
+        (opts, args) = getopt.getopt(argv[1:], 'ds:')
+    except getopt.GetoptError:
+        return usage()
+    if not args: return usage()
+    debug = 0
+    subdir = 'test'
+    for (k, v) in opts:
+        if k == '-d': debug += 1
+        elif k == '-s': subdir = v
+
+    fname = args[0]
+    path = Path(fname)
+    workdir = os.path.join(WORKDIR, subdir, clean_path(path.stem))
+
+    matches = find_marked_products(fname, workdir, debug)
+    # find_marked_products returns None when nothing could be matched
+    print(f'{len(matches or [])} product matches')
+    write_spreadsheet(matches, workdir, path.stem)
+
+
+if __name__ == '__main__': sys.exit(main(sys.argv))
--- a/markup/work/test_email.py
+++ b/markup/work/test_email.py
@ -0,0 +1,37 @@
+#!/usr/bin/env python3
+#
+# process an .eml file through the whole markup process
+
+import sys
+import os
+import inspect
+
+currentdir = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe())))
+parentdir = os.path.dirname(currentdir)
+parentparentdir = os.path.dirname(parentdir)
+sys.path.insert(0, parentparentdir)
+
+import getopt
+import django
+from django.conf import settings
+
+os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'procat2.settings')
+django.setup()
+
+from markup.tasks import process_message
+
+
+def main(argv):
+    """Run process_message on the .eml file named by the first argument.
+
+    Returns 100 after printing usage when no file is given or the
+    options cannot be parsed.
+    """
+    def usage():
+        print('usage: %s file.eml' % argv[0])
+        return 100
+
+    try:
+        _opts, positional = getopt.getopt(argv[1:], '')
+    except getopt.GetoptError:
+        return usage()
+
+    if not positional:
+        return usage()
+
+    eml_path = positional[0]
+    process_message(eml_path)
+
+
+if __name__ == '__main__': sys.exit(main(sys.argv))
--- a/markup/work/test_pdf.py
+++ b/markup/work/test_pdf.py
@ -0,0 +1,38 @@
+#!/usr/bin/env python3
+import sys
+import os
+import inspect
+
+currentdir = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe())))
+parentdir = os.path.dirname(currentdir)
+parentparentdir = os.path.dirname(parentdir)
+sys.path.insert(0, parentparentdir)
+
+import getopt
+import django
+from django.conf import settings
+
+os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'procat2.settings')
+django.setup()
+
+from markup.pdf import parse_pdf
+
+
+def main(argv):
+    """Parse one pdf and print the product boxes and scribbles found.
+
+    -d raises the debug level (repeatable).  Scribble images are
+    exported below WORKDIR/test/<pdf name>.  Returns 100 after printing
+    usage when the arguments are invalid.
+    """
+    def usage():
+        print('usage: %s [-d] file ...' % argv[0])
+        return 100
+    try:
+        (opts, args) = getopt.getopt(argv[1:], 'd')
+    except getopt.GetoptError:
+        return usage()
+    if not args: return usage()
+    debug = 0
+    for (k, v) in opts:
+        if k == '-d': debug += 1
+
+    # parse_pdf needs a work directory as its second argument
+    from markup.utils import WORKDIR, clean_path
+    stem = os.path.splitext(os.path.basename(args[0]))[0]
+    workdir = os.path.join(WORKDIR, 'test', clean_path(stem))
+
+    (prods, scribbles) = parse_pdf(args[0], workdir, debug)
+    print('prods', prods)
+    print('scribbles', scribbles)
+
+
+if __name__ == '__main__': sys.exit(main(sys.argv))
--- a/markup/work/test_scribble.py
+++ b/markup/work/test_scribble.py
@ -0,0 +1,39 @@
+#!/usr/bin/env python3
+import sys
+import os
+import inspect
+
+currentdir = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe())))
+parentdir = os.path.dirname(currentdir)
+parentparentdir = os.path.dirname(parentdir)
+sys.path.insert(0, parentparentdir)
+
+#import dumper
+import getopt
+import django
+from django.conf import settings
+
+os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'procat2.settings')
+django.setup()
+
+from markup.img import find_shapes
+
+
+def main(argv):
+    """Run find_shapes on the first file argument and print its result.
+
+    -d is accepted and counted but has no further effect here.  Returns
+    100 after printing usage when the arguments are invalid.
+    """
+    def usage():
+        print('usage: %s [-d] file ...' % argv[0])
+        return 100
+
+    try:
+        parsed_opts, positional = getopt.getopt(argv[1:], 'd')
+    except getopt.GetoptError:
+        return usage()
+
+    if not positional:
+        return usage()
+
+    debug = sum(1 for flag, _ in parsed_opts if flag == '-d')
+
+    boxes = find_shapes(positional[0])
+    print(boxes)
+
+
+if __name__ == '__main__': sys.exit(main(sys.argv))
--- a/procat2/init.py
+++ b/procat2/init.py
@ -0,0 +1,5 @@
+from __future__ import absolute_import, unicode_literals
+
+from .celery import app as celery_app
+
+__all__ = ('celery_app',)
--- a/procat2/celery.py
+++ b/procat2/celery.py
@ -0,0 +1,15 @@
+from __future__ import absolute_import, unicode_literals
+import os
+from celery import Celery
+
+# Point Django at the project settings unless the environment already does.
+os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'procat2.settings')
+
+# The Celery application of the project.
+app = Celery('procat2')
+
+# Take the Celery configuration from the Django settings object, using
+# the 'CELERY' namespace.
+app.config_from_object('django.conf:settings', namespace='CELERY')
+
+# Let Celery discover task modules on its own.
+app.autodiscover_tasks()
+
+# @app.task(bind=True)
+# def debug_task(self):
+#     print('Request: {0!r}'.format(self.request))
--- a/procat2/management/commands/add_user.py
+++ b/procat2/management/commands/add_user.py
@ -0,0 +1,33 @@
+import re
+
+from django.core.management.base import BaseCommand, CommandError
+from django.contrib.auth.models import User
+
+
+class Command(BaseCommand):
+    """Management command that creates a regular (non-staff) user.
+
+    The username is the first five letters of the last name followed by
+    the first three of the first name, lowercased.
+    """
+    help = 'Add a procat2 user'
+
+    def add_arguments(self, parser):
+        parser.add_argument('first_name', type=str)
+        parser.add_argument('last_name', type=str)
+        parser.add_argument('email', type=str)
+
+    def handle(self, *args, **options):
+        fname = options['first_name']
+        lname = options['last_name']
+        email = options['email']
+        email = re.sub('[<>]', '', email)
+
+        # both names are indexed below; fail with a clear message
+        # instead of an IndexError
+        if not fname or not lname:
+            raise CommandError('first_name and last_name must not be empty')
+
+        uname = str(lname[:5] + fname[:3]).lower()
+        # NOTE(review): the initial password is derived from the name and
+        # therefore guessable, and it is echoed below -- consider forcing
+        # a password change on first login.
+        pw = str(fname[0] + lname[0] + 'visual').lower()
+
+        user = User.objects.create_user(uname, password=pw)
+        user.first_name = fname
+        user.last_name = lname
+        user.email = email
+        user.is_superuser = False
+        user.is_staff = False
+        user.is_active = True
+        user.save()
+
+        self.stdout.write(self.style.SUCCESS(f'Added user "{uname}" with password "{pw}" ({email})'))
--- a/procat2/management/commands/cat_ids.py
+++ b/procat2/management/commands/cat_ids.py
@ -0,0 +1,19 @@
+from django.core.management.base import BaseCommand, CommandError
+from procat2.models import Catalog
+
+
+class Command(BaseCommand):
+    """Management command that prints the product ids of catalogs."""
+    help = 'Return material numbers for a catalog'
+
+    def add_arguments(self, parser):
+        parser.add_argument('cat_ids', nargs='+', type=int)
+
+    def handle(self, *args, **options):
+        requested = options['cat_ids']
+        for cat_id in requested:
+            # stop at the first catalog id that cannot be found
+            try:
+                catalog = Catalog.objects.get(pk=cat_id)
+            except Catalog.DoesNotExist:
+                raise CommandError('Catalog "%s" does not exist' % cat_id)
+
+            for product_id in catalog.product_ids():
+                self.stdout.write(product_id)
--- a/procat2/migrations/0007_region_visible.py
+++ b/procat2/migrations/0007_region_visible.py
@ -0,0 +1,18 @@
+# Generated by Django 2.2.4 on 2019-11-01 23:37
+
+from django.db import migrations, models
+
+
+class Migration(migrations.Migration):
+
+    dependencies = [
+        ('procat2', '0006_catalog_email'),  # applied after procat2 migration 0006_catalog_email
+    ]
+
+    operations = [
+        migrations.AddField(
+            model_name='region',
+            name='visible',
+            field=models.BooleanField(default=True),  # same definition as Region.visible in models.py
+        ),
+    ]
--- a/procat2/models.py
+++ b/procat2/models.py
@ -30,12 +30,14 @@ class Region(models.Model):
    id = models.CharField(max_length=30, primary_key=True)
    name = models.CharField(max_length=100)
    ordering = models.PositiveIntegerField(unique=True, default=1000)
+    visible = models.BooleanField(default=True)

    def serialize(self):
        return {
            'id': self.id,
            'name': self.name,
            'ordering': self.ordering,
+            'visible': self.visible  # the catalogedit view only serializes regions where this is true
        }


@ -58,7 +60,7 @@ class Catalog(models.Model):
    created = models.DateTimeField(auto_now_add=True)
    updated = models.DateTimeField(auto_now=True, db_index=True)
    name = models.CharField(max_length=300)
-    email = models.CharField(max_length=300, null=True)
+    email = models.CharField(max_length=300, null=True, blank=True)
    public = models.BooleanField(default=False, db_index=True)
    pages = models.PositiveIntegerField(default=0)
    sections = models.PositiveIntegerField(default=0)
--- a/procat2/settings.py
+++ b/procat2/settings.py
@ -9,8 +9,15 @@ https://docs.djangoproject.com/en/2.1/topics/settings/
 For the full list of settings and their values, see
 https://docs.djangoproject.com/en/2.1/ref/settings/
 """
+from __future__ import absolute_import, unicode_literals

 import os
+
+# The `from __future__ import absolute_import` line at the top of this
+# module is required if you want to import from the celery library on
+# Python 2.x: without it, `from celery.schedules import` becomes
+# `proj.celery.schedules`, since Python 2 allows implicit relative imports.
+
 from django.urls import reverse_lazy

 # Build paths inside the project like this: os.path.join(BASE_DIR, ...)
@ -77,6 +84,12 @@ LOGGING = {
            #'level': os.getenv('DJANGO_LOG_LEVEL', 'INFO'),
            'propagate': True,
        },
+        'markup': {
+            'handlers': ['console', 'file'],
+            'level': 'DEBUG',
+            #'level': os.getenv('DJANGO_LOG_LEVEL', 'INFO'),
+            'propagate': True,
+        },
        'django': {
            'handlers': ['console', 'file'],
            #'level': 'DEBUG',
@ -101,6 +114,7 @@ INSTALLED_APPS = [
    'django.contrib.staticfiles',
    'django.contrib.sites',
    'django_extensions',
+    'django_celery_results',
    'account',
    'lazysignup',
    'webpack_loader',
@ -108,6 +122,7 @@ INSTALLED_APPS = [
    'dashboard',
    'products',
    'quickinfo',
+    'markup',
 ]

 MIDDLEWARE = [
@ -227,3 +242,13 @@ WEBPACK_LOADER = {
        'IGNORE': [r'.+\.hot-update.js', r'.+\.map']
    }
 }
+
+# allow large uploads: request bodies up to 200MB, and uploaded files up to
+# 200MB are kept in memory -- for example the markup tool receiving emails
+DATA_UPLOAD_MAX_MEMORY_SIZE = FILE_UPLOAD_MAX_MEMORY_SIZE = 200 * 1024 * 1024  # 200MB
+
+# celery settings
+CELERY_BROKER_URL = 'amqp://guest:guest@localhost//'  # NOTE(review): guest account on a local broker; confirm per deployment
+CELERY_ACCEPT_CONTENT = ['json']
+CELERY_RESULT_BACKEND = 'django-db'  # presumably backed by django_celery_results (see INSTALLED_APPS); confirm
+CELERY_TASK_SERIALIZER = 'json'
--- a/procat2/urls.py
+++ b/procat2/urls.py
@ -50,6 +50,8 @@ urlpatterns = [
    path('convert/done/', lazy_convert_done, name='lazysignup_convert_done'),

    path('quickinfo/', include('quickinfo.urls')),
+
+    path('markup/', include('markup.urls')),
 ]

 if settings.DJDT:
--- a/requirements.txt
+++ b/requirements.txt
@ -1,20 +1,37 @@
+amqp==2.5.1
 backcall==0.1.0
+billiard==3.6.1.0
+celery==4.3.0
 decorator==4.4.0
 Django==2.2.4
 django-appconf==1.0.3
+django-celery-results==1.1.2
 django-debug-toolbar==1.11
 django-extensions==2.1.6
 django-lazysignup==2.0.0
 django-settings-export==1.2.1
 django-user-accounts==2.1.0
 django-webpack-loader==0.6.0
+Dumper==1.2.0
+et-xmlfile==1.0.1
+humanize==0.5.1
+importlib-metadata==0.23
+imutils==0.5.3
 ipdb==0.11
 ipython==7.3.0
 ipython-genutils==0.2.0
+jdcal==1.4.1
 jedi==0.13.3
+kombu==4.6.5
+more-itertools==7.2.0
+numpy==1.17.2
+opencv-python==4.1.1.26
+openpyxl==3.0.0
 parso==0.3.4
+pdfminer==20191010
 pexpect==4.6.0
 pickleshare==0.7.5
+Pillow==6.2.0
 prompt-toolkit==2.0.9
 psycopg2-binary==2.7.7
 ptyprocess==0.6.0
@ -23,5 +40,7 @@ pytz==2018.9
 six==1.12.0
 sqlparse==0.3.0
 traitlets==4.3.2
+vine==1.3.0
 wcwidth==0.1.7
 Werkzeug==0.14.1
+zipp==0.6.0
--- a/templates/dashboard/dashboard.html
+++ b/templates/dashboard/dashboard.html
@ -7,6 +7,13 @@
 <div class="uk-section">
  <div class="uk-container">

+    <div class="uk-flex uk-flex-center">{# banner linking to the catalog tutorial video #}
+      <div class="uk-alert-primary" uk-alert>
+        <a class="uk-alert-close" uk-close></a>
+        <p style="padding-right: 10px">Watch the <a href="https://keenfootwear.wistia.com/medias/ld970jfi9o" target="_blank" rel="noopener noreferrer">tutorial video on creating catalogs!</a></p>
+      </div>
+    </div>
+
    <div class="uk-grid-match uk-child-width-expand" uk-grid>

      <div>
@ -23,10 +30,10 @@
        <div class="uk-card uk-card-default uk-card-body">
          <h3 class="uk-card-title">{% trans "Images" %}</h3>
          <ul class="uk-list">
-            <li><a href="http://keen.apparentinc.com/tools/downloader/">{% trans "Image downloader" %}</a></li>
+            <li><a href="http://a.keen.procatalog.io/tools/downloader/">{% trans "Image downloader" %}</a></li>
            {% if not user|is_lazy_user %}
-            <li><a href="http://keen.apparentinc.com/images/upload">{% trans "Image uploader" %}</a></li>
-            <li><a href="http://keen.apparentinc.com/images/">{% trans "Image manager" %}</a></li>
+            <li><a href="http://a.keen.procatalog.io/images/upload">{% trans "Image uploader" %}</a></li>
+            <li><a href="http://a.keen.procatalog.io/tools/status">{% trans "Image status" %}</a></li>
            {% endif %}
          </ul>
        </div>
@ -37,7 +44,7 @@
        <div class="uk-card uk-card-default uk-card-body">
          <h3 class="uk-card-title">{% trans "Tools" %}</h3>
          <ul class="uk-list">
-            <li><a href="http://keen.apparentinc.com/tools/regionizer/">{% trans "Region editor" %}</a></li>
+            <li><a href="http://a.keen.procatalog.io/tools/regionizer/">{% trans "Region editor" %}</a></li>
          </ul>
        </div>
      </div>
--- a/templates/nav.html
+++ b/templates/nav.html
@ -30,10 +30,10 @@
        <a>{% trans "Images" %}</a>
        <div class="uk-navbar-dropdown">
          <ul class="uk-nav uk-navbar-dropdown-nav">
-            <li><a href="http://keen.apparentinc.com/tools/downloader/">Image downloader</a></li>
+            <li><a href="http://a.keen.procatalog.io/tools/downloader/">Image downloader</a></li>
            {% if not user|is_lazy_user %}
-            <li><a href="http://keen.apparentinc.com/images/upload">Image uploader</a></li>
-            <li><a href="http://keen.apparentinc.com/images/">Image manager</a></li>
+            <li><a href="http://a.keen.procatalog.io/images/upload">Image uploader</a></li>
+            <li><a href="http://a.keen.procatalog.io/tools/status">Image status</a></li>
            {% endif %}
          </ul>
        </div>
@ -44,7 +44,7 @@
        <a>{% trans "Tools" %}</a>
        <div class="uk-navbar-dropdown">
          <ul class="uk-nav uk-navbar-dropdown-nav">
-            <li><a href="http://keen.apparentinc.com/tools/regionizer/">Region editor</a></li>
+            <li><a href="http://a.keen.procatalog.io/tools/regionizer/">Region editor</a></li>
          </ul>
        </div>
      </li>
@ -63,6 +63,7 @@
        <div class="uk-navbar-dropdown">
          <ul class="uk-nav uk-navbar-dropdown-nav">
            <li><a href="mailto:support@procatalog.io?Subject=Keen%20ProCatalog%20support%20request">Email support</a></li>
+            <li><a href="https://keenfootwear.wistia.com/medias/ld970jfi9o" target="_blank" rel="noopener noreferrer">Catalog creation tutorial</a></li>{# rel keeps the new tab from reaching window.opener #}
          </ul>
        </div>
      </li>