diff --git a/markup/img.py b/markup/img.py index 1ae5cbc..dfcf031 100644 --- a/markup/img.py +++ b/markup/img.py @@ -76,11 +76,10 @@ def find_shapes(image_path): return img.width, img.height, bboxes -def write_debug_image(subdir, name, page_num, prods, scribbles): +def write_debug_image(workdir, page_num, prods, scribbles): """Draw an image with boxes for products, images, and shapes.""" - dir = os.path.join(WORKDIR, subdir, name) - ensure_dir(dir) - path = os.path.join(dir, f"debug-page{page_num:03d}.png") + ensure_dir(workdir) + path = os.path.join(workdir, f"debug-page{page_num:03d}.png") pagew = int(11*72) pageh = int(8.5*72) diff --git a/markup/matching.py b/markup/matching.py index b5269de..3f558eb 100644 --- a/markup/matching.py +++ b/markup/matching.py @@ -3,9 +3,9 @@ from .pdf import parse_pdf from .utils import overlaps -def find_marked_products(pdf, subdir, catname, debug=0): +def find_marked_products(pdf, workdir, debug=0): """Main entry point. Give a pdf, get matches.""" - (prods, scribbles) = parse_pdf(pdf, subdir, catname, debug) + (prods, scribbles) = parse_pdf(pdf, workdir, debug) if not prods or len(prods) < 1: print('no product placement markers found') @@ -15,7 +15,7 @@ def find_marked_products(pdf, subdir, catname, debug=0): matches = find_matches(prods, scribbles, 0.10) for s in scribbles: - write_debug_image(subdir, catname, s['page'], prods, scribbles) + write_debug_image(workdir, s['page'], prods, scribbles) return matches diff --git a/markup/pdf.py b/markup/pdf.py index 703b050..c9c62be 100644 --- a/markup/pdf.py +++ b/markup/pdf.py @@ -7,7 +7,7 @@ from pdfminer.pdfparser import PDFParser from pdfminer.pdfdocument import PDFDocument from pdfminer.pdftypes import PDFObjRef, resolve1 -from .utils import pdf_rect, ensure_dir, set_file_perms, WORKDIR +from .utils import pdf_rect, ensure_dir, set_file_perms def make_product_box(obj, pagenum, mediabox): @@ -33,7 +33,7 @@ def make_product_box(obj, pagenum, mediabox): return None -def make_scribble(obj, pagenum, mediabox, subdir, name): +def make_scribble(obj, pagenum, mediabox, workdir): rect = obj['Rect'] # position on page # walk the object tree down to the image @@ -50,7 +50,7 @@ def make_scribble(obj, pagenum, mediabox, subdir, name): flter = im1['Filter'] if flter.name == 'JPXDecode': - path = export_jp2(im1, subdir, name, pagenum) + path = export_jp2(im1, workdir, pagenum) return { 'page': pagenum, 'rect': pdf_rect(rect, mediabox[3]), 'objid': im1.objid, @@ -60,12 +60,11 @@ def make_scribble(obj, pagenum, mediabox, subdir, name): return None -def export_jp2(obj, subdir, name, pagenum): +def export_jp2(obj, workdir, pagenum): oid = obj.objid - dir = os.path.join(WORKDIR, subdir, name) - ensure_dir(dir) - jp2_path = os.path.join(dir, f"export-page{pagenum:03d}-obj{oid:05d}.jp2") - png_path = os.path.join(dir, f"export-page{pagenum:03d}-obj{oid:05d}.png") + ensure_dir(workdir) + jp2_path = os.path.join(workdir, f"export-page{pagenum:03d}-obj{oid:05d}.jp2") + png_path = os.path.join(workdir, f"export-page{pagenum:03d}-obj{oid:05d}.png") data = obj.get_rawdata() print('extracting jp2: {}'.format(jp2_path)) @@ -82,7 +81,7 @@ def export_jp2(obj, subdir, name, pagenum): return png_path -def parse_pdf(fname, subdir, name, debug=0): +def parse_pdf(fname, workdir, debug=0): PDFDocument.debug = debug PDFParser.debug = debug @@ -113,7 +112,7 @@ def parse_pdf(fname, subdir, name, debug=0): for anno in annots: anno = resolve1(anno) if 'AAPL:AKExtras' in anno: - scribbles.append(make_scribble(anno, pagenum, mediabox, subdir, name)) + scribbles.append(make_scribble(anno, pagenum, mediabox, workdir)) elif 'ProCatName' in anno: prod_boxes.append(make_product_box(anno, pagenum, mediabox)) else: diff --git a/markup/spreadsheet.py b/markup/spreadsheet.py index b24f66f..aa56e7c 100644 --- a/markup/spreadsheet.py +++ b/markup/spreadsheet.py @@ -15,16 +15,19 @@ def format_season(s): return s[:1] + s[2:] -def write_spreadsheet(matches, subdir, catname, filename): +def write_spreadsheet(matches, workdir, file_base): if not matches: print('write_spreadsheet: no matches. skipping.') - return + return None wb = Workbook() ws = wb.active + # header row ws.append(['Season', 'Gender', 'Name', 'Style Number', 'Color']) + # TODO: uniquify and sort matches + for m in matches: # in the case of kids, # we might have multiple products in a match @@ -37,14 +40,10 @@ def write_spreadsheet(matches, subdir, catname, filename): for s, g, n, m, c in zip_longest(seasons, genders, names, materials, colors, fillvalue=''): ws.append([format_season(s), g, n, m, c]) - # # Python types will automatically be converted - # import datetime - # ws['A2'] = datetime.datetime.now() - # ws['A2'].style = 'Good' - # save - dir = os.path.join(WORKDIR, subdir, catname) - ensure_dir(dir) - path = os.path.join(dir, f"{filename}.xlsx") + ensure_dir(workdir) + path = os.path.join(workdir, f"{file_base}.xlsx") wb.save(path) set_file_perms(path) + + return path diff --git a/markup/work/test_all.py b/markup/work/test_all.py index 067e878..1ce1c53 100755 --- a/markup/work/test_all.py +++ b/markup/work/test_all.py @@ -19,6 +19,7 @@ from django.conf import settings os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'procat2.settings') django.setup() +from markup.utils import WORKDIR, clean_path from markup.matching import find_marked_products from markup.spreadsheet import write_spreadsheet @@ -40,12 +41,11 @@ def main(argv): fname = args[0] path = Path(fname) - catname = path.stem - catname = re.sub(r'[^\w]', '_', catname) + workdir = os.path.join(WORKDIR, 'test', clean_path(path.stem)) - matches = find_marked_products(fname, subdir, catname, debug=0) + matches = find_marked_products(fname, workdir, debug) print(f'{len(matches)} product matches') - write_spreadsheet(matches, subdir, catname, path.stem) + write_spreadsheet(matches, workdir, path.stem) if __name__ == '__main__': sys.exit(main(sys.argv))