from .img import find_shapes, write_debug_image
from .pdf import parse_pdf
from .utils import overlaps


def find_marked_products(pdf, workdir, debug=0):
    """Main entry point. Give a pdf, get matches.

    Parses the pdf into product markers and scribbles, detects the shapes
    inside every scribble, and returns the products that a scribble shape
    overlaps.

    Args:
        pdf: Passed through unchanged to ``parse_pdf``.
        workdir: Passed to ``parse_pdf`` and ``write_debug_image``.
        debug: Passed through unchanged to ``parse_pdf``.

    Returns:
        The list of matched products (see ``find_matches``), or ``None``
        when ``parse_pdf`` yields no products.
    """
    prods, scribbles = parse_pdf(pdf, workdir, debug)
    if not prods:
        print('no product placement markers found')
        return None

    find_scribbles_shapes(scribbles)
    matches = find_matches(prods, scribbles, 0.10)

    # The debug image call only varies by page, so render each page once
    # (in first-seen order) instead of once per scribble on that page.
    for page in dict.fromkeys(s['page'] for s in scribbles):
        write_debug_image(workdir, page, prods, scribbles)
    return matches


def find_scribbles_shapes(scribbles):
    """Detect shapes in each scribble image and store them on the scribble.

    For every scribble, ``find_shapes`` is run on ``scribble['image']`` and
    each resulting shape is mapped into pdf coordinates via ``transform``.
    The mapped shapes are stored in place under ``scribble['bboxes']``.
    """
    for scribble in scribbles:
        imgw, imgh, shapes = find_shapes(scribble['image'])
        pdf_rect = scribble['rect']
        scribble['bboxes'] = [
            transform(pdf_rect, imgw, imgh, shape) for shape in shapes
        ]


def transform(pdf_rect, imgw, imgh, shape):
    """Convert scribble from image coords to pdf coords.

    Args:
        pdf_rect: Object exposing ``left``, ``right``, ``top`` and
            ``bottom``; the area the scribble image occupies in the pdf.
        imgw: Image width, in image coordinates.
        imgh: Image height, in image coordinates.
        shape: Object exposing ``scale(sx, sy)`` whose result exposes
            ``translate(dx, dy)``.

    Returns:
        Whatever ``shape.scale(...).translate(...)`` returns.
    """
    # Scale factors that stretch the image extent onto the pdf rect extent.
    scalew = (pdf_rect.right - pdf_rect.left) / imgw
    scaleh = (pdf_rect.bottom - pdf_rect.top) / imgh
    scaled = shape.scale(scalew, scaleh)
    # Move the scaled shape so its origin sits at the rect's top-left corner.
    return scaled.translate(pdf_rect.left, pdf_rect.top)


def find_matches(all_prods, scribbles, overlap_threshold):
    """Return the products overlapped by at least one scribble shape.

    Products and scribbles are only compared when they share the same
    ``'page'`` value. A product matches a scribble when ``overlaps`` is true
    for the product's ``'rect'`` and any of the scribble's ``'bboxes'``.

    Side effect: each matched product gets ``product['matched']`` set to the
    scribble that matched it (the last such scribble if several overlap).

    Args:
        all_prods: Iterable of product dicts with ``'page'`` and ``'rect'``.
        scribbles: Iterable of scribble dicts with ``'page'`` and
            ``'bboxes'``.
        overlap_threshold: Passed through to ``overlaps``.

    Returns:
        List of matched products, each appearing exactly once, in the order
        they were first matched.
    """
    # Segment products by page so each scribble only checks its own page.
    page_prods = {}
    for prod in all_prods:
        page_prods.setdefault(prod['page'], []).append(prod)

    matches = []
    # Products are dicts (unhashable), so track identity to avoid reporting
    # the same product once per overlapping box or scribble.
    seen = set()
    for scribble in scribbles:
        for prod in page_prods.get(scribble['page'], ()):
            hit = any(
                overlaps(prod['rect'], box, overlap_threshold)
                for box in scribble['bboxes']
            )
            if not hit:
                continue
            prod['matched'] = scribble
            if id(prod) not in seen:
                seen.add(id(prod))
                matches.append(prod)
    return matches