63 lines
1.8 KiB
Python
63 lines
1.8 KiB
Python
from .img import find_shapes, write_debug_image
|
|
from .pdf import parse_pdf
|
|
from .utils import overlaps
|
|
|
|
|
|
def find_marked_products(pdf, workdir, debug=0):
    """Parse a pdf and return the products that were matched by scribbles.

    The pdf is handed to ``parse_pdf`` together with ``workdir`` and
    ``debug``; the result is unpacked into products and scribbles.
    When no products come back, a message is printed and ``None`` is
    returned. Otherwise the scribbles get their bounding boxes computed,
    matches are collected with an overlap threshold of 0.10, and
    ``write_debug_image`` is called once per scribble (for that
    scribble's page) before the matches are returned.

    Note that ``debug`` is only forwarded to ``parse_pdf``; the debug
    image calls below are made regardless of its value.
    """
    prods, scribbles = parse_pdf(pdf, workdir, debug)

    nothing_found = not prods or len(prods) < 1
    if nothing_found:
        print('no product placement markers found')
        return None

    # Fills in scribble['bboxes'], which find_matches reads.
    find_scribbles_shapes(scribbles)
    matches = find_matches(prods, scribbles, 0.10)

    for scribble in scribbles:
        write_debug_image(workdir, scribble['page'], prods, scribbles)

    return matches
|
|
|
|
|
|
def find_scribbles_shapes(scribbles):
    """Store the transformed shapes of every scribble under 'bboxes'.

    For each scribble, ``find_shapes`` is run on ``scribble['image']``
    and is expected to yield ``(image width, image height, shapes)``.
    Every shape is passed through ``transform`` with the scribble's
    ``'rect'`` and the resulting list is written to
    ``scribble['bboxes']``. The scribbles are modified in place and
    nothing is returned.
    """
    for entry in scribbles:
        width, height, shapes = find_shapes(entry['image'])
        boxes = []
        for shape in shapes:
            boxes.append(transform(entry['rect'], width, height, shape))
        entry['bboxes'] = boxes
|
|
|
|
|
|
def transform(pdf_rect, imgw, imgh, shape):
    """Map a shape from image coordinates into pdf coordinates.

    ``pdf_rect`` must expose ``left``, ``right``, ``top`` and ``bottom``.
    Its width and height are divided by ``imgw`` and ``imgh`` to get
    the per-axis scale factors. The shape is then scaled by those
    factors and translated by ``(pdf_rect.left, pdf_rect.top)``.

    ``shape`` must provide ``scale(sx, sy)`` returning an object that
    provides ``translate(dx, dy)``; the result of ``translate`` is
    returned unchanged.
    """
    span_x = pdf_rect.right - pdf_rect.left
    span_y = pdf_rect.bottom - pdf_rect.top
    factors = (span_x / imgw, span_y / imgh)
    return shape.scale(*factors).translate(pdf_rect.left, pdf_rect.top)
|
|
|
|
|
|
def find_matches(all_prods, scribbles, overlap_threshold):
    """Collect the products whose rect overlaps a scribble box on the same page.

    Products are first grouped by their ``'page'`` value. Each scribble
    is then compared only against the products of its own page;
    scribbles on a page without products are skipped. For every
    product/box pair where ``overlaps(product['rect'], box,
    overlap_threshold)`` is truthy, the scribble is stored on the
    product as ``product['matched']`` and the product is appended to
    the result list, which is returned.

    NOTE(review): the append happens once per overlapping box, so the
    same product can appear in the result more than once and
    ``'matched'`` ends up pointing at the last scribble that hit it --
    confirm this is what callers expect.
    """
    # Group the products by page number.
    by_page = {}
    for prod in all_prods:
        by_page.setdefault(prod['page'], []).append(prod)

    found = []
    for scribble in scribbles:
        candidates = by_page.get(scribble['page'])
        if candidates is None:
            continue
        for prod in candidates:
            for bbox in scribble['bboxes']:
                if not overlaps(prod['rect'], bbox, overlap_threshold):
                    continue
                prod['matched'] = scribble
                found.append(prod)

    return found
|