markup: matching works

This commit is contained in:
2019-10-18 13:11:53 -07:00
parent be2902ca24
commit 5282f7cb2f
4 changed files with 169 additions and 32 deletions

43
markup/matching.py Normal file
View File

@ -0,0 +1,43 @@
from markup.img import find_shapes, write_debug_image
from markup.utils import overlaps
def find_scribbles_shapes(scribbles):
    """Attach PDF-space shapes to every scribble, in place.

    For each scribble, ``find_shapes`` is run on its ``'image'`` entry,
    yielding the image width, image height and the shapes found. Each
    shape is passed through ``transform`` together with the scribble's
    ``'rect'`` and the resulting list is stored under ``'bboxes'``.

    Args:
        scribbles: iterable of dict-like scribbles providing ``'image'``
            and ``'rect'``; each one gains a ``'bboxes'`` entry.

    Returns:
        None. The scribbles are mutated.
    """
    for entry in scribbles:
        width, height, found = find_shapes(entry['image'])
        entry['bboxes'] = [
            transform(entry['rect'], width, height, shape)
            for shape in found
        ]
def transform(pdf_rect, imgw, imgh, shape):
    """Map a shape from image coordinates into PDF coordinates.

    The shape is scaled per axis by the ratio of the PDF rectangle's
    extent to the image's extent, then shifted by the rectangle's
    left/top corner.

    Args:
        pdf_rect: object exposing ``left``, ``right``, ``top`` and
            ``bottom`` attributes.
        imgw: image width, used as the horizontal divisor.
        imgh: image height, used as the vertical divisor.
        shape: object exposing ``scale(sx, sy)`` whose result exposes
            ``translate(dx, dy)``.

    Returns:
        Whatever ``shape.scale(...).translate(...)`` returns.
    """
    # Extent of the target rectangle in PDF units.
    rect_width = pdf_rect.right - pdf_rect.left
    rect_height = pdf_rect.bottom - pdf_rect.top

    # Image units -> PDF units, independently for each axis.
    x_factor = rect_width / imgw
    y_factor = rect_height / imgh

    resized = shape.scale(x_factor, y_factor)
    return resized.translate(pdf_rect.left, pdf_rect.top)
def find_matches(all_prods, scribbles, overlap_threshold):
    """Return the products that a scribble overlaps on the same page.

    Products are grouped by their ``'page'`` value; every scribble is
    then compared only against the products on its own page. A product
    matches a scribble when ``overlaps`` accepts the product's ``'rect'``
    against at least one of the scribble's ``'bboxes'``.

    Args:
        all_prods: iterable of dict-like products with ``'page'`` and
            ``'rect'`` entries. Matched products gain a ``'matched'``
            entry referring to the scribble that matched them.
        scribbles: iterable of dict-like scribbles with ``'page'`` and
            ``'bboxes'`` entries.
        overlap_threshold: forwarded unchanged as the third argument of
            ``overlaps``.

    Returns:
        List of matched products. A product appears once per scribble
        that matches it; if several scribbles match the same product,
        its ``'matched'`` entry refers to the last of them.
    """
    # segment by page
    page_prods = {}
    for p in all_prods:
        page_prods.setdefault(p['page'], []).append(p)

    matches = []
    for s in scribbles:
        # A scribble may sit on a page without any products; treat that
        # as "no candidates" instead of failing with a KeyError.
        for p in page_prods.get(s['page'], ()):
            # any() stops at the first overlapping box, so a product is
            # recorded once per scribble even when several of the
            # scribble's boxes overlap it.
            if any(overlaps(p['rect'], box, overlap_threshold)
                   for box in s['bboxes']):
                p['matched'] = s
                matches.append(p)
    return matches