markup: matching works
This commit is contained in:
43
markup/matching.py
Normal file
43
markup/matching.py
Normal file
@ -0,0 +1,43 @@
|
||||
from markup.img import find_shapes, write_debug_image
|
||||
from markup.utils import overlaps
|
||||
|
||||
|
||||
def find_scribbles_shapes(scribbles):
    """Attach a ``'bboxes'`` list to every scribble, in place.

    For each scribble, ``find_shapes`` is called on ``scribble['image']``
    and its result is unpacked as (image width, image height, shapes).
    Every shape is then passed through ``transform`` together with
    ``scribble['rect']`` and the image size, and the resulting list is
    stored under ``scribble['bboxes']``.

    The scribble dicts are mutated; nothing is returned.
    """
    for entry in scribbles:
        width, height, outlines = find_shapes(entry['image'])
        entry['bboxes'] = [
            transform(entry['rect'], width, height, outline)
            for outline in outlines
        ]
|
||||
|
||||
|
||||
def transform(pdf_rect, imgw, imgh, shape):
    """Convert a scribble shape from image coords to pdf coords.

    ``pdf_rect`` must expose ``left``, ``top``, ``right`` and ``bottom``;
    ``imgw`` / ``imgh`` are the image dimensions the shape was found in.
    The shape is scaled by the pdf-extent / image-extent ratio on each
    axis and then translated by the rect's ``left`` / ``top``.

    Returns whatever ``shape.scale(...).translate(...)`` returns.
    """
    # Extent of the target rectangle in pdf units.
    width_pdf = pdf_rect.right - pdf_rect.left
    height_pdf = pdf_rect.bottom - pdf_rect.top

    # Per-axis factors that map image units onto pdf units.
    x_factor, y_factor = width_pdf / imgw, height_pdf / imgh

    resized = shape.scale(x_factor, y_factor)
    return resized.translate(pdf_rect.left, pdf_rect.top)
|
||||
|
||||
|
||||
def find_matches(all_prods, scribbles, overlap_threshold):
    """Return the products that are overlapped by a scribble on their page.

    Args:
        all_prods: iterable of product dicts; each needs ``'page'`` and
            ``'rect'`` keys.
        scribbles: iterable of scribble dicts; each needs ``'page'`` and
            ``'bboxes'`` keys.
        overlap_threshold: passed through unchanged as the third argument
            of ``overlaps``.

    Returns:
        A list of the matched product dicts. Each matched product also
        gets ``p['matched']`` set to the scribble that hit it. A product
        hit by several different scribbles appears once per scribble and
        its ``'matched'`` key ends up pointing at the last of them.
    """
    # segment by page
    page_prods = {}
    for p in all_prods:
        page_prods.setdefault(p['page'], []).append(p)

    matches = []
    for s in scribbles:
        # A scribble may sit on a page that has no products at all; that
        # is "no match", not an error, so fall back to an empty sequence
        # instead of raising KeyError.
        for p in page_prods.get(s['page'], ()):
            # any() stops at the first overlapping box, so a product that
            # touches several boxes of the same scribble is recorded once
            # rather than once per box.
            if any(overlaps(p['rect'], box, overlap_threshold)
                   for box in s['bboxes']):
                p['matched'] = s
                matches.append(p)

    return matches
|
||||
Reference in New Issue
Block a user