markup: improve path routines

This commit is contained in:
2020-03-11 18:09:26 -07:00
parent 4c9932a2db
commit 5349455e61
5 changed files with 45 additions and 32 deletions

View File

@ -11,7 +11,7 @@ from pathlib import Path
from pdfminer.psparser import LIT from pdfminer.psparser import LIT
from .utils import cv2_rect, ensure_dir, set_file_perms, WORKDIR from .utils import cv2_rect, ensure_dir, set_file_perms
# https://www.pyimagesearch.com/2014/10/20/finding-shapes-images-using-python-opencv/ # https://www.pyimagesearch.com/2014/10/20/finding-shapes-images-using-python-opencv/

View File

@ -4,7 +4,7 @@ from itertools import zip_longest
from openpyxl import Workbook from openpyxl import Workbook
from openpyxl.styles import PatternFill, Border, Side, Alignment, Protection, Font from openpyxl.styles import PatternFill, Border, Side, Alignment, Protection, Font
from .utils import ensure_dir, set_file_perms, WORKDIR from .utils import ensure_dir, set_file_perms
def format_season(s): def format_season(s):
@ -19,7 +19,7 @@ def format_name(name, gender):
return '{}-{}'.format(name, gender[:1]) return '{}-{}'.format(name, gender[:1])
def write_spreadsheet(matches, workdir, file_base): def write_spreadsheet(matches, xls_path):
if not matches: if not matches:
print('write_spreadsheet: no matches. skipping.') print('write_spreadsheet: no matches. skipping.')
return None return None
@ -78,9 +78,5 @@ def write_spreadsheet(matches, workdir, file_base):
cell.border = border cell.border = border
# save # save
ensure_dir(workdir) wb.save(xls_path)
path = os.path.join(workdir, f"{file_base}.xlsx") set_file_perms(xls_path)
wb.save(path)
set_file_perms(path)
return path

View File

@ -24,7 +24,7 @@ django.setup()
from django.contrib.auth.models import User from django.contrib.auth.models import User
from .utils import clean_path, ensure_dir, set_file_perms, WORKDIR from .utils import clean_path, ensure_dir, set_file_perms, MARKUP_WORK_DIR
from .email import reply, reply_missing, reply_no_matches, send_error_email from .email import reply, reply_missing, reply_no_matches, send_error_email
from .matching import find_marked_products from .matching import find_marked_products
from .spreadsheet import write_spreadsheet from .spreadsheet import write_spreadsheet
@ -47,6 +47,25 @@ def on_fail_handler(self, exc, task_id, args, kwargs, einfo):
# raise KeyError() # raise KeyError()
def workdir(username, pdf_path):
    """Return the working directory for one user's uploaded PDF.

    The result is MARKUP_WORK_DIR/<username>/<pdf stem>, where both the
    PDF's stem and the username are passed through clean_path before
    being joined.
    """
    pdf_part = clean_path(Path(pdf_path).stem)
    user_part = clean_path(username)
    return os.path.join(MARKUP_WORK_DIR, user_part, pdf_part)
def dest_spreadsheet_path(dest_pdf_path):
    """Return the .xlsx path that sits next to the given PDF.

    The spreadsheet keeps the PDF's parent directory and stem; only the
    final suffix is swapped for ".xlsx".
    """
    pdf = Path(dest_pdf_path)
    xlsx_name = f"{pdf.stem}.xlsx"
    return os.path.join(pdf.parent, xlsx_name)
def work_pdf_path(workdir, pdf_path):
    """Return the path of the PDF's copy inside the working directory.

    Only the file name of pdf_path is kept; its directory part is
    replaced by workdir.
    """
    # NOTE: inside this function the `workdir` parameter is a directory path,
    # not the module-level workdir() helper of the same name.
    pdf_name = Path(pdf_path).name
    return os.path.join(workdir, pdf_name)
def work_spreadsheet_path(username, pdf_path):
    """Return the .xlsx path inside the working directory for this PDF.

    The directory comes from workdir(username, pdf_path). The file name is
    the PDF's stem plus ".xlsx"; the stem is used as-is here and is not
    passed through clean_path.
    """
    xlsx_name = f"{Path(pdf_path).stem}.xlsx"
    return os.path.join(workdir(username, pdf_path), xlsx_name)
@shared_task(on_failure=on_fail_handler) @shared_task(on_failure=on_fail_handler)
def process_markup_pdf(pdf_path, username): def process_markup_pdf(pdf_path, username):
if not Path(pdf_path).is_file(): if not Path(pdf_path).is_file():
@ -55,22 +74,21 @@ def process_markup_pdf(pdf_path, username):
user = User.objects.get(username=username) user = User.objects.get(username=username)
pdf_stem = Path(pdf_path).stem work_dir = workdir(user.username, pdf_path)
workdir = os.path.join(WORKDIR, clean_path(user.username), clean_path(pdf_stem)) ensure_dir(work_dir)
ensure_dir(workdir)
work_pdf = work_pdf_path(work_dir, pdf_path)
pdf_name = Path(pdf_path).name
dest_path = os.path.join(workdir, pdf_name)
print(f'copying pdf from {pdf_path}') print(f'copying pdf from {pdf_path}')
print(f'copying pdf to {dest_path}') print(f'copying pdf to {work_pdf}')
shutil.copy(pdf_path, dest_path) shutil.copy(pdf_path, work_pdf)
set_file_perms(dest_path) set_file_perms(work_pdf)
frm = str(make_header(decode_header(f'{user.get_full_name()} <{user.email}>'))) frm = str(make_header(decode_header(f'{user.get_full_name()} <{user.email}>')))
subject = str(make_header(decode_header(pdf_stem))) subject = str(make_header(decode_header(Path(pdf_path).stem)))
# find matches # find matches
matches = find_marked_products(dest_path, workdir, debug=0) matches = find_marked_products(work_pdf, work_dir, debug=0)
if not matches: if not matches:
print('no product matches') print('no product matches')
# reply_no_matches(frm, subject) # reply_no_matches(frm, subject)
@ -79,21 +97,20 @@ def process_markup_pdf(pdf_path, username):
print(f'{len(matches)} product matches') print(f'{len(matches)} product matches')
# write spreadsheet # write spreadsheet
xls_path = write_spreadsheet(matches, workdir, pdf_stem) work_xls_path = work_spreadsheet_path(username, pdf_path)
write_spreadsheet(matches, work_xls_path)
if not xls_path: if not work_xls_path:
# TODO send error # TODO send error
print(f'error creating spreadsheet') print(f'error creating spreadsheet')
return return
webdav_dir = Path(pdf_path).parent dest_xls_path = dest_spreadsheet_path(pdf_path)
xls_name = Path(xls_path).name
xls_webdav_path = os.path.join(webdav_dir, xls_name)
print(f'wrote spreadsheet: {xls_path}') print(f'wrote spreadsheet: {work_xls_path}')
print(f'copying xls to {xls_webdav_path}') print(f'copying xls to {dest_xls_path}')
shutil.copy(xls_path, xls_webdav_path) shutil.copy(work_xls_path, dest_xls_path)
set_file_perms(xls_webdav_path) set_file_perms(dest_xls_path)
reply(frm, subject, xls_webdav_path, pdf_path) reply(frm, subject, dest_xls_path, pdf_path)

View File

@ -4,7 +4,7 @@ import shutil
from django.conf import settings from django.conf import settings
WORKDIR = os.path.join(settings.ASSET_DIR, 'markup', 'work') MARKUP_WORK_DIR = os.path.join(settings.ASSET_DIR, 'markup', 'work')
def pdf_rect(rect, container_height): def pdf_rect(rect, container_height):

View File

@ -27,7 +27,7 @@ from djangodav.views import DavView
from procat2.models import Catalog from procat2.models import Catalog
from procat2.settings import BASE_DIR, ASSET_DIR from procat2.settings import BASE_DIR, ASSET_DIR
from .utils import clean_path, ensure_dir, set_file_perms, WORKDIR from .utils import clean_path, ensure_dir, set_file_perms
from .tasks import process_markup_pdf from .tasks import process_markup_pdf
log = logging.getLogger(__name__) log = logging.getLogger(__name__)