|
1
|
|
|
from fastapi import FastAPI, HTTPException, status, BackgroundTasks |
|
2
|
|
|
from ocrd_validators import ParameterValidator |
|
3
|
|
|
from .database import ( |
|
4
|
|
|
db_get_processing_job, |
|
5
|
|
|
db_get_workspace, |
|
6
|
|
|
) |
|
7
|
|
|
from .models import PYJobInput, PYJobOutput |
|
8
|
|
|
|
|
9
|
|
|
|
|
10
|
|
|
async def _get_processor_job(logger, processor_name: str, job_id: str) -> PYJobOutput:
    """Fetch a processing job from the database and return its output form.

    The job is looked up by ``job_id`` via ``db_get_processing_job`` and
    converted with ``to_job_output()``. ``processor_name`` is only used to
    build the log and error messages.

    Raises:
        HTTPException: with status 422 when the lookup or the conversion
            raises a ``ValueError``.
    """
    try:
        # Both the lookup and the conversion are guarded on purpose: a
        # ValueError from either one is reported as a missing job.
        return (await db_get_processing_job(job_id)).to_job_output()
    except ValueError as error:
        not_found = (
            f"Processing job with id '{job_id}' of processor type "
            f"'{processor_name}' not existing"
        )
        logger.exception(f"{not_found}, error: {error}")
        raise HTTPException(
            status_code=status.HTTP_422_UNPROCESSABLE_ENTITY,
            detail=not_found,
        )
|
23
|
|
|
|
|
24
|
|
|
|
|
25
|
|
|
async def validate_and_resolve_mets_path(logger, job_input: PYJobInput, resolve: bool = False) -> PYJobInput:
    """Check that the referenced workspace exists and optionally resolve its METS path.

    Nothing is done when ``job_input.path_to_mets`` is already set or when no
    ``workspace_id`` is given. Otherwise the workspace is fetched with
    ``db_get_workspace``; when ``resolve`` is true, the workspace's
    ``workspace_mets_path`` is written to ``job_input.path_to_mets``.

    Returns:
        The same ``job_input`` object (possibly mutated in place).

    Raises:
        HTTPException: with status 422 when a ``ValueError`` is raised while
            fetching the workspace or assigning the resolved path.
    """
    # Only a job that names a workspace but carries no METS path needs the
    # DB round trip; everything else is returned untouched.
    if job_input.path_to_mets or not job_input.workspace_id:
        return job_input

    workspace_id = job_input.workspace_id
    try:
        # The lookup runs even when `resolve` is false, so that an unknown
        # workspace id is rejected early.
        workspace = await db_get_workspace(workspace_id)
        if resolve:
            job_input.path_to_mets = workspace.workspace_mets_path
    except ValueError as error:
        missing = f"Workspace with id '{job_input.workspace_id}' not existing"
        logger.exception(f"{missing}: {error}")
        raise HTTPException(
            status_code=status.HTTP_422_UNPROCESSABLE_ENTITY,
            detail=missing,
        )
    return job_input
|
40
|
|
|
|
|
41
|
|
|
|
|
42
|
|
|
def validate_job_input(logger, processor_name: str, ocrd_tool: dict, job_input: PYJobInput) -> None:
    """Validate a processing job request; raise ``HTTPException`` if it is unacceptable.

    The checks run in this order:

    1. Exactly one of ``job_input.path_to_mets`` and ``job_input.workspace_id``
       must be set (both set or both unset is rejected).
    2. ``ocrd_tool`` must be non-empty.
    3. ``job_input.parameters`` must validate against ``ocrd_tool`` using
       ``ParameterValidator``.

    Args:
        logger: logger used to record the reason for a rejection.
        processor_name: processor name, used only in messages.
        ocrd_tool: the processor's ocrd-tool description.
        job_input: the incoming job request.

    Raises:
        HTTPException: 422 when check 1 fails; 500 when ``ocrd_tool`` is empty
            or missing; 422 when the validator itself raises; 400 (with the
            validator's error list as detail) when the parameters are invalid.
    """
    # NOTE: `logger.error` is used wherever no exception is being handled.
    # `logger.exception` is only meaningful inside an `except` block; outside
    # of one it appends a misleading "NoneType: None" traceback to the record.
    if bool(job_input.path_to_mets) == bool(job_input.workspace_id):
        message = "Either 'path' or 'workspace_id' must be provided, but not both"
        logger.error(message)
        raise HTTPException(
            status_code=status.HTTP_422_UNPROCESSABLE_ENTITY,
            detail=message
        )

    if not ocrd_tool:
        message = f"Processor '{processor_name}' not available. Empty or missing ocrd_tool"
        logger.error(message)
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail=message
        )

    try:
        # The validator receives a plain-dict copy of the parameters.
        report = ParameterValidator(ocrd_tool).validate(dict(job_input.parameters))
    except Exception as e:
        # Any failure of the validator itself (including parameters that
        # cannot be turned into a dict) is reported as an unprocessable request.
        logger.exception(f'Failed to validate processing job against the ocrd_tool: {e}')
        raise HTTPException(
            status_code=status.HTTP_422_UNPROCESSABLE_ENTITY,
            detail='Failed to validate processing job against the ocrd_tool'
        ) from e

    if not report.is_valid:
        logger.error(f'Failed to validate processing job '
                     f'against the ocrd_tool, errors: {report.errors}')
        raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail=report.errors)
|
68
|
|
|
|