Conditions | 6 |
Total Lines | 27 |
Code Lines | 22 |
Lines | 0 |
Ratio | 0 % |
Changes | 0 |
1 | """Support for document corpora in JSON format""" |
||
def json_file_to_document(
    filename: str,
    subject_index: SubjectIndex,
    language: str,
    require_subjects: bool,
) -> Document | None:
    """Read one JSON file and build a Document from its contents.

    The file is expected to hold a JSON object. Its optional "text",
    "metadata" and "subjects" members are used; missing members default to
    an empty string, an empty dict and an empty list respectively. The
    subjects are converted with ``_subjects_to_subject_set`` using
    ``subject_index`` and ``language``.

    Returns None (after logging a warning) when the file is empty, cannot
    be decoded or parsed as JSON, or does not contain a JSON object at the
    top level. Also returns None, without logging, when ``require_subjects``
    is true and the resulting subject set is empty.
    """
    if os.path.getsize(filename) == 0:
        logger.warning(f"Skipping empty file {filename}")
        return None

    # JSON text is UTF-8 by specification; an explicit encoding avoids
    # depending on the platform's locale default.
    with open(filename, encoding="utf-8") as jsonfile:
        try:
            data = json.load(jsonfile)
        except (json.JSONDecodeError, UnicodeDecodeError) as err:
            # Undecodable bytes are treated like any other unparsable file.
            logger.warning(f"JSON parsing failed for file {filename}: {err}")
            return None

    # Valid JSON may still be a list, string, number, etc.; only an object
    # supports the key lookups below.
    if not isinstance(data, dict):
        logger.warning(
            f"JSON parsing failed for file {filename}: "
            "top-level value is not an object"
        )
        return None

    subject_set = _subjects_to_subject_set(
        data.get("subjects", []), subject_index, language
    )
    if require_subjects and not subject_set:
        return None

    return Document(
        text=data.get("text", ""),
        metadata=data.get("metadata", {}),
        subject_set=subject_set,
    )
||
52 |