| Total Complexity | 5 |
| Total Lines | 26 |
| Duplicated Lines | 0 % |
| Changes | 0 | ||
| 1 | """Class for combining multiple corpora so they behave like a single corpus""" |
||
| 2 | |||
| 3 | import itertools |
||
| 4 | from .types import DocumentCorpus |
||
| 5 | |||
| 6 | |||
class CombinedCorpus(DocumentCorpus):
    """Class for combining multiple corpora so they behave like a single
    corpus"""

    def __init__(self, corpora):
        """Remember the corpora that make up the combined corpus.

        ``corpora`` is stored by reference (it is not copied) and is
        iterated again on every ``documents`` access and on every
        ``set_subject_index`` call, so it should be a re-iterable
        collection such as a list or tuple rather than a one-shot iterator.
        """
        self._corpora = corpora

    @property
    def documents(self):
        """Return an iterator over the documents of all corpora, in the
        order in which the corpora were given.

        The chain is built from a generator expression, so the
        ``documents`` attribute of each underlying corpus is only looked up
        when iteration actually reaches that corpus, instead of all of them
        being read up front into a temporary list.
        """
        return itertools.chain.from_iterable(
            corpus.documents for corpus in self._corpora)

    def set_subject_index(self, subject_index):
        """Set a subject index for looking up labels that are necessary for
        conversion.

        The index is forwarded to every underlying corpus that defines a
        ``set_subject_index`` method; corpora without one are skipped.
        """
        for corpus in self._corpora:
            # Not every corpus type needs a subject index, so only forward
            # it to those that expose the setter.
            if hasattr(corpus, 'set_subject_index'):
                corpus.set_subject_index(subject_index)
||
| 26 |