| Total Complexity | 4 |
| Total Lines | 12 |
| Duplicated Lines | 0 % |
| Changes | 1 | ||
| Bugs | 0 | Features | 0 |
| 1 | """A directory of files as a subject corpus""" |
||
class SubjectDirectory:
    """Iterable view over a directory of ``*.txt`` subject files.

    Each file is expected to hold a header line of the form
    ``<uri> <label>`` (split on the first space) followed by the
    subject's text on the remaining lines.
    """

    def __init__(self, path):
        """Remember the directory to scan; no I/O happens here.

        Args:
            path: Directory containing the ``*.txt`` subject files.
        """
        self.path = path

    def __iter__(self):
        """Iterate through the directory, yielding Subject objects.

        Files are visited in sorted filename order so that repeated
        iterations (and runs on different filesystems) produce the
        subjects in the same sequence.

        Yields:
            ``Subject(uri, label, text)`` for every ``*.txt`` file found
            directly inside ``self.path``.

        Raises:
            ValueError: If a file's first line does not contain a space
                separating the URI from the label.
        """
        pattern = os.path.join(self.path, '*.txt')
        # glob.glob() returns matches in arbitrary order; sort for
        # deterministic iteration.
        for filename in sorted(glob.glob(pattern)):
            # Explicit encoding: the default depends on the platform locale.
            # NOTE(review): assumes corpus files are UTF-8 -- confirm.
            with open(filename, encoding='utf-8') as subjfile:
                header = subjfile.readline().strip()
                uri, label = header.split(' ', 1)
                # readlines() keeps each line's trailing newline; the lines
                # are additionally separated by a single space.
                text = ' '.join(subjfile.readlines())
                yield Subject(uri, label, text)
||
| 28 |