Total Complexity | 4 |
Total Lines | 12 |
Duplicated Lines | 0 % |
Changes | 1 | ||
Bugs | 0 | Features | 0 |
1 | """A directory of files as a subject corpus""" |
||
class SubjectDirectory:
    """A subject corpus backed by the ``*.txt`` files of a single directory.

    Each matching file describes one subject: its first line holds the
    subject URI and label separated by a single space, and all remaining
    lines make up the subject text.
    """

    def __init__(self, path):
        """Store the directory to read from; no files are accessed yet.

        path -- directory that contains the ``*.txt`` subject files
        """
        self.path = path

    def __iter__(self):
        """Iterate through the directory, yielding Subject objects.

        Files are visited in sorted filename order and decoded as UTF-8,
        so the sequence of subjects is the same on every run and platform.

        Raises ValueError if the first line of a file does not contain a
        space separating the URI from the label.
        """
        pattern = os.path.join(self.path, '*.txt')
        # glob.glob() returns matches in arbitrary, filesystem-dependent
        # order; sorting makes corpus iteration reproducible.
        for filename in sorted(glob.glob(pattern)):
            # Explicit encoding: the locale default differs between
            # platforms and would decode the same corpus differently.
            with open(filename, encoding='utf-8') as subjfile:
                # Header line is "<uri> <label>"; the label may itself
                # contain spaces, hence the split limit of 1.
                uri, label = subjfile.readline().strip().split(' ', 1)
                # Remaining lines keep their own trailing newline and are
                # additionally joined with a single space.
                text = ' '.join(subjfile.readlines())
                yield Subject(uri, label, text)
||
28 |