Total Complexity | 3 |
Total Lines | 19 |
Duplicated Lines | 0 % |
Changes | 2 | ||
Bugs | 0 | Features | 0 |
1 | """Backend that returns most similar subjects based on similarity in sparse |
||
class TFIDFBackend(backend.AnnifBackend):
    """Backend that builds a gensim dictionary and TF-IDF model from subjects.

    ``load_subjects`` trains and persists the models under the directory
    returned by ``self._get_datadir()``; ``analyze`` is not implemented yet.
    """

    name = "tfidf"

    def _atomic_save(self, obj, dirname, filename):
        """Save ``obj`` to ``dirname/filename`` via a temporary file.

        The object is first written to a uniquely named temporary file in
        the same directory and then moved over the final name, so a reader
        never sees a partially written target file.

        Args:
            obj: any object exposing a ``save(path)`` method.
            dirname: directory that will contain the saved file.
            filename: final file name inside ``dirname``.

        Raises:
            Whatever ``obj.save`` or the final move raises; the temporary
            file is removed before the exception propagates.
        """
        tempfd, tempfilename = tempfile.mkstemp(prefix=filename, dir=dirname)
        # Only the unique path is needed; obj.save() opens the file itself.
        os.close(tempfd)
        try:
            obj.save(tempfilename)
            # os.replace overwrites an existing target on every platform,
            # whereas os.rename fails on Windows if the target exists.
            os.replace(tempfilename, os.path.join(dirname, filename))
        except Exception:
            # Best-effort cleanup so failed saves do not leave stray
            # temporary files behind in the data directory.
            try:
                os.unlink(tempfilename)
            except OSError:
                pass
            raise
        # NOTE(review): if obj.save() writes additional side files next to
        # tempfilename, they are not moved here -- confirm against gensim.

    def load_subjects(self, subjects, analyzer):
        """Train the dictionary and TF-IDF model from ``subjects`` and save them.

        Args:
            subjects: object providing ``tokens(analyzer)``; presumably a
                subject corpus yielding token lists -- verify against caller.
            analyzer: passed through unchanged to ``subjects.tokens``.
        """
        corpus = subjects.tokens(analyzer)
        dictionary = gensim.corpora.Dictionary(corpus)
        self._atomic_save(dictionary, self._get_datadir(), 'dictionary')
        veccorpus = VectorCorpus(corpus, dictionary)
        tfidf = gensim.models.TfidfModel(veccorpus)
        self._atomic_save(tfidf, self._get_datadir(), 'tfidf')

    def analyze(self, text):
        """Return analysis results for ``text``; currently always empty."""
        return []  # TODO
||
44 |