annif.lexical.mllm.MLLMModel._get_label_props() - Code Metrics - Inspection of "Make vocabularies multilingual" - NatLibFi/Annif - Measure and Improve Code Quality continuously with Scrutinizer

Passed

Pull Request — master (#600)

by Osma

created 2022-08-04 07:41 UTC

annif.lexical.mllm.MLLMModel._get_label_props() A

↳ Parent: annif.lexical.mllm

Complexity

Conditions

Size

Total Lines	10
Code Lines	7

Duplication

Lines	10
Ratio	100 %

Importance

Changes

Metric	Value
cc	2
eloc	7
nop	1
dl	10
loc	10
rs	10
c	0
b	0
f	0

"""MLLM (Maui-like Lexical Matchin) model for Annif"""

import collections
import math
import joblib
from statistics import mean
from enum import IntEnum
import numpy as np
from rdflib.namespace import SKOS
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.ensemble import BaggingClassifier
from sklearn.tree import DecisionTreeClassifier
import annif.util
import annif.parallel
from annif.exception import OperationFailedException
from annif.lexical.tokenset import TokenSet, TokenSetIndex
from annif.lexical.util import get_subject_labels
from annif.lexical.util import make_relation_matrix, make_collection_matrix


# One vocabulary label of a subject; is_pref tells whether the label came
# from a preferred-label property.
Term = collections.namedtuple(
    'Term',
    ['subject_id', 'label', 'is_pref'])

# A single hit of an indexed label in the text; pos is the index of the
# sentence in which the hit was found.
Match = collections.namedtuple(
    'Match',
    ['subject_id', 'is_pref', 'n_tokens', 'pos', 'ambiguity'])

# All matches of one subject within a document, merged into one record.
Candidate = collections.namedtuple(
    'Candidate',
    ['doc_length', 'subject_id', 'freq', 'is_pref', 'n_tokens',
     'ambiguity', 'first_occ', 'last_occ', 'spread'])

# Vocabulary relation matrices and corpus statistics needed when
# converting candidates into feature vectors.
ModelData = collections.namedtuple(
    'ModelData',
    ['broader', 'narrower', 'related', 'collection',
     'doc_freq', 'subj_freq', 'idf'])

# Column indices of the feature matrix; numbering starts from zero so that
# the members can be used directly as array indices.
Feature = IntEnum(
    'Feature',
    ['freq', 'doc_freq', 'subj_freq', 'tfidf', 'is_pref', 'n_tokens',
     'ambiguity', 'first_occ', 'last_occ', 'spread', 'doc_length',
     'broader', 'narrower', 'related', 'collection'],
    start=0)


def conflate_matches(matches, doc_length):
    """Merge the matches of each subject into a single Candidate.

    The matches are grouped by subject_id, keeping their original order
    within each group. For every group the mean of is_pref, n_tokens and
    ambiguity is calculated, and the match count as well as the first
    position, last position and their distance are divided by doc_length.
    The first and last positions are read from the first and last match
    of the group, so matches are expected to arrive ordered by position.

    Returns a list with one Candidate per distinct subject.
    """

    by_subject = collections.defaultdict(list)
    for match in matches:
        by_subject[match.subject_id].append(match)

    candidates = []
    for subject_id, subj_matches in by_subject.items():
        first_pos = subj_matches[0].pos
        last_pos = subj_matches[-1].pos
        candidates.append(Candidate(
            doc_length=doc_length,
            subject_id=subject_id,
            freq=len(subj_matches) / doc_length,
            is_pref=mean(float(m.is_pref) for m in subj_matches),
            n_tokens=mean(m.n_tokens for m in subj_matches),
            ambiguity=mean(m.ambiguity for m in subj_matches),
            first_occ=first_pos / doc_length,
            last_occ=last_pos / doc_length,
            spread=(last_pos - first_pos) / doc_length))
    return candidates


def generate_candidates(text, analyzer, vectorizer, index):
    """Find the subjects whose labels occur in the given text.

    The text is split into sentences with the analyzer and each sentence
    is vectorized. The column indices of the nonzero entries of a sentence
    row form a TokenSet that is looked up in the index. Every hit becomes
    a Match whose position is the index of the sentence. Finally the
    matches are merged per subject, using the number of sentences as the
    document length.

    Returns a list of Candidates, one per matched subject.
    """

    sentences = analyzer.tokenize_sentences(text)
    sentence_rows = vectorizer.transform(sentences)

    found = []
    for position, row in enumerate(sentence_rows):
        sentence_tokens = TokenSet(row.nonzero()[1])
        found.extend(
            Match(subject_id=hit.subject_id,
                  is_pref=hit.is_pref,
                  n_tokens=len(hit),
                  pos=position,
                  ambiguity=ambiguity)
            for hit, ambiguity in index.search(sentence_tokens))

    return conflate_matches(found, len(sentences))


def candidates_to_features(candidates, mdata):
    """Convert a list of Candidates to a NumPy feature matrix.

    The result is a float32 matrix with one row per candidate and one
    column per member of Feature. Candidate-specific values are copied
    from the Candidate itself, corpus statistics are looked up from mdata
    by subject id, and the relation features are derived from the relation
    matrices of mdata restricted to the subjects present among the
    candidates, divided by the number of candidates.
    """

    n_candidates = len(candidates)
    features = np.zeros((n_candidates, len(Feature)), dtype=np.float32)

    # boolean mask over the subjects: True for subjects among the candidates
    subject_ids = [cand.subject_id for cand in candidates]
    mask = np.zeros(mdata.related.shape[0], dtype=bool)
    mask[subject_ids] = True

    # per-subject sums of the relation matrices restricted by the mask
    broader = mdata.broader.multiply(mask).sum(axis=1)
    narrower = mdata.narrower.multiply(mask).sum(axis=1)
    related = mdata.related.multiply(mask).sum(axis=1)
    # NOTE(review): presumably counts, for each subject, the candidates
    # that share a collection with it - confirm against
    # make_collection_matrix
    collection = mdata.collection.multiply(mask).T.dot(
        mdata.collection).sum(axis=0)

    # each row is a view into the feature matrix, so writes go through
    for row, cand in zip(features, candidates):
        subj = cand.subject_id
        row[Feature.freq] = cand.freq
        row[Feature.doc_freq] = mdata.doc_freq[subj]
        # zero for subjects without a recorded frequency, count - 1 otherwise
        row[Feature.subj_freq] = mdata.subj_freq.get(subj, 1) - 1
        row[Feature.tfidf] = cand.freq * mdata.idf[subj]
        row[Feature.is_pref] = cand.is_pref
        row[Feature.n_tokens] = cand.n_tokens
        row[Feature.ambiguity] = cand.ambiguity
        row[Feature.first_occ] = cand.first_occ
        row[Feature.last_occ] = cand.last_occ
        row[Feature.spread] = cand.spread
        row[Feature.doc_length] = cand.doc_length
        row[Feature.broader] = broader[subj, 0] / n_candidates
        row[Feature.narrower] = narrower[subj, 0] / n_candidates
        row[Feature.related] = related[subj, 0] / n_candidates
        row[Feature.collection] = collection[0, subj] / n_candidates
    return features


class MLLMCandidateGenerator(annif.parallel.BaseWorker):
    """Pool worker that generates candidates for one document at a time.

    The class is used with a pool whose initializer is the init method
    inherited from BaseWorker; cls.args is expected to hold the analyzer,
    vectorizer and index keyword arguments of generate_candidates().
    """

    @classmethod
    def generate_candidates(cls, doc_subject_ids, text):
        """Return doc_subject_ids paired with the candidates of text.

        The subject ids are passed through untouched so that the caller
        can match each result with the subjects of its document.
        """
        worker_args = cls.args  # pragma: no cover
        found = generate_candidates(text, **worker_args)  # pragma: no cover
        return doc_subject_ids, found  # pragma: no cover


class MLLMFeatureConverter(annif.parallel.BaseWorker):
    """Pool worker that converts candidate lists into feature matrices.

    The class is used with a pool whose initializer is the init method
    inherited from BaseWorker; cls.args is expected to hold the mdata
    keyword argument of candidates_to_features().
    """

    @classmethod
    def candidates_to_features(cls, candidates):
        """Return the feature matrix of the given list of candidates."""
        worker_args = cls.args  # pragma: no cover
        return candidates_to_features(
            candidates, **worker_args)  # pragma: no cover


class MLLMModel:
    """Maui-like Lexical Matching model.

    Holds the label index built from a vocabulary, the relation matrices
    of the vocabulary, the statistics collected from a training corpus
    and the classifier that scores candidate subjects.
    """

    def generate_candidates(self, text, analyzer):
        """Return the Candidates found in text.

        Uses the vectorizer and the token set index of this model, which
        are created by _prepare_train_index().
        """
        return generate_candidates(
            text, analyzer, vectorizer=self._vectorizer, index=self._index)

    @property
    def _model_data(self):
        """The matrices and statistics of this model as a ModelData."""
        fields = {
            'broader': self._broader_matrix,
            'narrower': self._narrower_matrix,
            'related': self._related_matrix,
            'collection': self._collection_matrix,
            'doc_freq': self._doc_freq,
            'subj_freq': self._subj_freq,
            'idf': self._idf,
        }
        return ModelData(**fields)

    def _candidates_to_features(self, candidates):
        """Return the feature matrix of the given list of Candidates."""
        mdata = self._model_data
        return candidates_to_features(candidates, mdata)

    @staticmethod
    def _get_label_props(params):
        """Return the label properties to use for building terms.

        The result is a tuple of two lists: the properties of preferred
        labels and the properties of non-preferred labels. Hidden labels
        are included in the latter only when the use_hidden_labels
        parameter is true.
        """
        nonpref_label_props = [SKOS.altLabel]
        if annif.util.boolean(params['use_hidden_labels']):
            nonpref_label_props.append(SKOS.hiddenLabel)

        return ([SKOS.prefLabel], nonpref_label_props)

    def _prepare_terms(self, graph, vocab, params):
        """Collect the labels of all active subjects of the vocabulary.

        Returns a tuple (terms, subject_ids): a list of Terms, where the
        preferred labels of each subject come before its non-preferred
        labels, and the list of ids of the active subjects.
        """
        pref_label_props, nonpref_label_props = self._get_label_props(params)
        # label properties paired with the is_pref flag of resulting Terms
        label_groups = ((pref_label_props, True),
                        (nonpref_label_props, False))

        terms = []
        subject_ids = []
        for subj_id, uri, _, _ in vocab.subjects.active:
            subject_ids.append(subj_id)
            for label_props, is_pref in label_groups:
                labels = get_subject_labels(graph, uri, label_props,
                                            params['language'])
                terms.extend(
                    Term(subject_id=subj_id, label=label, is_pref=is_pref)
                    for label in labels)

        return (terms, subject_ids)

    def _prepare_relations(self, graph, vocab):
        """Build the broader, narrower, related and collection matrices."""

        def relation_matrix(prop):
            return make_relation_matrix(graph, vocab, prop)

        self._broader_matrix = relation_matrix(SKOS.broader)
        self._narrower_matrix = relation_matrix(SKOS.narrower)
        self._related_matrix = relation_matrix(SKOS.related)
        self._collection_matrix = make_collection_matrix(graph, vocab)

    def _prepare_train_index(self, vocab, analyzer, params):
        """Build the vectorizer and the token set index from the vocabulary.

        Also prepares the relation matrices of the vocabulary. Returns the
        list of ids of the active subjects.
        """
        graph = vocab.as_graph()
        terms, subject_ids = self._prepare_terms(graph, vocab, params)
        self._prepare_relations(graph, vocab)

        vectorizer = CountVectorizer(
            binary=True,
            tokenizer=analyzer.tokenize_words)
        self._vectorizer = vectorizer
        label_corpus = vectorizer.fit_transform(
            [term.label for term in terms])

        # number of labels in which each token occurs - the smaller the
        # number, the rarer the word
        token_freq = np.bincount(label_corpus.indices,
                                 minlength=label_corpus.shape[1])

        index = self._index = TokenSetIndex()
        for term, label_row in zip(terms, label_corpus):
            # order the tokens of the label from rarest to most common, so
            # that the rarest token is used as the index key
            tokens = sorted(label_row.nonzero()[1],
                            key=lambda token: token_freq[token])
            index.add(TokenSet(tokens, term.subject_id, term.is_pref))

        return subject_ids

    def _prepare_train_data(self, corpus, vocab, analyzer, n_jobs):
        """Generate candidates for every document of the training corpus.

        Resets and fills the candidate and manual subject frequency
        counters of the model as a side effect. Returns a tuple
        (train_x, train_y): a list with the candidate list of each
        document, and a flat list of booleans telling for every candidate
        whether its subject was among the subjects of the document.
        """
        # frequency of subjects (by id) in the generated candidates
        self._doc_freq = collections.Counter()
        # frequency of manually assigned subjects ("domain keyphraseness")
        self._subj_freq = collections.Counter()
        train_x, train_y = [], []

        jobs, pool_class = annif.parallel.get_pool(n_jobs)
        worker_args = dict(
            analyzer=analyzer,
            vectorizer=self._vectorizer,
            index=self._index)

        with pool_class(jobs,
                        initializer=MLLMCandidateGenerator.init,
                        initargs=(worker_args,)) as pool:
            tasks = (
                ([vocab.subjects.by_uri(uri) for uri in doc.uris], doc.text)
                for doc in corpus.documents)
            results = pool.starmap(
                MLLMCandidateGenerator.generate_candidates, tasks, 10)
            for doc_subject_ids, candidates in results:
                self._subj_freq.update(doc_subject_ids)
                self._doc_freq.update(c.subject_id for c in candidates)
                train_x.append(candidates)
                train_y.extend(
                    c.subject_id in doc_subject_ids for c in candidates)

        return (train_x, train_y)

    def _calculate_idf(self, subject_ids, doc_count):
        """Return smoothed inverse document frequencies of the subjects.

        The value of a subject is log((doc_count + 1) / (df + 1)) + 1,
        where df is the candidate frequency of the subject. The result is
        a defaultdict, so subjects that are not listed in subject_ids get
        the value 0.0.
        """
        numerator = doc_count + 1
        return collections.defaultdict(float, {
            subj_id: math.log(numerator / (self._doc_freq[subj_id] + 1)) + 1
            for subj_id in subject_ids})

    def _prepare_features(self, train_x, n_jobs):
        """Convert the candidate lists of train_x into feature matrices.

        Returns the result of the pool map: one feature matrix per
        candidate list.
        """
        worker_args = dict(mdata=self._model_data)
        jobs, pool_class = annif.parallel.get_pool(n_jobs)

        with pool_class(jobs,
                        initializer=MLLMFeatureConverter.init,
                        initargs=(worker_args,)) as pool:
            feature_matrices = pool.map(
                MLLMFeatureConverter.candidates_to_features, train_x, 10)

        return feature_matrices

    def prepare_train(self, corpus, vocab, analyzer, params, n_jobs):
        """Turn a vocabulary and a training corpus into training data.

        Returns a tuple of two NumPy arrays: the feature matrix with one
        row per generated candidate, and the matching array of labels.
        """
        # create an index from the vocabulary terms
        subject_ids = self._prepare_train_index(vocab, analyzer, params)

        # convert the corpus into train data
        train_x, train_y = self._prepare_train_data(
            corpus, vocab, analyzer, n_jobs)

        # precalculate idf values for all candidate subjects
        doc_count = len(train_x)
        self._idf = self._calculate_idf(subject_ids, doc_count)

        # convert the train data into feature values
        feature_matrices = self._prepare_features(train_x, n_jobs)

        features = np.vstack(feature_matrices)
        labels = np.array(train_y)
        return (features, labels)

    def _create_classifier(self, params):
        """Return an unfitted bagging classifier of decision trees.

        The min_samples_leaf, max_leaf_nodes and max_samples parameters
        are converted into numbers and passed on to the classifiers.
        """
        tree = DecisionTreeClassifier(
            min_samples_leaf=int(params['min_samples_leaf']),
            max_leaf_nodes=int(params['max_leaf_nodes']))
        return BaggingClassifier(
            tree, max_samples=float(params['max_samples']))

    def train(self, train_x, train_y, params):
        """Fit a new classifier on the given features and labels.

        Raises OperationFailedException if the fitted classifier has not
        seen exactly two classes.
        """
        # fit the model on the training corpus
        self._classifier = self._create_classifier(params)
        self._classifier.fit(train_x, train_y)

        # sanity check: verify that the classifier has seen both classes
        if self._classifier.n_classes_ == 2:
            return
        raise OperationFailedException(
            "Unable to create classifier: "
            "Not enough positive and negative examples "
            "in the training data. Please check that your training "
            "data matches your vocabulary.")

    def _prediction_to_list(self, scores, candidates):
        """Pair the scores with subject ids and sort them, best first.

        The second value of each score row is used as the score of the
        corresponding candidate. Returns a list of (score, subject_id)
        tuples in descending order.
        """
        ranked = [(score_row[1], cand.subject_id)
                  for score_row, cand in zip(scores, candidates)]
        ranked.sort(reverse=True)
        return ranked

    def predict(self, candidates):
        """Score the candidates with the trained classifier.

        Returns a list of (score, subject_id) tuples in descending order,
        or an empty list when there are no candidates.
        """
        if candidates:
            features = self._candidates_to_features(candidates)
            scores = self._classifier.predict_proba(features)
            return self._prediction_to_list(scores, candidates)
        return []

    def save(self, filename):
        """Store the whole model object into a file using joblib."""
        return joblib.dump(self, filename)

    @staticmethod
    def load(filename):
        """Load a model object from a file written by save().

        NOTE: joblib.load unpickles the file, which can execute arbitrary
        code - only load model files that come from a trusted source.
        """
        return joblib.load(filename)


1		"""MLLM (Maui-like Lexical Matching) model for Annif"""
2
3		import collections
4		import math
5		import joblib
6		from statistics import mean
7		from enum import IntEnum
8		import numpy as np
9		from rdflib.namespace import SKOS
10		from sklearn.feature_extraction.text import CountVectorizer
11		from sklearn.ensemble import BaggingClassifier
12		from sklearn.tree import DecisionTreeClassifier
13		import annif.util
14		import annif.parallel
15		from annif.exception import OperationFailedException
16		from annif.lexical.tokenset import TokenSet, TokenSetIndex
17		from annif.lexical.util import get_subject_labels
18		from annif.lexical.util import make_relation_matrix, make_collection_matrix
19
20
21		Term = collections.namedtuple('Term', 'subject_id label is_pref')
22
23		Match = collections.namedtuple(
24		'Match', 'subject_id is_pref n_tokens pos ambiguity')
25
26		Candidate = collections.namedtuple(
27		'Candidate',
28		'doc_length subject_id freq is_pref n_tokens ambiguity ' +
29		'first_occ last_occ spread')
30
31		ModelData = collections.namedtuple(
32		'ModelData',
33		'broader narrower related collection ' +
34		'doc_freq subj_freq idf')
35
36		Feature = IntEnum(
37		'Feature',
38		'freq doc_freq subj_freq tfidf is_pref n_tokens ambiguity ' +
39		'first_occ last_occ spread doc_length ' +
40		'broader narrower related collection',
41		start=0)
42
43
44	View Code Duplication	def conflate_matches(matches, doc_length):
		0 ignored issues – show Duplication introduced 2022-01-18 08:57 UTC by Report Bug Copy Issue Report This code seems to be duplicated in your project. Loading history...
45		subj_matches = collections.defaultdict(list)
46		for match in matches:
47		subj_matches[match.subject_id].append(match)
48		return [
49		Candidate(
50		doc_length=doc_length,
51		subject_id=subject_id,
52		freq=len(matches) / doc_length,
53		is_pref=mean((float(m.is_pref) for m in matches)),
54		n_tokens=mean((m.n_tokens for m in matches)),
55		ambiguity=mean((m.ambiguity for m in matches)),
56		first_occ=matches[0].pos / doc_length,
57		last_occ=matches[-1].pos / doc_length,
58		spread=(matches[-1].pos - matches[0].pos) / doc_length
59		)
60		for subject_id, matches in subj_matches.items()]
61
62
63	View Code Duplication	def generate_candidates(text, analyzer, vectorizer, index):
		0 ignored issues – show Duplication introduced 2022-01-18 08:57 UTC by Report Bug Copy Issue Report This code seems to be duplicated in your project. Loading history...
64		sentences = analyzer.tokenize_sentences(text)
65		sent_tokens = vectorizer.transform(sentences)
66		matches = []
67
68		for sent_idx, token_matrix in enumerate(sent_tokens):
69		tset = TokenSet(token_matrix.nonzero()[1])
70		for ts, ambiguity in index.search(tset):
71		matches.append(Match(subject_id=ts.subject_id,
72		is_pref=ts.is_pref,
73		n_tokens=len(ts),
74		pos=sent_idx,
75		ambiguity=ambiguity))
76
77		return conflate_matches(matches, len(sentences))
78
79
80	View Code Duplication	def candidates_to_features(candidates, mdata):
		0 ignored issues – show Duplication introduced 2022-01-18 08:57 UTC by Report Bug Copy Issue Report This code seems to be duplicated in your project. Loading history...
81		"""Convert a list of Candidates to a NumPy feature matrix"""
82
83		matrix = np.zeros((len(candidates), len(Feature)), dtype=np.float32)
84		c_ids = [c.subject_id for c in candidates]
85		c_vec = np.zeros(mdata.related.shape[0], dtype=bool)
86		c_vec[c_ids] = True
87		broader = mdata.broader.multiply(c_vec).sum(axis=1)
88		narrower = mdata.narrower.multiply(c_vec).sum(axis=1)
89		related = mdata.related.multiply(c_vec).sum(axis=1)
90		collection = mdata.collection.multiply(c_vec).T.dot(
91		mdata.collection).sum(axis=0)
92		for idx, c in enumerate(candidates):
93		subj = c.subject_id
94		matrix[idx, Feature.freq] = c.freq
95		matrix[idx, Feature.doc_freq] = mdata.doc_freq[subj]
96		matrix[idx, Feature.subj_freq] = mdata.subj_freq.get(subj, 1) - 1
97		matrix[idx, Feature.tfidf] = c.freq * mdata.idf[subj]
98		matrix[idx, Feature.is_pref] = c.is_pref
99		matrix[idx, Feature.n_tokens] = c.n_tokens
100		matrix[idx, Feature.ambiguity] = c.ambiguity
101		matrix[idx, Feature.first_occ] = c.first_occ
102		matrix[idx, Feature.last_occ] = c.last_occ
103		matrix[idx, Feature.spread] = c.spread
104		matrix[idx, Feature.doc_length] = c.doc_length
105		matrix[idx, Feature.broader] = broader[subj, 0] / len(c_ids)
106		matrix[idx, Feature.narrower] = narrower[subj, 0] / len(c_ids)
107		matrix[idx, Feature.related] = related[subj, 0] / len(c_ids)
108		matrix[idx, Feature.collection] = collection[0, subj] / len(c_ids)
109		return matrix
110
111
112		class MLLMCandidateGenerator(annif.parallel.BaseWorker):
113
114		@classmethod
115		def generate_candidates(cls, doc_subject_ids, text):
116		candidates = generate_candidates(text, **cls.args) # pragma: no cover
117		return doc_subject_ids, candidates # pragma: no cover
118
119
120		class MLLMFeatureConverter(annif.parallel.BaseWorker):
121
122		@classmethod
123		def candidates_to_features(cls, candidates):
124		return candidates_to_features(candidates,
125		**cls.args) # pragma: no cover
126
127
128	View Code Duplication	class MLLMModel:
		0 ignored issues – show Duplication introduced 2022-01-18 08:57 UTC by Report Bug Copy Issue Report This code seems to be duplicated in your project. Loading history...
129		"""Maui-like Lexical Matching model"""
130
131		def generate_candidates(self, text, analyzer):
132		return generate_candidates(text, analyzer,
133		self._vectorizer, self._index)
134
135		@property
136		def _model_data(self):
137		return ModelData(broader=self._broader_matrix,
138		narrower=self._narrower_matrix,
139		related=self._related_matrix,
140		collection=self._collection_matrix,
141		doc_freq=self._doc_freq,
142		subj_freq=self._subj_freq,
143		idf=self._idf)
144
145		def _candidates_to_features(self, candidates):
146		return candidates_to_features(candidates, self._model_data)
147
148		@staticmethod
149		def _get_label_props(params):
150		pref_label_props = [SKOS.prefLabel]
151
152		if annif.util.boolean(params['use_hidden_labels']):
153		nonpref_label_props = [SKOS.altLabel, SKOS.hiddenLabel]
154		else:
155		nonpref_label_props = [SKOS.altLabel]
156
157		return (pref_label_props, nonpref_label_props)
158
159		def _prepare_terms(self, graph, vocab, params):
160		pref_label_props, nonpref_label_props = self._get_label_props(params)
161
162		terms = []
163		subject_ids = []
164		for subj_id, uri, _, _ in vocab.subjects.active:
165		subject_ids.append(subj_id)
166
167		for label in get_subject_labels(graph, uri, pref_label_props,
168		params['language']):
169		terms.append(Term(subject_id=subj_id,
170		label=label,
171		is_pref=True))
172
173		for label in get_subject_labels(graph, uri, nonpref_label_props,
174		params['language']):
175		terms.append(Term(subject_id=subj_id,
176		label=label,
177		is_pref=False))
178
179		return (terms, subject_ids)
180
181		def _prepare_relations(self, graph, vocab):
182		self._broader_matrix = make_relation_matrix(
183		graph, vocab, SKOS.broader)
184		self._narrower_matrix = make_relation_matrix(
185		graph, vocab, SKOS.narrower)
186		self._related_matrix = make_relation_matrix(
187		graph, vocab, SKOS.related)
188		self._collection_matrix = make_collection_matrix(graph, vocab)
189
190		def _prepare_train_index(self, vocab, analyzer, params):
191		graph = vocab.as_graph()
192		terms, subject_ids = self._prepare_terms(graph, vocab, params)
193		self._prepare_relations(graph, vocab)
194
195		self._vectorizer = CountVectorizer(
196		binary=True,
197		tokenizer=analyzer.tokenize_words
198		)
199		label_corpus = self._vectorizer.fit_transform((t.label for t in terms))
200
201		# frequency of each token used in labels - how rare each word is
202		token_freq = np.bincount(label_corpus.indices,
203		minlength=label_corpus.shape[1])
204
205		self._index = TokenSetIndex()
206		for term, label_matrix in zip(terms, label_corpus):
207		tokens = label_matrix.nonzero()[1]
208		# sort tokens by frequency - use the rarest token as index key
209		tokens = sorted(tokens, key=token_freq.__getitem__)
210		tset = TokenSet(tokens, term.subject_id, term.is_pref)
211		self._index.add(tset)
212
213		return subject_ids
214
215		def _prepare_train_data(self, corpus, vocab, analyzer, n_jobs):
216		# frequency of subjects (by id) in the generated candidates
217		self._doc_freq = collections.Counter()
218		# frequency of manually assigned subjects ("domain keyphraseness")
219		self._subj_freq = collections.Counter()
220		train_x = []
221		train_y = []
222
223		jobs, pool_class = annif.parallel.get_pool(n_jobs)
224
225		cg_args = {
226		'analyzer': analyzer,
227		'vectorizer': self._vectorizer,
228		'index': self._index
229		}
230
231		with pool_class(jobs,
232		initializer=MLLMCandidateGenerator.init,
233		initargs=(cg_args,)) as pool:
234		params = (([vocab.subjects.by_uri(uri) for uri in doc.uris],
235		doc.text)
236		for doc in corpus.documents)
237		for doc_subject_ids, candidates in pool.starmap(
238		MLLMCandidateGenerator.generate_candidates, params, 10):
239
240		self._subj_freq.update(doc_subject_ids)
241		self._doc_freq.update([c.subject_id for c in candidates])
242		train_x.append(candidates)
243		train_y += [(c.subject_id in doc_subject_ids)
244		for c in candidates]
245
246		return (train_x, train_y)
247
248		def _calculate_idf(self, subject_ids, doc_count):
249		idf = collections.defaultdict(float)
250		for subj_id in subject_ids:
251		idf[subj_id] = math.log((doc_count + 1) /
252		(self._doc_freq[subj_id] + 1)) + 1
253
254		return idf
255
256		def _prepare_features(self, train_x, n_jobs):
257		fc_args = {'mdata': self._model_data}
258		jobs, pool_class = annif.parallel.get_pool(n_jobs)
259
260		with pool_class(jobs,
261		initializer=MLLMFeatureConverter.init,
262		initargs=(fc_args,)) as pool:
263		features = pool.map(
264		MLLMFeatureConverter.candidates_to_features, train_x, 10)
265
266		return features
267
268		def prepare_train(self, corpus, vocab, analyzer, params, n_jobs):
269		# create an index from the vocabulary terms
270		subject_ids = self._prepare_train_index(vocab, analyzer, params)
271
272		# convert the corpus into train data
273		train_x, train_y = self._prepare_train_data(
274		corpus, vocab, analyzer, n_jobs)
275
276		# precalculate idf values for all candidate subjects
277		self._idf = self._calculate_idf(subject_ids, len(train_x))
278
279		# convert the train data into feature values
280		features = self._prepare_features(train_x, n_jobs)
281
282		return (np.vstack(features), np.array(train_y))
283
284		def _create_classifier(self, params):
285		return BaggingClassifier(
286		DecisionTreeClassifier(
287		min_samples_leaf=int(params['min_samples_leaf']),
288		max_leaf_nodes=int(params['max_leaf_nodes'])
289		), max_samples=float(params['max_samples']))
290
291		def train(self, train_x, train_y, params):
292		# fit the model on the training corpus
293		self._classifier = self._create_classifier(params)
294		self._classifier.fit(train_x, train_y)
295		# sanity check: verify that the classifier has seen both classes
296		if self._classifier.n_classes_ != 2:
297		raise OperationFailedException(
298		"Unable to create classifier: " +
299		"Not enough positive and negative examples " +
300		"in the training data. Please check that your training " +
301		"data matches your vocabulary.")
302
303		def _prediction_to_list(self, scores, candidates):
304		subj_scores = [(score[1], c.subject_id)
305		for score, c in zip(scores, candidates)]
306		return sorted(subj_scores, reverse=True)
307
308		def predict(self, candidates):
309		if not candidates:
310		return []
311		features = self._candidates_to_features(candidates)
312		scores = self._classifier.predict_proba(features)
313		return self._prediction_to_list(scores, candidates)
314
315		def save(self, filename):
316		return joblib.dump(self, filename)
317
318		@staticmethod
319		def load(filename):
320		return joblib.load(filename)
321

NatLibFi / Annif

Pull Request — master (#600)

annif.lexical.mllm.MLLMModel._get_label_props() A

Complexity

Size

Duplication

Importance

Duplication Side-by-Side

Filter issues like