tracking_policy_agendas.classifiers.meta_clf.MetaClf.predict() - Code Metrics - Inspection of "Test and coverage" - MohammadForouhesh/tracking-policy-agendas - Measure and Improve Code Quality continuously with Scrutinizer

Passed

Pull Request — main (#5)

by Mohammad

created 2022-03-13 12:54 UTC

MetaClf.predict() A

↳ Parent: tracking_policy_agendas.classifiers.meta_clf

Complexity

Conditions

Size

Total Lines	4
Code Lines	4

Duplication

Lines	3
Ratio	75 %

Code Coverage

Tests	4
CRAP Score	1

Importance

Changes

Metric	Value
eloc	4
dl	3
loc	4
ccs	4
cts	4
cp	1
rs	10
c	0
b	0
f	0
cc	1
nop	2
crap	1

import os

import pickle
import pandas as pd
from tqdm import tqdm
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split

from ..api import get_resources
from ..preprocess.preprocessing import remove_redundant_characters, remove_emoji
from ..word2vec.w2v_emb import W2VEmb


class MetaClf:
    """Wrap a classifier with a text-embedding and feature-scaling pipeline.

    An instance is built in one of two ways:

    * training mode (``load_path`` is None): the texts are encoded with a
      ``W2VEmb`` built from ``embedding_doc``, a ``MinMaxScaler`` is fitted on
      the encoded vectors, and the scaled vectors are kept for ``fit``;
    * loading mode (``load_path`` given): resources are fetched through
      ``get_resources`` and the classifier, embedding and scaler are restored
      from ``model_dir/<load_path>/``.

    Attributes:
        clf: the wrapped classifier instance.
        emb: the ``W2VEmb`` embedding used to encode text.
        scaler: the fitted ``MinMaxScaler`` (None until fitted or loaded).
        dir_path: directory three levels above this file, with a trailing "/".
        labels: list of training labels (training mode only).
        encoded_input: scaled encoded training texts (training mode only).
    """

    def __init__(self, classifier_instance, text_array: list = None, embedding_doc: list = None,
                 labels: list = None, load_path: str = None):
        """Build the pipeline either from training data or from a saved model.

        Args:
            classifier_instance: classifier to wrap. ``fit`` calls its ``fit``,
                ``score`` and ``predict``; saving/loading call its
                ``save_model``/``load_model``.
            text_array: training texts (list or ``pd.Series``). Required when
                ``load_path`` is None. Missing values are treated as ''.
            embedding_doc: passed unchanged to ``W2VEmb``.
            labels: training labels, one per text. Required when ``load_path``
                is None.
            load_path: name of a saved model under ``model_dir/``. When given,
                the training arguments are ignored.

        Raises:
            ValueError: if ``load_path`` is None and ``text_array`` or
                ``labels`` is missing.
        """
        # Validate before any work is done. The check has to look at the raw
        # argument: once wrapped in a Series, a missing text_array is no longer None.
        if load_path is None and (text_array is None or labels is None):
            raise ValueError('text_array and labels are required when load_path is not given')

        self.clf = classifier_instance
        self.emb = W2VEmb()
        self.scaler = None
        self.dir_path = os.path.dirname(
            os.path.dirname(
                os.path.dirname(
                    os.path.realpath(__file__)))) + "/"

        if load_path is not None:
            get_resources(self.dir_path, resource_name=load_path)
            self.load_model(load_path)
            return

        if not isinstance(text_array, pd.Series):
            text_array = pd.Series(text_array)
        # Work on a filled copy so a Series owned by the caller is not mutated.
        text_array = text_array.fillna('')
        self.emb = W2VEmb(embedding_doc)

        encoded = [self.emb.encode(text) for text in tqdm(text_array)]
        self.labels = list(labels)
        self.scaler = self.prep_scaler(encoded)
        self.encoded_input = self.scaler.transform(encoded)

    def prep_scaler(self, encoded):
        """Return a ``MinMaxScaler`` fitted on the encoded vectors.

        Args:
            encoded: sequence of encoded text vectors.
        """
        scaler = MinMaxScaler()
        scaler.fit(encoded)
        return scaler

    def fit(self):
        """Train the wrapped classifier and print train/test reports.

        The stored encoded inputs are split 80/20 (stratified on the labels,
        ``random_state=42``); the classifier is fitted on the training part,
        then its test score and a classification report for each part are
        printed.

        Returns:
            The fitted classifier (``self.clf``).
        """
        X_train, X_test, y_train, y_test = train_test_split(
            self.encoded_input, self.labels, test_size=0.2,
            random_state=42, stratify=self.labels)
        self.clf.fit(X_train, y_train)
        print('score: ', self.clf.score(X_test, y_test))
        print('============================trian============================')
        print(classification_report(y_train, self.clf.predict(X_train)))
        print('=============================test============================')
        print(classification_report(y_test, self.clf.predict(X_test)))
        return self.clf

    def load_model(self, load_path: str):
        """Restore classifier, embedding and scaler from ``model_dir/<load_path>/``.

        Args:
            load_path: sub-directory of ``model_dir`` holding ``model.json``,
                ``emb.pkl`` and ``scaler.pkl``.
        """
        model_dir = f'model_dir/{load_path}'
        self.clf.load_model(f'{model_dir}/model.json')
        self.emb.load(f'{model_dir}/emb.pkl')
        # NOTE(review): pickle.load executes arbitrary code from the file; only
        # load scaler.pkl from a trusted source.
        with open(f'{model_dir}/scaler.pkl', 'rb') as scaler_file:
            self.scaler = pickle.load(scaler_file)

    def save_model(self, save_path: str):
        """Persist classifier, embedding and scaler under ``model_dir/<save_path>/``.

        The directory is created if it does not exist.

        Args:
            save_path: sub-directory of ``model_dir`` to write ``model.json``,
                ``emb.pkl`` and ``scaler.pkl`` into.
        """
        model_dir = f'model_dir/{save_path}'
        os.makedirs(model_dir, exist_ok=True)
        self.clf.save_model(f'{model_dir}/model.json')
        self.emb.save(f'{model_dir}/emb.pkl')
        with open(f'{model_dir}/scaler.pkl', 'wb') as scaler_file:
            pickle.dump(self.scaler, scaler_file, pickle.HIGHEST_PROTOCOL)

    def __getitem__(self, item: str) -> int:
        """Allow ``model[text]`` as shorthand for ``model.predict(text)``."""
        return self.predict(item)

    def predict(self, input_text: str) -> int:
        """Predict the label of a single text.

        The text is cleaned with ``remove_emoji`` and
        ``remove_redundant_characters``, encoded, reshaped to a single row,
        scaled, and passed to the classifier.

        Args:
            input_text: raw text to classify.

        Returns:
            The first (only) element of the classifier's prediction.
        """
        prep_text = remove_redundant_characters(remove_emoji(input_text))
        vector = self.scaler.transform(self.emb.encode(prep_text).reshape(1, -1))
        return self.clf.predict(vector)[0]


1	1		import os
			0 ignored issues – show introduced 2022-03-13 12:10 UTC by Report Bug Copy Issue Report Missing module docstring Loading history...
2	1		import pickle
3	1		import pandas as pd
4	1		from tqdm import tqdm
5	1		from sklearn.preprocessing import MinMaxScaler
6	1		from sklearn.metrics import classification_report
7	1		from sklearn.model_selection import train_test_split
8
9	1		from ..api import get_resources
10	1		from ..preprocess.preprocessing import remove_redundant_characters, remove_emoji
11	1		from ..word2vec.w2v_emb import W2VEmb
12
13
14	1	View Code Duplication	class MetaClf:
			0 ignored issues – show Duplication introduced 2022-03-13 12:10 UTC by Report Bug Copy Issue Report This code seems to be duplicated in your project. Loading history... introduced 2022-03-13 12:10 UTC by Report Bug Copy Issue Report Missing class docstring Loading history...
15	1		def __init__(self, classifier_instance, text_array: list = None, embedding_doc: list = None, labels: list = None, load_path: str = None):
			0 ignored issues – show Coding Style introduced 2022-03-13 12:10 UTC by Report Bug Copy Issue Report This line is too long as per the coding-style (141/100). This check looks for lines that are too long. You can specify the maximum line length. Loading history... best-practice introduced 2022-03-13 12:10 UTC by Report Bug Copy Issue Report Too many arguments (6/5) Loading history...
16	1		if not isinstance(text_array, pd.Series): text_array = pd.Series(text_array)
			0 ignored issues – show Coding Style introduced 2022-03-13 12:10 UTC by Report Bug Copy Issue Report More than one statement on a single line Loading history...
17
18	1		self.clf = classifier_instance
19	1		self.emb = W2VEmb()
20	1		self.scaler = None
21	1		self.dir_path = os.path.dirname(
22			os.path.dirname(
23			os.path.dirname(
24			os.path.realpath(__file__)))) + "/"
25	1		if load_path is not None:
26	1		get_resources(self.dir_path, resource_name=load_path)
27	1		self.load_model(load_path)
28			else:
29			assert text_array is not None and labels is not None
30			text_array.fillna('', inplace=True)
31			self.emb = W2VEmb(embedding_doc)
32
33			encoded = list(map(self.emb.encode, tqdm(text_array)))
34			self.labels = list(labels)
35			self.scaler = self.prep_scaler(encoded)
36			self.encoded_input = self.scaler.transform(encoded)
37
38	1		def prep_scaler(self, encoded):
			0 ignored issues – show introduced 2022-03-13 12:10 UTC by Report Bug Copy Issue Report Missing function or method docstring Loading history... Coding Style introduced 2022-03-13 12:10 UTC by Report Bug Copy Issue Report This method could be written as a function/class method. If a method does not access any attributes of the class, it could also be implemented as a function or static method. This can help improve readability. For example class Foo: def some_method(self, x, y): return x + y; could be written as class Foo: @classmethod def some_method(cls, x, y): return x + y; Loading history...
39			scaler = MinMaxScaler()
40			scaler.fit(encoded)
41			return scaler
42
43	1		def fit(self):
			0 ignored issues – show introduced 2022-03-13 12:10 UTC by Report Bug Copy Issue Report Missing function or method docstring Loading history...
44			X_train, X_test, y_train, y_test = train_test_split(self.encoded_input, self.labels, test_size=0.2,
			0 ignored issues – show Coding Style Naming introduced 2022-03-13 12:10 UTC by Report Bug Copy Issue Report Variable name "X_test" doesn't conform to snake_case naming style ('([^\\W\\dA-Z][^\\WA-Z]2,\|_[^\\WA-Z]\|__[^\\WA-Z\\d_][^\\WA-Z]+__)$' pattern) This check looks for invalid names for a range of different identifiers. You can set regular expressions to which the identifiers must conform if the defaults do not match your requirements. If your project includes a Pylint configuration file, the settings contained in that file take precedence. To find out more about Pylint, please refer to their site. Loading history... Coding Style Naming introduced 2022-03-13 12:10 UTC by Report Bug Copy Issue Report Variable name "X_train" doesn't conform to snake_case naming style ('([^\\W\\dA-Z][^\\WA-Z]2,\|_[^\\WA-Z]\|__[^\\WA-Z\\d_][^\\WA-Z]+__)$' pattern) This check looks for invalid names for a range of different identifiers. You can set regular expressions to which the identifiers must conform if the defaults do not match your requirements. If your project includes a Pylint configuration file, the settings contained in that file take precedence. To find out more about Pylint, please refer to their site. Loading history... Coding Style introduced 2022-03-13 12:10 UTC by Report Bug Copy Issue Report This line is too long as per the coding-style (107/100). This check looks for lines that are too long. You can specify the maximum line length. Loading history...
45			random_state=42, stratify=self.labels)
46			self.clf.fit(X_train, y_train)
47			print('score: ', self.clf.score(X_test, y_test))
48			print('============================trian============================')
49			print(classification_report(y_train, self.clf.predict(X_train)))
50			print('=============================test============================')
51			print(classification_report(y_test, self.clf.predict(X_test)))
52			return self.clf
53
54	1		def load_model(self, load_path: str):
			0 ignored issues – show introduced 2022-03-13 12:10 UTC by Report Bug Copy Issue Report Missing function or method docstring Loading history...
55	1		loading_prep = lambda string: f'model_dir/{load_path}/{string}'
56	1		self.clf.load_model(loading_prep('model.json'))
57	1		self.emb.load(loading_prep('emb.pkl'))
58	1		with open(loading_prep('scaler.pkl'), 'rb') as f:
			0 ignored issues – show Coding Style Naming introduced 2022-03-13 12:10 UTC by Report Bug Copy Issue Report Variable name "f" doesn't conform to snake_case naming style ('([^\\W\\dA-Z][^\\WA-Z]2,\|_[^\\WA-Z]*\|__[^\\WA-Z\\d_][^\\WA-Z]+__)$' pattern) This check looks for invalid names for a range of different identifiers. You can set regular expressions to which the identifiers must conform if the defaults do not match your requirements. If your project includes a Pylint configuration file, the settings contained in that file take precedence. To find out more about Pylint, please refer to their site. Loading history...
59	1		self.scaler = pickle.load(f)
60
61	1		def save_model(self, save_path: str):
			0 ignored issues – show introduced 2022-03-13 12:10 UTC by Report Bug Copy Issue Report Missing function or method docstring Loading history...
62			os.makedirs(f'model_dir/{save_path}', exist_ok=True)
63			saving_prep = lambda string: f'model_dir/{save_path}/{string}'
64			self.clf.save_model(saving_prep('model.json'))
65			self.emb.save(saving_prep('emb.pkl'))
66			with open(saving_prep('scaler.pkl'), 'wb') as f:
			0 ignored issues – show Coding Style Naming introduced 2022-03-13 12:10 UTC by Report Bug Copy Issue Report Variable name "f" doesn't conform to snake_case naming style ('([^\\W\\dA-Z][^\\WA-Z]2,\|_[^\\WA-Z]*\|__[^\\WA-Z\\d_][^\\WA-Z]+__)$' pattern) This check looks for invalid names for a range of different identifiers. You can set regular expressions to which the identifiers must conform if the defaults do not match your requirements. If your project includes a Pylint configuration file, the settings contained in that file take precedence. To find out more about Pylint, please refer to their site. Loading history...
67			pickle.dump(self.scaler, f, pickle.HIGHEST_PROTOCOL)
68
69	1		def __getitem__(self, item: str) -> int:
70	1		return self.predict(item)
71
72	1		def predict(self, input_text: str) -> int:
			0 ignored issues – show introduced 2022-03-13 12:10 UTC by Report Bug Copy Issue Report Missing function or method docstring Loading history...
73	1		prep_text = remove_redundant_characters(remove_emoji(input_text))
74	1		vector = self.scaler.transform(self.emb.encode(prep_text).reshape(1, -1))
75			return self.clf.predict(vector)[0]
76

MohammadForouhesh / tracking-policy-agendas

Pull Request — main (#5)

MetaClf.predict() A

Complexity

Size

Duplication

Code Coverage

Importance

Duplication Side-by-Side

Filter issues like