| 
                    1
                 | 
                                    
                             1                          | 
                
                 | 
                import os  | 
            
                            
                    | 
                        
                     | 
                     | 
                     | 
                    
                                                                                                    
                        
                         
                                                                                        
                                                                                     
                     | 
                
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    2
                 | 
                                    
                             1                          | 
                
                 | 
                import pickle  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    3
                 | 
                                    
                             1                          | 
                
                 | 
                import pandas as pd  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    4
                 | 
                                    
                             1                          | 
                
                 | 
                from tqdm import tqdm  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    5
                 | 
                                    
                             1                          | 
                
                 | 
                from sklearn.preprocessing import MinMaxScaler  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    6
                 | 
                                    
                             1                          | 
                
                 | 
                from sklearn.metrics import classification_report  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    7
                 | 
                                    
                             1                          | 
                
                 | 
                from sklearn.model_selection import train_test_split  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    8
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    9
                 | 
                                    
                             1                          | 
                
                 | 
                from ..api import get_resources  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    10
                 | 
                                    
                             1                          | 
                
                 | 
                from ..preprocess.preprocessing import remove_redundant_characters, remove_emoji  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    11
                 | 
                                    
                             1                          | 
                
                 | 
                from ..word2vec.w2v_emb import W2VEmb  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    12
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    13
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                                                                            
                            
            
                                                                    
                                                                                                        
            
            
                | 
                    14
                 | 
                                    
                             1                          | 
                
                View Code Duplication | 
                class MetaClf:  | 
            
                            
                    | 
                        
                     | 
                     | 
                     | 
                    
                                                                                                                                                        
                        
                         
                                                                                        
                                                                                            
                                                                                     
                     | 
                
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    15
                 | 
                                    
                             1                          | 
                
                 | 
                    def __init__(self, classifier_instance, text_array: list = None, embedding_doc: list = None, labels: list = None, load_path: str = None):  | 
            
                            
                    | 
                        
                     | 
                     | 
                     | 
                    
                                                                                                                                                        
                        
                         
                                                                                        
                                                                                            
                                                                                     
                     | 
                
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    16
                 | 
                                    
                             1                          | 
                
                 | 
                        if not isinstance(text_array, pd.Series): text_array = pd.Series(text_array)  | 
            
                            
                    | 
                        
                     | 
                     | 
                     | 
                    
                                                                                                    
                        
                         
                                                                                        
                                                                                     
                     | 
                
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    17
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    18
                 | 
                                    
                             1                          | 
                
                 | 
                        self.clf = classifier_instance  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    19
                 | 
                                    
                             1                          | 
                
                 | 
                        self.emb = W2VEmb()  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    20
                 | 
                                    
                             1                          | 
                
                 | 
                        self.scaler = None  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    21
                 | 
                                    
                             1                          | 
                
                 | 
                        self.dir_path = os.path.dirname(  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    22
                 | 
                                    
                                                     | 
                
                 | 
                            os.path.dirname(  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    23
                 | 
                                    
                                                     | 
                
                 | 
                                os.path.dirname(  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    24
                 | 
                                    
                                                     | 
                
                 | 
                                    os.path.realpath(__file__)))) + "/"  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    25
                 | 
                                    
                             1                          | 
                
                 | 
                        if load_path is not None:  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    26
                 | 
                                    
                             1                          | 
                
                 | 
                            get_resources(self.dir_path, resource_name=load_path)  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    27
                 | 
                                    
                             1                          | 
                
                 | 
                            self.load_model(load_path)  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    28
                 | 
                                    
                                                     | 
                
                 | 
                        else:  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    29
                 | 
                                    
                                                     | 
                
                 | 
                            assert text_array is not None and labels is not None  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    30
                 | 
                                    
                                                     | 
                
                 | 
                            text_array.fillna('', inplace=True) | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    31
                 | 
                                    
                                                     | 
                
                 | 
                            self.emb = W2VEmb(embedding_doc)  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    32
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    33
                 | 
                                    
                                                     | 
                
                 | 
                            encoded = list(map(self.emb.encode, tqdm(text_array)))  | 
            
            
                                                                                                            
                                                                
            
                                    
            
            
                | 
                    34
                 | 
                                    
                                                     | 
                
                 | 
                            self.labels = list(labels)  | 
            
            
                                                                        
                            
            
                                    
            
            
                | 
                    35
                 | 
                                    
                                                     | 
                
                 | 
                            self.scaler = self.prep_scaler(encoded)  | 
            
            
                                                                        
                            
            
                                    
            
            
                | 
                    36
                 | 
                                    
                                                     | 
                
                 | 
                            self.encoded_input = self.scaler.transform(encoded)  | 
            
            
                                                                        
                            
            
                                    
            
            
                | 
                    37
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                                        
                            
            
                                    
            
            
                | 
                    38
                 | 
                                    
                             1                          | 
                
                 | 
                    def prep_scaler(self, encoded):  | 
            
                            
                    | 
                        
                     | 
                     | 
                     | 
                    
                                                                                                                                                        
                        
                         
                                                                                        
                                                                                            
                                                                                     
                     | 
                
            
                                                                        
                            
            
                                    
            
            
                | 
                    39
                 | 
                                    
                                                     | 
                
                 | 
                        scaler = MinMaxScaler()  | 
            
            
                                                                        
                            
            
                                    
            
            
                | 
                    40
                 | 
                                    
                                                     | 
                
                 | 
                        scaler.fit(encoded)  | 
            
            
                                                                        
                            
            
                                    
            
            
                | 
                    41
                 | 
                                    
                                                     | 
                
                 | 
                        return scaler  | 
            
            
                                                                        
                            
            
                                    
            
            
                | 
                    42
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                                        
                            
            
                                    
            
            
                | 
                    43
                 | 
                                    
                             1                          | 
                
                 | 
                    def fit(self):  | 
            
                            
                    | 
                        
                     | 
                     | 
                     | 
                    
                                                                                                    
                        
                         
                                                                                        
                                                                                     
                     | 
                
            
                                                                        
                            
            
                                    
            
            
                | 
                    44
                 | 
                                    
                                                     | 
                
                 | 
                        X_train, X_test, y_train, y_test = train_test_split(self.encoded_input, self.labels, test_size=0.2,  | 
            
                            
                    | 
                        
                     | 
                     | 
                     | 
                    
                                                                                                                                                                                                            
                        
                         
                                                                                        
                                                                                            
                                                                                            
                                                                                     
                     | 
                
            
                                                                        
                            
            
                                    
            
            
                | 
                    45
                 | 
                                    
                                                     | 
                
                 | 
                                                                            random_state=42, stratify=self.labels)  | 
            
            
                                                                        
                            
            
                                    
            
            
                | 
                    46
                 | 
                                    
                                                     | 
                
                 | 
                        self.clf.fit(X_train, y_train)  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    47
                 | 
                                    
                                                     | 
                
                 | 
                        print('score: ', self.clf.score(X_test, y_test)) | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    48
                 | 
                                    
                                                     | 
                
                 | 
                        print('============================trian============================') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    49
                 | 
                                    
                                                     | 
                
                 | 
                        print(classification_report(y_train, self.clf.predict(X_train)))  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    50
                 | 
                                    
                                                     | 
                
                 | 
                        print('=============================test============================') | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    51
                 | 
                                    
                                                     | 
                
                 | 
                        print(classification_report(y_test, self.clf.predict(X_test)))  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    52
                 | 
                                    
                                                     | 
                
                 | 
                        return self.clf  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    53
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    54
                 | 
                                    
                             1                          | 
                
                 | 
                    def load_model(self, load_path: str):  | 
            
                            
                    | 
                        
                     | 
                     | 
                     | 
                    
                                                                                                    
                        
                         
                                                                                        
                                                                                     
                     | 
                
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    55
                 | 
                                    
                             1                          | 
                
                 | 
                        loading_prep = lambda string: f'model_dir/{load_path}/{string}' | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    56
                 | 
                                    
                             1                          | 
                
                 | 
                        self.clf.load_model(loading_prep('model.json')) | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    57
                 | 
                                    
                             1                          | 
                
                 | 
                        self.emb.load(loading_prep('emb.pkl')) | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    58
                 | 
                                    
                             1                          | 
                
                 | 
                        with open(loading_prep('scaler.pkl'), 'rb') as f: | 
            
                            
                    | 
                        
                     | 
                     | 
                     | 
                    
                                                                                                    
                        
                         
                                                                                        
                                                                                     
                     | 
                
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    59
                 | 
                                    
                             1                          | 
                
                 | 
                            self.scaler = pickle.load(f)  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    60
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    61
                 | 
                                    
                             1                          | 
                
                 | 
                    def save_model(self, save_path: str):  | 
            
                            
                    | 
                        
                     | 
                     | 
                     | 
                    
                                                                                                    
                        
                         
                                                                                        
                                                                                     
                     | 
                
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    62
                 | 
                                    
                                                     | 
                
                 | 
                        os.makedirs(f'model_dir/{save_path}', exist_ok=True) | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    63
                 | 
                                    
                                                     | 
                
                 | 
                        saving_prep = lambda string: f'model_dir/{save_path}/{string}' | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    64
                 | 
                                    
                                                     | 
                
                 | 
                        self.clf.save_model(saving_prep('model.json')) | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    65
                 | 
                                    
                                                     | 
                
                 | 
                        self.emb.save(saving_prep('emb.pkl')) | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    66
                 | 
                                    
                                                     | 
                
                 | 
                        with open(saving_prep('scaler.pkl'), 'wb') as f: | 
            
                            
                    | 
                        
                     | 
                     | 
                     | 
                    
                                                                                                    
                        
                         
                                                                                        
                                                                                     
                     | 
                
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    67
                 | 
                                    
                                                     | 
                
                 | 
                            pickle.dump(self.scaler, f, pickle.HIGHEST_PROTOCOL)  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    68
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    69
                 | 
                                    
                             1                          | 
                
                 | 
                    def __getitem__(self, item: str) -> int:  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    70
                 | 
                                    
                             1                          | 
                
                 | 
                        return self.predict(item)  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    71
                 | 
                                    
                                                     | 
                
                 | 
                 | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    72
                 | 
                                    
                             1                          | 
                
                 | 
                    def predict(self, input_text: str) -> int:  | 
            
                            
                    | 
                        
                     | 
                     | 
                     | 
                    
                                                                                                    
                        
                         
                                                                                        
                                                                                     
                     | 
                
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    73
                 | 
                                    
                             1                          | 
                
                 | 
                        prep_text = remove_redundant_characters(remove_emoji(input_text))  | 
            
            
                                                                                                            
                            
            
                                    
            
            
                | 
                    74
                 | 
                                    
                             1                          | 
                
                 | 
                        vector = self.scaler.transform(self.emb.encode(prep_text).reshape(1, -1))  | 
            
            
                                                                                                            
                                                                
            
                                    
            
            
                | 
                    75
                 | 
                                    
                                                     | 
                
                 | 
                        return self.clf.predict(vector)[0]  | 
            
            
                                                        
            
                                    
            
            
                | 
                    76
                 | 
                                    
                                                     | 
                
                 | 
                 |