| 1 | 1 |  | import os | 
                            
                    |  |  |  | 
                                                                                        
                                                                                     | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2 | 1 |  | import pickle | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3 | 1 |  | import pandas as pd | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4 | 1 |  | from tqdm import tqdm | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5 | 1 |  | from sklearn.preprocessing import MinMaxScaler | 
            
                                                                                                            
                            
            
                                    
            
            
                | 6 | 1 |  | from sklearn.metrics import classification_report | 
            
                                                                                                            
                            
            
                                    
            
            
                | 7 | 1 |  | from sklearn.model_selection import train_test_split | 
            
                                                                                                            
                            
            
                                    
            
            
                | 8 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 9 | 1 |  | from ..api import get_resources | 
            
                                                                                                            
                            
            
                                    
            
            
                | 10 | 1 |  | from ..preprocess.preprocessing import remove_redundant_characters, remove_emoji | 
            
                                                                                                            
                            
            
                                    
            
            
                | 11 | 1 |  | from ..word2vec.w2v_emb import W2VEmb | 
            
                                                                                                            
                            
            
                                    
            
            
                | 12 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 13 |  |  |  | 
            
                                                                                                            
                            
            
                                                                    
                                                                                                        
            
            
                | 14 | 1 | View Code Duplication | class MetaClf: | 
                            
                    |  |  |  | 
                                                                                        
                                                                                            
                                                                                     | 
            
                                                                                                            
                            
            
                                    
            
            
                | 15 | 1 |  |     def __init__(self, classifier_instance, text_array: list = None, embedding_doc: list = None, labels: list = None, load_path: str = None): | 
                            
                    |  |  |  | 
                                                                                        
                                                                                            
                                                                                     | 
            
                                                                                                            
                            
            
                                    
            
            
                | 16 | 1 |  |         if not isinstance(text_array, pd.Series): text_array = pd.Series(text_array) | 
                            
                    |  |  |  | 
                                                                                        
                                                                                     | 
            
                                                                                                            
                            
            
                                    
            
            
                | 17 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 18 | 1 |  |         self.clf = classifier_instance | 
            
                                                                                                            
                            
            
                                    
            
            
                | 19 | 1 |  |         self.emb = W2VEmb() | 
            
                                                                                                            
                            
            
                                    
            
            
                | 20 | 1 |  |         self.scaler = None | 
            
                                                                                                            
                            
            
                                    
            
            
                | 21 | 1 |  |         self.dir_path = os.path.dirname( | 
            
                                                                                                            
                            
            
                                    
            
            
                | 22 |  |  |             os.path.dirname( | 
            
                                                                                                            
                            
            
                                    
            
            
                | 23 |  |  |                 os.path.dirname( | 
            
                                                                                                            
                            
            
                                    
            
            
                | 24 |  |  |                     os.path.realpath(__file__)))) + "/" | 
            
                                                                                                            
                            
            
                                    
            
            
                | 25 | 1 |  |         if load_path is not None: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 26 | 1 |  |             get_resources(self.dir_path, resource_name=load_path) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 27 | 1 |  |             self.load_model(load_path) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 28 |  |  |         else: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 29 |  |  |             assert text_array is not None and labels is not None | 
            
                                                                                                            
                            
            
                                    
            
            
                | 30 |  |  |             text_array.fillna('', inplace=True) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 31 |  |  |             self.emb = W2VEmb(embedding_doc) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 32 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 33 |  |  |             encoded = list(map(self.emb.encode, tqdm(text_array))) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 34 |  |  |             self.labels = list(labels) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 35 |  |  |             self.scaler = self.prep_scaler(encoded) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 36 |  |  |             self.encoded_input = self.scaler.transform(encoded) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 37 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 38 | 1 |  |     def prep_scaler(self, encoded): | 
                            
                    |  |  |  | 
                                                                                        
                                                                                            
                                                                                     | 
            
                                                                                                            
                            
            
                                    
            
            
                | 39 |  |  |         scaler = MinMaxScaler() | 
            
                                                                                                            
                            
            
                                    
            
            
                | 40 |  |  |         scaler.fit(encoded) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 41 |  |  |         return scaler | 
            
                                                                                                            
                            
            
                                    
            
            
                | 42 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 43 | 1 |  |     def fit(self): | 
                            
                    |  |  |  | 
                                                                                        
                                                                                     | 
            
                                                                                                            
                            
            
                                    
            
            
                | 44 |  |  |         X_train, X_test, y_train, y_test = train_test_split(self.encoded_input, self.labels, test_size=0.2, | 
                            
                    |  |  |  | 
                                                                                        
                                                                                            
                                                                                            
                                                                                     | 
            
                                                                                                            
                            
            
                                    
            
            
                | 45 |  |  |                                                             random_state=42, stratify=self.labels) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 46 |  |  |         self.clf.fit(X_train, y_train) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 47 |  |  |         print('score: ', self.clf.score(X_test, y_test)) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 48 |  |  |         print('============================trian============================') | 
            
                                                                                                            
                            
            
                                    
            
            
                | 49 |  |  |         print(classification_report(y_train, self.clf.predict(X_train))) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 50 |  |  |         print('=============================test============================') | 
            
                                                                                                            
                            
            
                                    
            
            
                | 51 |  |  |         print(classification_report(y_test, self.clf.predict(X_test))) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 52 |  |  |         return self.clf | 
            
                                                                                                            
                            
            
                                    
            
            
                | 53 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 54 | 1 |  |     def load_model(self, load_path: str): | 
                            
                    |  |  |  | 
                                                                                        
                                                                                     | 
            
                                                                                                            
                            
            
                                    
            
            
                | 55 | 1 |  |         loading_prep = lambda string: f'model_dir/{load_path}/{string}' | 
            
                                                                                                            
                            
            
                                    
            
            
                | 56 | 1 |  |         self.clf.load_model(loading_prep('model.json')) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 57 | 1 |  |         self.emb.load(loading_prep('emb.pkl')) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 58 | 1 |  |         with open(loading_prep('scaler.pkl'), 'rb') as f: | 
                            
                    |  |  |  | 
                                                                                        
                                                                                     | 
            
                                                                                                            
                            
            
                                    
            
            
                | 59 | 1 |  |             self.scaler = pickle.load(f) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 60 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 61 | 1 |  |     def save_model(self, save_path: str): | 
                            
                    |  |  |  | 
                                                                                        
                                                                                     | 
            
                                                                                                            
                            
            
                                    
            
            
                | 62 |  |  |         os.makedirs(f'model_dir/{save_path}', exist_ok=True) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 63 |  |  |         saving_prep = lambda string: f'model_dir/{save_path}/{string}' | 
            
                                                                                                            
                            
            
                                    
            
            
                | 64 |  |  |         self.clf.save_model(saving_prep('model.json')) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 65 |  |  |         self.emb.save(saving_prep('emb.pkl')) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 66 |  |  |         with open(saving_prep('scaler.pkl'), 'wb') as f: | 
                            
                    |  |  |  | 
                                                                                        
                                                                                     | 
            
                                                                                                            
                            
            
                                    
            
            
                | 67 |  |  |             pickle.dump(self.scaler, f, pickle.HIGHEST_PROTOCOL) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 68 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 69 | 1 |  |     def __getitem__(self, item: str) -> int: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 70 | 1 |  |         return self.predict(item) | 
            
                                                                                                            
                                                                
            
                                    
            
            
                | 71 |  |  |  | 
            
                                                        
            
                                    
            
            
                | 72 | 1 |  |     def predict(self, input_text: str) -> int: | 
                            
                    |  |  |  | 
                                                                                        
                                                                                     | 
            
                                                        
            
                                    
            
            
                | 73 | 1 |  |         prep_text = remove_redundant_characters(remove_emoji(input_text)) | 
            
                                                        
            
                                    
            
            
                | 74 | 1 |  |         vector = self.scaler.transform(self.emb.encode(prep_text).reshape(1, -1)) | 
            
                                                        
            
                                    
            
            
                | 75 |  |  |         return self.clf.predict(vector)[0] | 
            
                                                        
            
                                    
            
            
                | 76 |  |  |  |