| 1 |  |  | from sklearn.manifold import TSNE | 
                            
                    |  |  |  | 
                                                                                        
                                                                                     | 
            
                                                        
            
                                    
            
            
                | 2 |  |  | import matplotlib.pyplot as plt | 
            
                                                        
            
                                    
            
            
                | 3 |  |  | import numpy as np | 
            
                                                        
            
                                    
            
            
                | 4 |  |  | import random | 
                            
                    |  |  |  | 
                                                                                        
                                                                                     | 
            
                                                        
            
                                    
            
            
                | 5 |  |  |  | 
            
                                                        
            
                                    
            
            
                | 6 |  |  |  | 
            
                                                        
            
                                                                    
                                                                                                        
            
            
                | 7 |  | View Code Duplication | def reduce_dimensions(w2v_model): | 
                            
                    |  |  |  | 
                                                                                        
                                                                                            
                                                                                     | 
            
                                                        
            
                                    
            
            
                | 8 |  |  |     num_dimensions = 2 | 
            
                                                        
            
                                    
            
            
                | 9 |  |  |     vectors = np.asarray(w2v_model.wv.vectors) | 
            
                                                        
            
                                    
            
            
                | 10 |  |  |     labels = np.asarray(w2v_model.wv.index_to_key) | 
                            
                    |  |  |  | 
                                                                                        
                                                                                     | 
            
                                                        
            
                                    
            
            
                | 11 |  |  |  | 
            
                                                        
            
                                    
            
            
                | 12 |  |  |     tsne = TSNE(n_components=num_dimensions, random_state=42) | 
            
                                                        
            
                                    
            
            
                | 13 |  |  |     vectors = tsne.fit_transform(vectors) | 
            
                                                        
            
                                    
            
            
                | 14 |  |  |  | 
            
                                                        
            
                                    
            
            
                | 15 |  |  |     x_evals = [v[0] for v in vectors] | 
                            
                    |  |  |  | 
                                                                                        
                                                                                     | 
            
                                                        
            
                                    
            
            
                | 16 |  |  |     y_evals = [v[1] for v in vectors] | 
                            
                    |  |  |  | 
                                                                                        
                                                                                     | 
            
                                                        
            
                                    
            
            
                | 17 |  |  |     return x_evals, y_evals, labels | 
            
                                                        
            
                                    
            
            
                | 18 |  |  |  | 
            
                                                        
            
                                    
            
            
                | 19 |  |  |  | 
            
                                                        
            
                                                                    
                                                                                                        
            
            
                | 20 |  | View Code Duplication | def plot_with_matplotlib(x_evals, y_evals, labels): | 
                            
                    |  |  |  | 
                                                                                        
                                                                                            
                                                                                            
                                                                                            
                                                                                            
                                                                                     | 
            
                                                        
            
                                    
            
            
                | 21 |  |  |     random.seed(0) | 
            
                                                        
            
                                    
            
            
                | 22 |  |  |  | 
            
                                                        
            
                                    
            
            
                | 23 |  |  |     plt.figure(figsize=(12, 12)) | 
            
                                                        
            
                                    
            
            
                | 24 |  |  |     plt.scatter(x_evals, y_evals) | 
            
                                                        
            
                                    
            
            
                | 25 |  |  |     indices = list(range(len(labels))) | 
            
                                                        
            
                                    
            
            
                | 26 |  |  |     selected_indices = random.sample(indices, 25) | 
            
                                                        
            
                                    
            
            
                | 27 |  |  |     for i in selected_indices: | 
            
                                                        
            
                                    
            
            
                | 28 |  |  |         plt.annotate(labels[i], (x_evals[i], y_evals[i])) | 
            
                                                        
            
                                    
            
            
                | 29 |  |  |     plt.savefig('w2v_plot.png') | 
            
                                                        
            
                                    
            
            
                | 30 |  |  |     plt.show() | 
            
                                                        
            
                                    
            
            
                | 31 |  |  |  | 
            
                                                        
            
                                    
            
            
                | 32 |  |  |  | 
            
                                                        
            
                                                                    
                                                                                                        
            
            
                | 33 |  | View Code Duplication | if __name__ == '__main__': | 
                            
                    |  |  |  | 
                                                                                        
                                                                                     | 
            
                                                        
            
                                    
            
            
                | 34 |  |  |     from .w2v_corpus import W2VCorpus | 
            
                                                        
            
                                    
            
            
                | 35 |  |  |     import pandas as pd | 
            
                                                        
            
                                    
            
            
                | 36 |  |  |     import gensim.models | 
            
                                                        
            
                                    
            
            
                | 37 |  |  |     corpus = pd.read_excel('test_corpus.xlsx') | 
            
                                                        
            
                                    
            
            
                | 38 |  |  |     print(corpus.head()) | 
            
                                                        
            
                                    
            
            
                | 39 |  |  |     corpus = W2VCorpus(list(corpus[0])) | 
            
                                                        
            
                                    
            
            
                | 40 |  |  |     model = gensim.models.Word2Vec(sentences=corpus) | 
            
                                                        
            
                                    
            
            
                | 41 |  |  |     x_evals, y_evals, labels = reduce_dimensions(model) | 
            
                                                        
            
                                    
            
            
                | 42 |  |  |     plot_with_matplotlib(x_evals, y_evals, labels) | 
            
                                                        
            
                                    
            
            
                | 43 |  |  |  |