| 1 |  |  | """Common functionality for transforming text of input documents.""" | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3 |  |  | import abc | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4 |  |  | from annif.corpus import TransformingDocumentCorpus | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5 |  |  | from annif.exception import ConfigurationException | 
            
                                                                                                            
                            
            
                                    
            
            
                | 6 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 7 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 8 |  |  | class BaseTransform(metaclass=abc.ABCMeta): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 9 |  |  |     """Base class for text transformations, which need to implement the | 
            
                                                                                                            
                            
            
                                    
            
            
                | 10 |  |  |     transform function.""" | 
            
                                                                                                            
                            
            
                                    
            
            
                | 11 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 12 |  |  |     name = None | 
            
                                                                                                            
                            
            
                                    
            
            
                | 13 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 14 |  |  |     def __init__(self, project): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 15 |  |  |         self.project = project | 
            
                                                                                                            
                            
            
                                    
            
            
                | 16 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 17 |  |  |     @abc.abstractmethod | 
            
                                                                                                            
                            
            
                                    
            
            
                | 18 |  |  |     def transform_fn(self, text): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 19 |  |  |         """Perform the text transformation.""" | 
            
                                                                                                            
                            
            
                                    
            
            
                | 20 |  |  |         pass  # pragma: no cover | 
            
                                                                                                            
                            
            
                                    
            
            
                | 21 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 22 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 23 |  |  | class IdentityTransform(BaseTransform): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 24 |  |  |     """Transform that does not modify text but simply passes it through.""" | 
            
                                                                                                            
                            
            
                                    
            
            
                | 25 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 26 |  |  |     name = 'pass' | 
            
                                                                                                            
                            
            
                                    
            
            
                | 27 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 28 |  |  |     def transform_fn(self, text): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 29 |  |  |         return text | 
            
                                                                                                            
                            
            
                                    
            
            
                | 30 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 31 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 32 |  |  | class TransformChain(): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 33 |  |  |     """Class instantiating and holding the transformation objects performing | 
            
                                                                                                            
                            
            
                                    
            
            
                | 34 |  |  |     the actual text transformation.""" | 
            
                                                                                                            
                            
            
                                    
            
            
                | 35 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 36 |  |  |     def __init__(self, transform_classes, args, project): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 37 |  |  |         self.project = project | 
            
                                                                                                            
                            
            
                                    
            
            
                | 38 |  |  |         self.transforms = self._init_transforms(transform_classes, args) | 
            
                                                                                                            
                                                                
            
                                    
            
            
                | 39 |  |  |  | 
            
                                                                        
                            
            
                                    
            
            
                | 40 |  |  |     def _init_transforms(self, transform_classes, args): | 
            
                                                                        
                            
            
                                    
            
            
                | 41 |  |  |         transforms = [] | 
            
                                                                        
                            
            
                                    
            
            
                | 42 |  |  |         for trans, (posargs, kwargs) in zip(transform_classes, args): | 
            
                                                                        
                            
            
                                    
            
            
                | 43 |  |  |             try: | 
            
                                                                        
                            
            
                                    
            
            
                | 44 |  |  |                 transforms.append( | 
            
                                                                        
                            
            
                                    
            
            
                | 45 |  |  |                     trans(self.project, *posargs, **kwargs)) | 
            
                                                                        
                            
            
                                    
            
            
                | 46 |  |  |             except (ValueError, TypeError): | 
            
                                                                        
                            
            
                                    
            
            
                | 47 |  |  |                 raise ConfigurationException( | 
            
                                                                        
                            
            
                                    
            
            
                | 48 |  |  |                     f"Invalid arguments to {trans.name} transform: " | 
            
                                                                        
                            
            
                                    
            
            
                | 49 |  |  |                     f"{posargs}, {kwargs})", | 
            
                                                                        
                            
            
                                    
            
            
                | 50 |  |  |                     project_id=self.project.project_id) | 
            
                                                                        
                            
            
                                    
            
            
                | 51 |  |  |         return transforms | 
            
                                                                                                            
                            
            
                                    
            
            
                | 52 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 53 |  |  |     def transform_text(self, text): | 
            
                                                                                                            
                            
            
                                    
            
            
                | 54 |  |  |         for trans in self.transforms: | 
            
                                                                                                            
                            
            
                                    
            
            
                | 55 |  |  |             text = trans.transform_fn(text) | 
            
                                                                                                            
                            
            
                                    
            
            
                | 56 |  |  |         return text | 
            
                                                                                                            
                            
            
                                    
            
            
                | 57 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 58 |  |  |     def transform_corpus(self, corpus): | 
            
                                                                                                            
                                                                
            
                                    
            
            
                | 59 |  |  |         return TransformingDocumentCorpus(corpus, self.transform_text) | 
            
                                                        
            
                                    
            
            
                | 60 |  |  |  |