| 1 |  |  | from abc import ABC, abstractmethod | 
            
                                                                                                            
                            
            
                                    
            
            
                | 2 |  |  | from datetime import datetime | 
            
                                                                                                            
                            
            
                                    
            
            
                | 3 |  |  | import logging | 
            
                                                                                                            
                            
            
                                    
            
            
                | 4 |  |  | from typing import Any, ClassVar, Final | 
            
                                                                                                            
                            
            
                                    
            
            
                | 5 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 6 |  |  | from pandas import DataFrame | 
            
                                                                                                            
                            
            
                                    
            
            
                | 7 |  |  | import requests | 
            
                                                                                                            
                            
            
                                    
            
            
                | 8 |  |  | from requests import HTTPError | 
            
                                                                                                            
                            
            
                                    
            
            
                | 9 |  |  | from sqlalchemy import MetaData, Table | 
            
                                                                                                            
                            
            
                                    
            
            
                | 10 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 11 |  |  | # future: remove the comment below when stubs for the library below are available | 
            
                                                                                                            
                            
            
                                    
            
            
                | 12 |  |  | import xlrd  # type: ignore | 
            
                                                                                                            
                            
            
                                    
            
            
                | 13 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 14 |  |  | from src.kalauz.new_data_processors.database_connection import Database | 
            
                                                                                                            
                            
            
                                    
            
            
                | 15 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 16 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                class DataProcessor(ABC):
                    """Common base class for the data processors in this module.

                    Every instance is given a logger named after this module and a fresh
                    `Database` object.
                    """

                    # Computed once, when the class body is executed, not on every access.
                    TODAY = datetime.now().date()

                    def __init__(self) -> None:
                        """Initialise the parent class, then attach a logger and a database."""
                        super().__init__()

                        self.logger = logging.getLogger(name=__name__)
                        self.database = Database()
            
                                                                                                            
                            
            
                                    
            
            
                | 25 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 26 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                class TableUpdater(DataProcessor, ABC):
                    """Abstract processor that prepares data and writes it into one table.

                    `run()` calls `process_data()` followed by `store_data()`; subclasses
                    must implement `process_data()` and `add_data()`.
                    """

                    # Placeholders only: presumably overridden by concrete subclasses
                    # (NOTE(review): confirm against the subclasses in this project).
                    TABLE_NAME: ClassVar[str] = NotImplemented
                    database_metadata: ClassVar[MetaData] = NotImplemented
                    table: ClassVar[Table] = NotImplemented

                    def __init__(self) -> None:
                        """Initialise the parent and set the per-instance placeholders."""
                        super().__init__()

                        self.DATA_URL: str = NotImplemented

                        self.data: Any = NotImplemented

                    def run(self) -> None:
                        """Process the data, store it, then log the successful update."""
                        self.process_data()
                        self.store_data()

                        self.logger.info(f"Table `{self.TABLE_NAME}` successfully updated!")

                    @abstractmethod
                    def process_data(self) -> None:
                        """Prepare `self.data` for storing; implemented by subclasses."""
                        pass

                    def store_data(self) -> None:
                        """Make sure the table exists, then add the data to it."""
                        self.create_table_if_not_exists()
                        self.add_data()

                    def create_table_if_not_exists(self) -> None:
                        """Issue the table's CREATE through the database engine.

                        `checkfirst=True` asks SQLAlchemy to check for the table first, so
                        an already existing table is left untouched.
                        """
                        self.table.create(
                            bind=self.database.engine,
                            checkfirst=True,
                        )
                        self.logger.debug(f"Table `{self.TABLE_NAME}` successfully created (if needed)!")

                    @abstractmethod
                    def add_data(self) -> None:
                        """Write `self.data` into the table; implemented by subclasses."""
                        pass
            
                                                                                                            
                            
            
                                    
            
            
                | 63 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 64 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                class DataDownloader(TableUpdater, ABC):
                    """Abstract table updater that can download its source data over HTTP.

                    `process_data()` is implemented here as a fixed sequence of steps:
                    column renaming, row deletion, then data correction. Subclasses supply
                    the concrete steps through the abstract hooks.
                    """

                    # Seconds passed to `requests` as the timeout, so that an unresponsive
                    # server raises instead of blocking the updater forever.
                    DOWNLOAD_TIMEOUT_SECONDS: ClassVar[float] = 60.0

                    def __init__(self) -> None:
                        """Initialise the parent and open the HTTP session for downloads."""
                        super().__init__()

                        self.data: DataFrame = NotImplemented

                        self._dowload_session = requests.Session()

                    def get_data(self, url: str) -> bytes:
                        """Download `url` and return the response body as bytes.

                        Args:
                            url: Address of the file to download.

                        Returns:
                            The raw content of the HTTP response.

                        Raises:
                            requests.RequestException: If the request fails (connection
                                error, timeout) or the server answers with an error
                                status (`HTTPError`). The failure is logged as critical
                                and the original exception is re-raised unchanged.
                        """
                        try:
                            response = self._dowload_session.get(
                                url=url,
                                # Browser-like User-Agent; presumably needed because the
                                # data source rejects the default client one (TODO confirm).
                                headers={
                                    "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) "
                                    "AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.1.2 Safari/605.1.15"
                                },
                                timeout=self.DOWNLOAD_TIMEOUT_SECONDS,
                            )
                            response.raise_for_status()
                        except requests.RequestException:
                            # HTTPError is a RequestException, so error statuses are still
                            # covered; transport failures are now logged as well.
                            self.logger.critical(f"Failed to download file from {url}!")
                            raise

                        self.logger.debug(f"File successfully downloaded from {url}!")
                        return bytes(response.content)

                    def process_data(self) -> None:
                        """Run the processing steps: rename columns, delete, correct."""
                        self.correct_column_names()
                        self.delete_data()
                        self.correct_data()

                    def correct_column_names(self) -> None:
                        """Fix the column names by delegating to the manual renaming hook."""
                        self.rename_columns_manually()

                    @abstractmethod
                    def rename_columns_manually(self) -> None:
                        """Rename the columns of the data; implemented by subclasses."""
                        pass

                    def delete_data(self) -> None:
                        """Optional hook that does nothing here; subclasses may override it."""
                        pass

                    def correct_data(self) -> None:
                        """Apply the manual corrections first, then the boolean corrections."""
                        self.correct_data_manually()
                        self.correct_boolean_values()

                    @abstractmethod
                    def correct_data_manually(self) -> None:
                        """Apply source-specific corrections; implemented by subclasses."""
                        pass

                    @abstractmethod
                    def correct_boolean_values(self) -> None:
                        """Correct the boolean values of the data; implemented by subclasses."""
                        pass
            
                                                                                                            
                            
            
                                    
            
            
                | 115 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                | 116 |  |  |  | 
            
                                                                                                            
                            
            
                                    
            
            
                class UICTableUpdater(DataDownloader, ABC):
                    """Abstract ``DataDownloader`` subclass with a uic.org download URL prefix.

                    The three correction/renaming methods are overridden here as
                    no-ops.
                    """

                    def __init__(self) -> None:
                        """Run the parent initialiser, then set the URL prefix and the data placeholder."""
                        super().__init__()

                        # Download URL prefix.
                        # NOTE(review): presumably a file identifier is appended
                        # after ``arg=`` by the code that performs the download -- confirm.
                        self.DATA_BASE_URL: Final = "https://uic.org/spip.php?action=telecharger&arg="

                        # ``NotImplemented`` is used as a "not set yet" placeholder,
                        # although the attribute is annotated as ``bytes``.
                        self._data_to_process: bytes = NotImplemented

                    def rename_columns_manually(self) -> None:
                        """Do nothing and return ``None``."""

                    def correct_data_manually(self) -> None:
                        """Do nothing and return ``None``."""

                    def correct_boolean_values(self) -> None:
                        """Do nothing and return ``None``."""
            
                                                        
            
                                    
            
            
                | 133 |  |  |  |