| Total Complexity | 56 |
| Total Lines | 284 |
| Duplicated Lines | 0 % |
| Changes | 0 | ||
Complex classes like ExternRefTransformer often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to finding such a component is to look for fields/methods that share the same prefixes or suffixes.
Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.
While breaking up the class, it is a good idea to analyze how other classes use ExternRefTransformer, and based on these observations, apply Extract Interface, too.
| 1 | <?php |
||
| 33 | class ExternRefTransformer implements TransformerInterface |
||
| 34 | { |
||
| 35 | |||
/** File receiving one "domain,url" line each time a page is skipped for incomplete mapping. */
const SKIPPED_FILE_LOG = __DIR__.'/resources/external_skipped.log';

/** File receiving the domain when fetching the external page raised an exception. */
const LOG_REQUEST_ERROR = __DIR__.'/resources/external_request_error.log';

/**
 * When true, URLs whose domain has no entry in the configuration are rejected.
 *
 * @var bool
 */
public $skipUnauthorised = true;

/**
 * Site (or periodical) name collected for every successfully mapped URL.
 *
 * @var array
 */
public $summaryLog = [];

/**
 * Logger injected through the constructor.
 *
 * @var LoggerInterface
 */
protected $log;

/**
 * Configuration parsed from config_presse.yaml, looked up by domain.
 *
 * @var mixed
 */
private $config;

/**
 * Domain extracted from the URL currently being checked.
 *
 * @var string|string[]
 */
private $domain;

/**
 * URL currently being processed.
 *
 * @var string
 */
private $url;

/**
 * Mapper turning the extracted page data into template parameters.
 *
 * @var ExternMapper
 */
private $mapper;

/**
 * Reference data sets, stored under the keys 'newspaper',
 * 'scientific domain' and 'scientific wiki'.
 *
 * @var array
 */
private $data = [];

/**
 * Domains that must not be processed.
 *
 * @var array
 */
private $skip_domain = [];

/**
 * Last external page built from a URL.
 *
 * @var \App\Domain\ExternPage
 */
private $externalPage;
||
| 72 | |||
/**
 * ExternRefTransformer constructor.
 *
 * Stores the logger, then loads from the resources directory the per-domain
 * configuration, the list of domains to skip and the three JSON reference
 * data sets, and finally instantiates the mapper.
 *
 * @param LoggerInterface $log
 */
public function __construct(LoggerInterface $log)
{
    $this->log = $log;

    // todo REFAC DataObject[]
    $this->config = Yaml::parseFile(__DIR__.'/resources/config_presse.yaml');

    // file() keeps the trailing line ending of each entry unless told otherwise;
    // with the newline attached no entry could ever equal an extracted domain.
    $skipFromFile = file(
        __DIR__.'/resources/config_skip_domain.txt',
        FILE_IGNORE_NEW_LINES | FILE_SKIP_EMPTY_LINES
    );
    // file() returns false when the file cannot be read: fall back to an empty list.
    $this->skip_domain = ($skipFromFile) ? $skipFromFile : [];

    $this->data['newspaper'] = json_decode(
        file_get_contents(__DIR__.'/resources/data_newspapers.json'),
        true
    );
    $this->data['scientific domain'] = json_decode(
        file_get_contents(__DIR__.'/resources/data_scientific_domain.json'),
        true
    );
    $this->data['scientific wiki'] = json_decode(
        file_get_contents(__DIR__.'/resources/data_scientific_wiki.json'),
        true
    );

    // NOTE(review): the mapper receives a fresh Logger rather than the injected $log — confirm this is intended.
    $this->mapper = new ExternMapper(new Logger());
}
||
| 99 | |||
/**
 * Turn a raw URL into a serialized wiki template.
 *
 * The input is returned unchanged when the URL is not authorised, when the
 * page cannot be fetched, when it exposes neither JSON-LD nor meta data,
 * when it is flagged "noindex", or when the mapping lacks a URL or a title.
 *
 * @param string $string candidate URL
 *
 * @return string the serialized template, or the original string when skipped
 * @throws \Exception
 */
public function process(string $string): string
{
    if (!$this->isURLAutorized($string)) {
        return $string;
    }

    // Explicit default so the checks below do not rely on an undefined variable
    // when the fetch throws.
    $pageData = [];
    try {
        // Fixed pause before each external request.
        sleep(5);
        $this->externalPage = ExternPageFactory::fromURL($string, $this->log);
        $pageData = $this->externalPage->getData();
        $this->log->debug('metaData', $pageData);
    } catch (\Exception $e) {
        // Do not generate a {lien brisé} (broken link) template: the 404 may be temporary.
        $this->log->notice('erreur sur extractWebData '.$e->getMessage());
        // Append one domain per line instead of overwriting the log on every failure.
        file_put_contents(self::LOG_REQUEST_ERROR, $this->domain."\n", FILE_APPEND);
    }

    if (empty($pageData)
        || (empty($pageData['JSON-LD']) && empty($pageData['meta']))
    ) {
        // Site with unusable HTML (or failed request): nothing to map.
        return $string;
    }

    if (isset($pageData['robots']) && strpos($pageData['robots'], 'noindex') !== false) {
        $this->log->notice('SKIP robots: noindex');

        return $string;
    }

    $mapData = $this->mapper->process($pageData);

    // Check that the mapped data is usable.
    if (empty($mapData) || empty($mapData['url']) || empty($mapData['titre'])) {
        // Remember the domain so that later URLs of the same domain are skipped.
        $this->skip_domain[] = $this->domain;
        $this->log->info('Mapping incomplet');
        // Todo : temp data
        try {
            file_put_contents(self::SKIPPED_FILE_LOG, $this->domain.",".$this->url."\n", FILE_APPEND);
        } catch (\Throwable $e) {
            // Best effort: a failure to write this log must not abort processing.
            unset($e);
        }

        return $string;
    }

    $this->tagAndLog($mapData);
    $this->addSummaryLog($mapData);

    $template = $this->chooseTemplateByData($mapData);

    $mapData = $this->replaceSitenameByConfig($mapData, $template);
    $mapData = $this->replaceURLbyOriginal($mapData);

    $template->hydrate($mapData);

    $serialized = $template->serialize(true);
    $this->log->info($serialized."\n");

    return Normalizer::normalize($serialized);
}
||
| 166 | |||
/**
 * Decide whether a string is a URL that may be processed.
 *
 * On a syntactically valid URL this also records the URL and its domain on
 * the instance, and normalises the configuration entry of that domain to
 * an array.
 *
 * @param string $string candidate URL
 *
 * @return bool false when the string is not an http(s) URL without spaces,
 *              when the domain is in the skip list, when it is not configured
 *              (and $skipUnauthorised is true), or when it is deactivated
 * @throws \Exception
 */
protected function isURLAutorized(string $string): bool
{
    // Accept both http:// and https:// (the "s" is the optional character).
    if (!preg_match('#^https?://[^ ]+$#i', $string)) {
        return false;
    }

    $this->url = $string;
    $this->domain = ExternDomains::extractSubDomain($this->url);

    if (in_array($this->domain, $this->skip_domain)) {
        return false;
    }

    if (!isset($this->config[$this->domain])) {
        $this->log->info("Domain ".$this->domain." non configuré\n");
        if ($this->skipUnauthorised) {
            return false;
        }
    } else {
        echo "> Domaine ".Color::LIGHT_GREEN.$this->domain.Color::NORMAL." configuré\n";
    }

    $domainConfig = $this->config[$this->domain] ?? [];

    // Test deactivation BEFORE coercing the entry to an array, otherwise the
    // scalar value 'desactived' is erased and can never be detected.
    if ($domainConfig === 'desactived'
        || (is_array($domainConfig) && isset($domainConfig['desactived']))
    ) {
        $this->log->info("Domain ".$this->domain." desactivé\n");

        return false;
    }

    // Later steps index into this entry, so make sure it is an array.
    $this->config[$this->domain] = is_array($domainConfig) ? $domainConfig : [];

    return true;
}
||
| 206 | |||
/**
 * Log the mapped data and a notice for each category the page falls into.
 *
 * @param array $mapData mapped page data
 */
private function tagAndLog(array $mapData)
{
    $this->log->debug('mapData', $mapData);

    $notices = [];

    if (!empty($mapData['DATA-ARTICLE'])) {
        $notices[] = "Article OK";
    }
    if (isset($this->data['newspaper'][$this->domain])) {
        $notices[] = 'PRESSE';
    }
    if ($this->isScientificDomain()) {
        $notices[] = 'SCIENCE';
    }

    foreach ($notices as $notice) {
        $this->log->notice($notice);
    }
}
||
| 221 | |||
/**
 * Tell whether the current domain is a scientific one.
 *
 * A domain qualifies when it is listed in the 'scientific domain' data set,
 * or when it is revues.org or one of its sub-domains.
 *
 * @return bool
 */
private function isScientificDomain(): bool
{
    if (isset($this->data['scientific domain'][$this->domain])) {
        return true;
    }

    if (!is_string($this->domain)) {
        return false;
    }

    // strpos() takes the haystack first: search for the suffix inside the domain.
    // "> 0" requires at least one character before ".revues.org".
    // The bare "revues.org" is kept because the previous (reversed) call accepted it.
    return $this->domain === 'revues.org'
        || strpos($this->domain, '.revues.org') > 0;
}
||
| 233 | |||
/**
 * Append the site name (or, failing that, the periodical name) to the summary log.
 *
 * @param array $mapData mapped page data
 */
private function addSummaryLog(array $mapData)
{
    if (isset($mapData['site'])) {
        $label = $mapData['site'];
    } elseif (isset($mapData['périodique'])) {
        $label = $mapData['périodique'];
    } else {
        $label = '?';
    }

    $this->summaryLog[] = $label;
}
||
| 238 | |||
/**
 * Pick the wiki template ('article' or 'lien web') matching the page.
 *
 * A configured 'lien web' always wins. Otherwise 'article' is chosen when
 * the configuration says 'article', when the page carries article data and
 * the configuration is 'auto' or the domain is a known newspaper, or when
 * the domain is scientific. Everything else falls back to 'lien web'.
 *
 * @param array $mapData mapped page data
 *
 * @return AbstractWikiTemplate
 */
private function chooseTemplateByData(array $mapData): AbstractWikiTemplate
{
    // Make sure the 'template' entry exists for the current domain.
    $this->config[$this->domain]['template'] = $this->config[$this->domain]['template'] ?? [];

    $configured = $this->config[$this->domain]['template'];
    $hasArticleData = !empty($mapData['DATA-ARTICLE']);
    $isNewspaper = !empty($this->data['newspaper'][$this->domain]);

    if ($configured === 'lien web') {
        $templateName = 'lien web';
    } elseif ($configured === 'article'
        || ($hasArticleData && ($configured === 'auto' || $isNewspaper))
        || $this->isScientificDomain()
    ) {
        $templateName = 'article';
    } else {
        $templateName = 'lien web';
    }

    $template = WikiTemplateFactory::create($templateName);
    $template->userSeparator = " |";

    return $template;
}
||
| 263 | |||
| 264 | /** |
||
| 265 | * Logique : remplacement titre périodique ou nom du site |
||
| 266 | * |
||
| 267 | * @param array $mapData |
||
| 268 | * @param $template |
||
| 269 | * |
||
| 270 | * @return array |
||
| 271 | */ |
||
| 272 | private function replaceSitenameByConfig(array $mapData, $template): array |
||
| 311 | } |
||
| 312 | |||
/**
 * Force the 'url' entry of the mapped data to the URL originally processed.
 *
 * @param array $mapData mapped page data
 *
 * @return array the same data with 'url' overwritten
 */
private function replaceURLbyOriginal(array $mapData): array
{
    // array_replace() keeps every key and its position; only 'url' is set or overwritten.
    return array_replace($mapData, ['url' => $this->url]);
}
||
| 318 | |||
| 319 | } |
||
| 320 |