@@ -37,7 +37,7 @@ |
||
37 | 37 | // Attention : pas de post-processing (sanitize title, etc.) |
38 | 38 | $result = $trans->process($url, $summary); |
39 | 39 | } catch (Exception $e) { |
40 | - $result = "EXCEPTION ". $e->getMessage().$e->getFile().$e->getLine(); |
|
40 | + $result = "EXCEPTION ".$e->getMessage().$e->getFile().$e->getLine(); |
|
41 | 41 | } |
42 | 42 | |
43 | 43 | echo $result."\n"; |
@@ -69,7 +69,7 @@ discard block |
||
69 | 69 | ) |
70 | 70 | { |
71 | 71 | if (!ExternHttpClient::isHttpURL($url)) { |
72 | - throw new Exception('string is not an URL ' . $url); |
|
72 | + throw new Exception('string is not an URL '.$url); |
|
73 | 73 | } |
74 | 74 | $this->url = $url; |
75 | 75 | $this->html = $html; |
@@ -189,7 +189,7 @@ discard block |
||
189 | 189 | { |
190 | 190 | try { |
191 | 191 | if (!ExternHttpClient::isHttpURL($this->url)) { |
192 | - throw new Exception('string is not an URL ' . $this->url); |
|
192 | + throw new Exception('string is not an URL '.$this->url); |
|
193 | 193 | } |
194 | 194 | if (!$this->domainParser instanceof InternetDomainParserInterface) { |
195 | 195 | $this->log->notice('InternetDomainParser is not set'); |
@@ -200,7 +200,7 @@ discard block |
||
200 | 200 | return $this->domainParser->getRegistrableDomainFromURL($this->url); |
201 | 201 | } catch (Exception $e) { |
202 | 202 | if ($this->log !== null) { |
203 | - $this->log->warning('InternetDomainParser->getRegistrableDomainFromURL NULL ' . $this->url); |
|
203 | + $this->log->warning('InternetDomainParser->getRegistrableDomainFromURL NULL '.$this->url); |
|
204 | 204 | } |
205 | 205 | throw new Exception('InternetDomainParser->getRegistrableDomainFromURL NULL', $e->getCode(), $e); |
206 | 206 | } |
@@ -23,7 +23,7 @@ |
||
23 | 23 | /** |
24 | 24 | * @throws Exception |
25 | 25 | */ |
26 | - public static function fromURL($url, ExternHttpClientInterface $httpClient,LoggerInterface $logger = null): ExternPage |
|
26 | + public static function fromURL($url, ExternHttpClientInterface $httpClient, LoggerInterface $logger = null): ExternPage |
|
27 | 27 | { |
28 | 28 | if (!ExternHttpClient::isHttpURL($url)) { |
29 | 29 | throw new Exception('string is not an URL '.$url); |
@@ -14,7 +14,7 @@ discard block |
||
14 | 14 | |
15 | 15 | class ExternHttpErrorLogic |
16 | 16 | { |
17 | - public const LOG_REQUEST_ERROR = __DIR__ . '/../../Application/resources/external_request_error.log'; |
|
17 | + public const LOG_REQUEST_ERROR = __DIR__.'/../../Application/resources/external_request_error.log'; |
|
18 | 18 | |
19 | 19 | /** |
20 | 20 | * @var LoggerInterface |
@@ -46,12 +46,12 @@ discard block |
||
46 | 46 | } |
47 | 47 | return $url; |
48 | 48 | } elseif (preg_match('#401 Unauthorized#i', $errorMessage)) { |
49 | - $this->log->notice('401 Unauthorized : skip ' . $url); |
|
49 | + $this->log->notice('401 Unauthorized : skip '.$url); |
|
50 | 50 | |
51 | 51 | return $url; |
52 | 52 | } else { |
53 | 53 | // autre : ne pas générer de {lien brisé}, car peut-être 404 temporaire |
54 | - $this->log->warning('erreur sur extractWebData ' . $errorMessage); |
|
54 | + $this->log->warning('erreur sur extractWebData '.$errorMessage); |
|
55 | 55 | |
56 | 56 | //file_put_contents(self::LOG_REQUEST_ERROR, $this->domain."\n", FILE_APPEND); |
57 | 57 | |
@@ -78,7 +78,7 @@ discard block |
||
78 | 78 | { |
79 | 79 | $text = str_replace(['https://', 'http://', 'www.'], '', $url); |
80 | 80 | if (strlen($text) > 30) { |
81 | - $text = substr($text, 0, 30) . '…'; |
|
81 | + $text = substr($text, 0, 30).'…'; |
|
82 | 82 | } |
83 | 83 | |
84 | 84 | return $text; |
@@ -86,7 +86,7 @@ discard block |
||
86 | 86 | |
87 | 87 | protected function log403(string $url): void |
88 | 88 | { |
89 | - $this->log->warning('403 Forbidden : ' . $url); |
|
90 | - file_put_contents(self::LOG_REQUEST_ERROR, '403 Forbidden : ' . $url . "\n", FILE_APPEND); |
|
89 | + $this->log->warning('403 Forbidden : '.$url); |
|
90 | + file_put_contents(self::LOG_REQUEST_ERROR, '403 Forbidden : '.$url."\n", FILE_APPEND); |
|
91 | 91 | } |
92 | 92 | } |
93 | 93 | \ No newline at end of file |
@@ -31,12 +31,12 @@ discard block |
||
31 | 31 | use SummaryExternTrait, RobotsRulesTrait, PublisherLogicTrait; |
32 | 32 | |
33 | 33 | public const HTTP_REQUEST_LOOP_DELAY = 10; |
34 | - public const SKIP_DOMAIN_FILENAME = __DIR__ . '/../resources/config_skip_domain.txt'; |
|
34 | + public const SKIP_DOMAIN_FILENAME = __DIR__.'/../resources/config_skip_domain.txt'; |
|
35 | 35 | public const REPLACE_404 = true; |
36 | - public const CONFIG_PRESSE = __DIR__ . '/../resources/config_presse.yaml'; |
|
37 | - public const CONFIG_NEWSPAPER_JSON = __DIR__ . '/../resources/data_newspapers.json'; |
|
38 | - public const CONFIG_SCIENTIFIC_JSON = __DIR__ . '/../resources/data_scientific_domain.json'; |
|
39 | - public const CONFIG_SCIENTIFIC_WIKI_JSON = __DIR__ . '/../resources/data_scientific_wiki.json'; |
|
36 | + public const CONFIG_PRESSE = __DIR__.'/../resources/config_presse.yaml'; |
|
37 | + public const CONFIG_NEWSPAPER_JSON = __DIR__.'/../resources/data_newspapers.json'; |
|
38 | + public const CONFIG_SCIENTIFIC_JSON = __DIR__.'/../resources/data_scientific_domain.json'; |
|
39 | + public const CONFIG_SCIENTIFIC_WIKI_JSON = __DIR__.'/../resources/data_scientific_wiki.json'; |
|
40 | 40 | |
41 | 41 | public $skipSiteBlacklisted = true; |
42 | 42 | public $skipRobotNoIndex = true; |
@@ -160,7 +160,7 @@ discard block |
||
160 | 160 | protected function isSiteBlackListed(): bool |
161 | 161 | { |
162 | 162 | if ($this->skipSiteBlacklisted && in_array($this->registrableDomain, $this->skip_domain)) { |
163 | - $this->log->notice("Skip web site " . $this->registrableDomain); |
|
163 | + $this->log->notice("Skip web site ".$this->registrableDomain); |
|
164 | 164 | return true; |
165 | 165 | } |
166 | 166 | return false; |
@@ -178,7 +178,7 @@ discard block |
||
178 | 178 | $this->config[$domain] = is_array($this->config[$domain]) ? $this->config[$domain] : []; |
179 | 179 | |
180 | 180 | if ($this->config[$domain] === 'deactivated' || isset($this->config[$domain]['deactivated'])) { |
181 | - $this->log->info("Domain " . $domain . " disabled\n"); |
|
181 | + $this->log->info("Domain ".$domain." disabled\n"); |
|
182 | 182 | |
183 | 183 | return false; |
184 | 184 | } |
@@ -192,9 +192,9 @@ discard block |
||
192 | 192 | protected function logDebugConfigWebDomain(string $domain): void |
193 | 193 | { |
194 | 194 | if (!isset($this->config[$domain])) { |
195 | - $this->log->debug("Domain " . $domain . " non configuré"); |
|
195 | + $this->log->debug("Domain ".$domain." non configuré"); |
|
196 | 196 | } else { |
197 | - $this->log->debug("Domain " . $domain . " configuré"); |
|
197 | + $this->log->debug("Domain ".$domain." configuré"); |
|
198 | 198 | } |
199 | 199 | } |
200 | 200 | |
@@ -219,7 +219,7 @@ discard block |
||
219 | 219 | if ($pageData === [] |
220 | 220 | || (empty($pageData['JSON-LD']) && empty($pageData['meta'])) |
221 | 221 | ) { |
222 | - $this->log->notice('No metadata : ' . $url); |
|
222 | + $this->log->notice('No metadata : '.$url); |
|
223 | 223 | |
224 | 224 | return true; |
225 | 225 | } |
@@ -234,7 +234,7 @@ discard block |
||
234 | 234 | protected function emptyMapData(array $mapData, string $url): bool |
235 | 235 | { |
236 | 236 | if ($mapData === [] || empty($mapData['url']) || empty($mapData['titre'])) { |
237 | - $this->log->info('Mapping incomplet : ' . $url); |
|
237 | + $this->log->info('Mapping incomplet : '.$url); |
|
238 | 238 | |
239 | 239 | return true; |
240 | 240 | } |
@@ -291,7 +291,7 @@ discard block |
||
291 | 291 | |
292 | 292 | $template = WikiTemplateFactory::create($templateName); |
293 | 293 | $template->userSeparator = " |"; |
294 | - $this->summary->memo['count ' . $templateName] = 1 + ($this->summary->memo['count ' . $templateName] ?? 0); |
|
294 | + $this->summary->memo['count '.$templateName] = 1 + ($this->summary->memo['count '.$templateName] ?? 0); |
|
295 | 295 | |
296 | 296 | return $template; |
297 | 297 | } |
@@ -349,7 +349,7 @@ discard block |
||
349 | 349 | $templateOptimized = $optimizer->getOptiTemplate(); |
350 | 350 | |
351 | 351 | $serialized = $templateOptimized->serialize(true); |
352 | - $this->log->info('Serialized 444: ' . $serialized . "\n"); |
|
352 | + $this->log->info('Serialized 444: '.$serialized."\n"); |
|
353 | 353 | return $serialized; |
354 | 354 | } |
355 | 355 | } |
@@ -27,7 +27,7 @@ discard block |
||
27 | 27 | || stripos($robots, 'none') !== false |
28 | 28 | ) |
29 | 29 | ) { |
30 | - $this->log->notice('robots NOINDEX : ' . $url); |
|
30 | + $this->log->notice('robots NOINDEX : '.$url); |
|
31 | 31 | |
32 | 32 | return !$this->isNoIndexDomainWhitelisted($pageData['meta']['prettyDomainName']); |
33 | 33 | } |
@@ -38,7 +38,7 @@ discard block |
||
38 | 38 | protected function isNoIndexDomainWhitelisted(?string $prettyDomain): bool |
39 | 39 | { |
40 | 40 | if (in_array($prettyDomain ?? '', $this->noindexWhitelist)) { |
41 | - $this->log->notice('ROBOT_NOINDEX_WHITELIST ' . $prettyDomain); |
|
41 | + $this->log->notice('ROBOT_NOINDEX_WHITELIST '.$prettyDomain); |
|
42 | 42 | |
43 | 43 | return true; |
44 | 44 | } |
@@ -44,7 +44,7 @@ discard block |
||
44 | 44 | $this->url = $url; |
45 | 45 | $this->registrableDomain = null; |
46 | 46 | if (!ExternHttpClient::isHttpURL($url)) { |
47 | - $this->log->debug('Skip : not a valid URL : ' . $url); |
|
47 | + $this->log->debug('Skip : not a valid URL : '.$url); |
|
48 | 48 | return false; |
49 | 49 | } |
50 | 50 | |
@@ -52,7 +52,7 @@ discard block |
||
52 | 52 | return false; |
53 | 53 | } |
54 | 54 | if (!ExternHttpClient::isHttpURL($url)) { |
55 | - throw new Exception('string is not an URL ' . $url); |
|
55 | + throw new Exception('string is not an URL '.$url); |
|
56 | 56 | } |
57 | 57 | |
58 | 58 | $this->findRegistrableDomain(); |
@@ -74,7 +74,7 @@ discard block |
||
74 | 74 | try { |
75 | 75 | $this->registrableDomain = (new InternetDomainParser())->getRegistrableDomainFromURL($this->url); |
76 | 76 | } catch (Exception $e) { |
77 | - $this->log->warning('Skip : not a valid URL : ' . $this->url); |
|
77 | + $this->log->warning('Skip : not a valid URL : '.$this->url); |
|
78 | 78 | return null; |
79 | 79 | } |
80 | 80 | return $this->registrableDomain; |
@@ -91,7 +91,7 @@ discard block |
||
91 | 91 | */ |
92 | 92 | protected function hasForbiddenFilenameExtension(): bool |
93 | 93 | { |
94 | - return (bool)preg_match( |
|
94 | + return (bool) preg_match( |
|
95 | 95 | '#\.(pdf|jpg|jpeg|gif|png|xls|xlsx|xlr|xml|xlt|txt|csv|js|docx|exe|gz|zip|ini|movie|mp3|mp4|ogg|raw|rss|tar|tgz|wma)$#i', |
96 | 96 | $this->url |
97 | 97 | ); |
@@ -11,5 +11,5 @@ |
||
11 | 11 | |
12 | 12 | interface ExternHttpClientInterface |
13 | 13 | { |
14 | - public function getHTML(string $url, ?bool $normalized=false): ?string; |
|
14 | + public function getHTML(string $url, ?bool $normalized = false): ?string; |
|
15 | 15 | } |
@@ -25,7 +25,7 @@ discard block |
||
25 | 25 | public const SLEEP_AFTER_EDITION = 15; // sec |
26 | 26 | public const MINUTES_DELAY_AFTER_LAST_HUMAN_EDIT = 10; // minutes |
27 | 27 | public const CHECK_EDIT_CONFLICT = true; |
28 | - public const ARTICLE_ANALYZED_FILENAME = __DIR__ . '/resources/article_externRef_edited.txt'; |
|
28 | + public const ARTICLE_ANALYZED_FILENAME = __DIR__.'/resources/article_externRef_edited.txt'; |
|
29 | 29 | public const SKIP_ADQ = false; |
30 | 30 | public const SKIP_LASTEDIT_BY_BOT = false; |
31 | 31 | public const CITATION_NUMBER_ON_FIRE = 15; |
@@ -58,7 +58,7 @@ discard block |
||
58 | 58 | $result = $this->transformer->process($refContent, $this->summary); |
59 | 59 | } catch (Throwable $e) { |
60 | 60 | echo "** Problème détecté 234242\n"; |
61 | - $this->log->critical($e->getMessage() . " " . $e->getFile() . ":" . $e->getLine()); |
|
61 | + $this->log->critical($e->getMessage()." ".$e->getFile().":".$e->getLine()); |
|
62 | 62 | // TODO : parse $e->message -> variable process, taskName, botflag... |
63 | 63 | |
64 | 64 | return $refContent; |
@@ -112,10 +112,10 @@ discard block |
||
112 | 112 | $prefixSummary = ($this->summary->isBotFlag()) ? 'bot ' : ''; |
113 | 113 | $suffix = ''; |
114 | 114 | if (isset($this->summary->memo['count article'])) { |
115 | - $suffix .= ' ' . $this->summary->memo['count article'] . 'x {article}'; |
|
115 | + $suffix .= ' '.$this->summary->memo['count article'].'x {article}'; |
|
116 | 116 | } |
117 | 117 | if (isset($this->summary->memo['count lien web'])) { |
118 | - $suffix .= ' ' . $this->summary->memo['count lien web'] . 'x {lien web}'; |
|
118 | + $suffix .= ' '.$this->summary->memo['count lien web'].'x {lien web}'; |
|
119 | 119 | } |
120 | 120 | if (isset($this->summary->memo['presse'])) { |
121 | 121 | $suffix .= ' |