@@ -31,12 +31,12 @@ discard block |
||
| 31 | 31 | use SummaryExternTrait, RobotsRulesTrait, PublisherLogicTrait; |
| 32 | 32 | |
| 33 | 33 | public const HTTP_REQUEST_LOOP_DELAY = 10; |
| 34 | - public const SKIP_DOMAIN_FILENAME = __DIR__ . '/../resources/config_skip_domain.txt'; |
|
| 34 | + public const SKIP_DOMAIN_FILENAME = __DIR__.'/../resources/config_skip_domain.txt'; |
|
| 35 | 35 | public const REPLACE_404 = true; |
| 36 | - public const CONFIG_PRESSE = __DIR__ . '/../resources/config_presse.yaml'; |
|
| 37 | - public const CONFIG_NEWSPAPER_JSON = __DIR__ . '/../resources/data_newspapers.json'; |
|
| 38 | - public const CONFIG_SCIENTIFIC_JSON = __DIR__ . '/../resources/data_scientific_domain.json'; |
|
| 39 | - public const CONFIG_SCIENTIFIC_WIKI_JSON = __DIR__ . '/../resources/data_scientific_wiki.json'; |
|
| 36 | + public const CONFIG_PRESSE = __DIR__.'/../resources/config_presse.yaml'; |
|
| 37 | + public const CONFIG_NEWSPAPER_JSON = __DIR__.'/../resources/data_newspapers.json'; |
|
| 38 | + public const CONFIG_SCIENTIFIC_JSON = __DIR__.'/../resources/data_scientific_domain.json'; |
|
| 39 | + public const CONFIG_SCIENTIFIC_WIKI_JSON = __DIR__.'/../resources/data_scientific_wiki.json'; |
|
| 40 | 40 | |
| 41 | 41 | public $skipSiteBlacklisted = true; |
| 42 | 42 | public $skipRobotNoIndex = true; |
@@ -160,7 +160,7 @@ discard block |
||
| 160 | 160 | protected function isSiteBlackListed(): bool |
| 161 | 161 | { |
| 162 | 162 | if ($this->skipSiteBlacklisted && in_array($this->registrableDomain, $this->skip_domain)) { |
| 163 | - $this->log->notice("Skip web site " . $this->registrableDomain); |
|
| 163 | + $this->log->notice("Skip web site ".$this->registrableDomain); |
|
| 164 | 164 | return true; |
| 165 | 165 | } |
| 166 | 166 | return false; |
@@ -178,7 +178,7 @@ discard block |
||
| 178 | 178 | $this->config[$domain] = is_array($this->config[$domain]) ? $this->config[$domain] : []; |
| 179 | 179 | |
| 180 | 180 | if ($this->config[$domain] === 'deactivated' || isset($this->config[$domain]['deactivated'])) { |
| 181 | - $this->log->info("Domain " . $domain . " disabled\n"); |
|
| 181 | + $this->log->info("Domain ".$domain." disabled\n"); |
|
| 182 | 182 | |
| 183 | 183 | return false; |
| 184 | 184 | } |
@@ -192,9 +192,9 @@ discard block |
||
| 192 | 192 | protected function logDebugConfigWebDomain(string $domain): void |
| 193 | 193 | { |
| 194 | 194 | if (!isset($this->config[$domain])) { |
| 195 | - $this->log->debug("Domain " . $domain . " non configuré"); |
|
| 195 | + $this->log->debug("Domain ".$domain." non configuré"); |
|
| 196 | 196 | } else { |
| 197 | - $this->log->debug("Domain " . $domain . " configuré"); |
|
| 197 | + $this->log->debug("Domain ".$domain." configuré"); |
|
| 198 | 198 | } |
| 199 | 199 | } |
| 200 | 200 | |
@@ -219,7 +219,7 @@ discard block |
||
| 219 | 219 | if ($pageData === [] |
| 220 | 220 | || (empty($pageData['JSON-LD']) && empty($pageData['meta'])) |
| 221 | 221 | ) { |
| 222 | - $this->log->notice('No metadata : ' . $url); |
|
| 222 | + $this->log->notice('No metadata : '.$url); |
|
| 223 | 223 | |
| 224 | 224 | return true; |
| 225 | 225 | } |
@@ -234,7 +234,7 @@ discard block |
||
| 234 | 234 | protected function emptyMapData(array $mapData, string $url): bool |
| 235 | 235 | { |
| 236 | 236 | if ($mapData === [] || empty($mapData['url']) || empty($mapData['titre'])) { |
| 237 | - $this->log->info('Mapping incomplet : ' . $url); |
|
| 237 | + $this->log->info('Mapping incomplet : '.$url); |
|
| 238 | 238 | |
| 239 | 239 | return true; |
| 240 | 240 | } |
@@ -291,7 +291,7 @@ discard block |
||
| 291 | 291 | |
| 292 | 292 | $template = WikiTemplateFactory::create($templateName); |
| 293 | 293 | $template->userSeparator = " |"; |
| 294 | - $this->summary->memo['count ' . $templateName] = 1 + ($this->summary->memo['count ' . $templateName] ?? 0); |
|
| 294 | + $this->summary->memo['count '.$templateName] = 1 + ($this->summary->memo['count '.$templateName] ?? 0); |
|
| 295 | 295 | |
| 296 | 296 | return $template; |
| 297 | 297 | } |
@@ -349,7 +349,7 @@ discard block |
||
| 349 | 349 | $templateOptimized = $optimizer->getOptiTemplate(); |
| 350 | 350 | |
| 351 | 351 | $serialized = $templateOptimized->serialize(true); |
| 352 | - $this->log->info('Serialized 444: ' . $serialized . "\n"); |
|
| 352 | + $this->log->info('Serialized 444: '.$serialized."\n"); |
|
| 353 | 353 | return $serialized; |
| 354 | 354 | } |
| 355 | 355 | } |
@@ -27,7 +27,7 @@ discard block |
||
| 27 | 27 | || stripos($robots, 'none') !== false |
| 28 | 28 | ) |
| 29 | 29 | ) { |
| 30 | - $this->log->notice('robots NOINDEX : ' . $url); |
|
| 30 | + $this->log->notice('robots NOINDEX : '.$url); |
|
| 31 | 31 | |
| 32 | 32 | return !$this->isNoIndexDomainWhitelisted($pageData['meta']['prettyDomainName']); |
| 33 | 33 | } |
@@ -38,7 +38,7 @@ discard block |
||
| 38 | 38 | protected function isNoIndexDomainWhitelisted(?string $prettyDomain): bool |
| 39 | 39 | { |
| 40 | 40 | if (in_array($prettyDomain ?? '', $this->noindexWhitelist)) { |
| 41 | - $this->log->notice('ROBOT_NOINDEX_WHITELIST ' . $prettyDomain); |
|
| 41 | + $this->log->notice('ROBOT_NOINDEX_WHITELIST '.$prettyDomain); |
|
| 42 | 42 | |
| 43 | 43 | return true; |
| 44 | 44 | } |
@@ -37,7 +37,7 @@ |
||
| 37 | 37 | // Attention : pas de post-processing (sanitize title, etc.) |
| 38 | 38 | $result = $trans->process($url, $summary); |
| 39 | 39 | } catch (Exception $e) { |
| 40 | - $result = "EXCEPTION ". $e->getMessage().$e->getFile().$e->getLine(); |
|
| 40 | + $result = "EXCEPTION ".$e->getMessage().$e->getFile().$e->getLine(); |
|
| 41 | 41 | } |
| 42 | 42 | |
| 43 | 43 | echo $result."\n"; |
@@ -94,7 +94,7 @@ discard block |
||
| 94 | 94 | [ |
| 95 | 95 | 'page' => $title ?? '', |
| 96 | 96 | 'verify' => date("Y-m-d H:i:s"), |
| 97 | - 'altered' => (int)$stat, |
|
| 97 | + 'altered' => (int) $stat, |
|
| 98 | 98 | ] |
| 99 | 99 | ); |
| 100 | 100 | } |
@@ -149,7 +149,7 @@ discard block |
||
| 149 | 149 | |
| 150 | 150 | } |
| 151 | 151 | |
| 152 | - return (int)round(($count - $found) / count($data) * 100); |
|
| 152 | + return (int) round(($count - $found) / count($data) * 100); |
|
| 153 | 153 | } |
| 154 | 154 | |
| 155 | 155 | } |
@@ -51,9 +51,9 @@ |
||
| 51 | 51 | sleep(60 * 10); |
| 52 | 52 | exit; |
| 53 | 53 | } |
| 54 | - if(preg_match('#Quota exceeded#', $e->getMessage())) { |
|
| 54 | + if (preg_match('#Quota exceeded#', $e->getMessage())) { |
|
| 55 | 55 | echo "ouvrageCompleteProcess : Quota exceeded. Sleep 4h and EXIT."; |
| 56 | - sleep(60*60*4); |
|
| 56 | + sleep(60 * 60 * 4); |
|
| 57 | 57 | exit; |
| 58 | 58 | } |
| 59 | 59 | |
@@ -78,7 +78,7 @@ |
||
| 78 | 78 | //echo count($titles)." titles\n"; |
| 79 | 79 | $edited = file(__DIR__.'/../resources/article_externRef_edited.txt', FILE_IGNORE_NEW_LINES | FILE_SKIP_EMPTY_LINES); |
| 80 | 80 | $filtered = array_diff($titles, $edited); |
| 81 | -$list = new PageList( $filtered ); |
|
| 81 | +$list = new PageList($filtered); |
|
| 82 | 82 | echo ">".$list->count()." dans liste\n"; |
| 83 | 83 | |
| 84 | 84 | new ExternRefWorker($botConfig, $wiki, $list, null, new InternetDomainParser()); |
@@ -37,25 +37,25 @@ discard block |
||
| 37 | 37 | $data = []; |
| 38 | 38 | |
| 39 | 39 | $monitor = $db->fetchRow('select count(id) from page_ouvrages where optidate is null and edited is null and skip=0'); |
| 40 | -$data['not analyzed citation'] = (int)$monitor['count(id)']; |
|
| 40 | +$data['not analyzed citation'] = (int) $monitor['count(id)']; |
|
| 41 | 41 | |
| 42 | 42 | $monitor = $db->fetchRow('select count(id) from page_ouvrages where optidate is not null'); |
| 43 | -$data['analyzed citation'] = (int)$monitor['count(id)']; |
|
| 43 | +$data['analyzed citation'] = (int) $monitor['count(id)']; |
|
| 44 | 44 | |
| 45 | 45 | $monitor = $db->fetchRow('select count(distinct page) as n from page_ouvrages where skip=1 and edited is null'); // ? |
| 46 | -$data['skip pages'] = (int)$monitor['n']; |
|
| 46 | +$data['skip pages'] = (int) $monitor['n']; |
|
| 47 | 47 | |
| 48 | 48 | $monitor = $db->fetchRow('select count(distinct page) as n from page_ouvrages where edited is true'); |
| 49 | -$data['edited pages'] = (int)$monitor['n']; |
|
| 49 | +$data['edited pages'] = (int) $monitor['n']; |
|
| 50 | 50 | |
| 51 | 51 | $monitor = $db->fetchRow('select count(id) from page_ouvrages where optidate > SUBDATE(NOW(),1)'); |
| 52 | -$data['analyzed citation 24H'] = (int)$monitor['count(id)']; |
|
| 52 | +$data['analyzed citation 24H'] = (int) $monitor['count(id)']; |
|
| 53 | 53 | |
| 54 | 54 | $monitor = $db->fetchRow('select count(id) as n from page_ouvrages where edited > SUBDATE(NOW(),1)'); |
| 55 | -$data['edited citations 24H'] = (int)$monitor['n']; |
|
| 55 | +$data['edited citations 24H'] = (int) $monitor['n']; |
|
| 56 | 56 | |
| 57 | 57 | $monitor = $db->fetchRow('select count(distinct page) as n from page_ouvrages where edited > SUBDATE(NOW(),1)'); |
| 58 | -$data['edited pages 24H'] = (int)$monitor['n']; |
|
| 58 | +$data['edited pages 24H'] = (int) $monitor['n']; |
|
| 59 | 59 | |
| 60 | 60 | $monitor = $db->fetchRow( |
| 61 | 61 | 'SELECT count(distinct A.page) FROM page_ouvrages A |
@@ -74,7 +74,7 @@ discard block |
||
| 74 | 74 | AND A.page = B.page |
| 75 | 75 | )' |
| 76 | 76 | ); |
| 77 | -$data['waiting pages'] = (int)$monitor['count(distinct A.page)']; |
|
| 77 | +$data['waiting pages'] = (int) $monitor['count(distinct A.page)']; |
|
| 78 | 78 | |
| 79 | 79 | $data['currentdate'] = DateUtil::english2french((new DateTime())->format('j F Y \à H\:i').' (CEST)'); |
| 80 | 80 | |
@@ -40,10 +40,10 @@ |
||
| 40 | 40 | $db = new Mysql($pdo); |
| 41 | 41 | |
| 42 | 42 | $monitor = $db->fetchRow('select count(id) from page_ouvrages where optidate is not null'); |
| 43 | -$number = (int)$monitor['count(id)']; |
|
| 43 | +$number = (int) $monitor['count(id)']; |
|
| 44 | 44 | |
| 45 | 45 | $monitor = $db->fetchRow('select count(distinct page) as pages from page_ouvrages where optidate is not null and isbn<>""'); |
| 46 | -$pageNb = (int)$monitor['pages']; |
|
| 46 | +$pageNb = (int) $monitor['pages']; |
|
| 47 | 47 | |
| 48 | 48 | $newText = <<<EOF |
| 49 | 49 | <div style="background:#EBF6E9;border:2px solid grey;padding:10px;border-radius:10px;"> |
@@ -49,7 +49,7 @@ discard block |
||
| 49 | 49 | $this->url = $url; |
| 50 | 50 | $this->registrableDomain = null; |
| 51 | 51 | if (!ExternHttpClient::isHttpURL($url)) { |
| 52 | - $this->log->debug('Skip : not a valid URL : ' . $url); |
|
| 52 | + $this->log->debug('Skip : not a valid URL : '.$url); |
|
| 53 | 53 | return false; |
| 54 | 54 | } |
| 55 | 55 | |
@@ -57,7 +57,7 @@ discard block |
||
| 57 | 57 | return false; |
| 58 | 58 | } |
| 59 | 59 | if (!ExternHttpClient::isHttpURL($url)) { |
| 60 | - throw new Exception('string is not an URL ' . $url); |
|
| 60 | + throw new Exception('string is not an URL '.$url); |
|
| 61 | 61 | } |
| 62 | 62 | |
| 63 | 63 | $this->findRegistrableDomain(); |
@@ -79,7 +79,7 @@ discard block |
||
| 79 | 79 | try { |
| 80 | 80 | $this->registrableDomain = $this->internetDomainParser->getRegistrableDomainFromURL($this->url); |
| 81 | 81 | } catch (Exception $e) { |
| 82 | - $this->log->warning('Skip : not a valid URL : ' . $this->url); |
|
| 82 | + $this->log->warning('Skip : not a valid URL : '.$this->url); |
|
| 83 | 83 | return null; |
| 84 | 84 | } |
| 85 | 85 | return $this->registrableDomain; |
@@ -92,7 +92,7 @@ discard block |
||
| 92 | 92 | */ |
| 93 | 93 | protected function hasForbiddenFilenameExtension(): bool |
| 94 | 94 | { |
| 95 | - return (bool)preg_match( |
|
| 95 | + return (bool) preg_match( |
|
| 96 | 96 | '#\.(pdf|jpg|jpeg|gif|png|xls|xlsx|xlr|xml|xlt|txt|csv|js|docx|exe|gz|zip|ini|movie|mp3|mp4|ogg|raw|rss|tar|tgz|wma)$#i', |
| 97 | 97 | $this->url |
| 98 | 98 | ); |