@@ -35,13 +35,13 @@ discard block |
||
| 35 | 35 | use SummaryExternTrait, PublisherLogicTrait; |
| 36 | 36 | |
| 37 | 37 | public const HTTP_REQUEST_LOOP_DELAY = 10; |
| 38 | - public const SKIP_DOMAIN_FILENAME = __DIR__ . '/../resources/config_skip_domain.txt'; |
|
| 38 | + public const SKIP_DOMAIN_FILENAME = __DIR__.'/../resources/config_skip_domain.txt'; |
|
| 39 | 39 | public const REPLACE_404 = true; |
| 40 | 40 | public const REPLACE_410 = true; |
| 41 | - public const CONFIG_PRESSE = __DIR__ . '/../resources/config_presse.yaml'; |
|
| 42 | - public const CONFIG_NEWSPAPER_JSON = __DIR__ . '/../resources/data_newspapers.json'; |
|
| 43 | - public const CONFIG_SCIENTIFIC_JSON = __DIR__ . '/../resources/data_scientific_domain.json'; |
|
| 44 | - public const CONFIG_SCIENTIFIC_WIKI_JSON = __DIR__ . '/../resources/data_scientific_wiki.json'; |
|
| 41 | + public const CONFIG_PRESSE = __DIR__.'/../resources/config_presse.yaml'; |
|
| 42 | + public const CONFIG_NEWSPAPER_JSON = __DIR__.'/../resources/data_newspapers.json'; |
|
| 43 | + public const CONFIG_SCIENTIFIC_JSON = __DIR__.'/../resources/data_scientific_domain.json'; |
|
| 44 | + public const CONFIG_SCIENTIFIC_WIKI_JSON = __DIR__.'/../resources/data_scientific_wiki.json'; |
|
| 45 | 45 | |
| 46 | 46 | public bool $skipSiteBlacklisted = true; |
| 47 | 47 | public bool $skipRobotNoIndex = true; |
@@ -93,12 +93,12 @@ discard block |
||
| 93 | 93 | } |
| 94 | 94 | $this->registrableDomain = $this->urlChecker->getRegistrableDomain($url); // hack |
| 95 | 95 | if ($this->isSiteBlackListed()) { |
| 96 | - $this->log->debug('Site blacklisted : ' . $this->registrableDomain); |
|
| 96 | + $this->log->debug('Site blacklisted : '.$this->registrableDomain); |
|
| 97 | 97 | return $url; |
| 98 | 98 | } |
| 99 | 99 | |
| 100 | 100 | if (!$this->validateConfigWebDomain($this->registrableDomain)) { |
| 101 | - $this->log->debug('Domain not validate by config : ' . $this->registrableDomain); |
|
| 101 | + $this->log->debug('Domain not validate by config : '.$this->registrableDomain); |
|
| 102 | 102 | return $url; |
| 103 | 103 | } |
| 104 | 104 | |
@@ -150,7 +150,7 @@ discard block |
||
| 150 | 150 | protected function isSiteBlackListed(): bool |
| 151 | 151 | { |
| 152 | 152 | if ($this->skipSiteBlacklisted && in_array($this->registrableDomain, $this->skip_domain)) { |
| 153 | - $this->log->notice("Skip web site " . $this->registrableDomain); |
|
| 153 | + $this->log->notice("Skip web site ".$this->registrableDomain); |
|
| 154 | 154 | return true; |
| 155 | 155 | } |
| 156 | 156 | return false; |
@@ -168,7 +168,7 @@ discard block |
||
| 168 | 168 | $this->config[$domain] = is_array($this->config[$domain]) ? $this->config[$domain] : []; |
| 169 | 169 | |
| 170 | 170 | if ($this->config[$domain] === 'deactivated' || isset($this->config[$domain]['deactivated'])) { |
| 171 | - $this->log->info("Domain " . $domain . " disabled\n"); |
|
| 171 | + $this->log->info("Domain ".$domain." disabled\n"); |
|
| 172 | 172 | |
| 173 | 173 | return false; |
| 174 | 174 | } |
@@ -179,9 +179,9 @@ discard block |
||
| 179 | 179 | protected function logDebugConfigWebDomain(string $domain): void |
| 180 | 180 | { |
| 181 | 181 | if (!isset($this->config[$domain])) { |
| 182 | - $this->log->debug("Domain " . $domain . " non configuré"); |
|
| 182 | + $this->log->debug("Domain ".$domain." non configuré"); |
|
| 183 | 183 | } else { |
| 184 | - $this->log->debug("Domain " . $domain . " configuré"); |
|
| 184 | + $this->log->debug("Domain ".$domain." configuré"); |
|
| 185 | 185 | } |
| 186 | 186 | } |
| 187 | 187 | |
@@ -206,7 +206,7 @@ discard block |
||
| 206 | 206 | if ($pageData === [] |
| 207 | 207 | || (empty($pageData['JSON-LD']) && empty($pageData['meta'])) |
| 208 | 208 | ) { |
| 209 | - $this->log->notice('No metadata : ' . $url); |
|
| 209 | + $this->log->notice('No metadata : '.$url); |
|
| 210 | 210 | |
| 211 | 211 | return true; |
| 212 | 212 | } |
@@ -221,7 +221,7 @@ discard block |
||
| 221 | 221 | protected function emptyMapData(array $mapData, string $url): bool |
| 222 | 222 | { |
| 223 | 223 | if ($mapData === [] || empty($mapData['url']) || empty($mapData['titre'])) { |
| 224 | - $this->log->info('Mapping incomplet : ' . $url); |
|
| 224 | + $this->log->info('Mapping incomplet : '.$url); |
|
| 225 | 225 | |
| 226 | 226 | return true; |
| 227 | 227 | } |
@@ -274,7 +274,7 @@ discard block |
||
| 274 | 274 | |
| 275 | 275 | $template = WikiTemplateFactory::create($templateName); |
| 276 | 276 | $template->userSeparator = " |"; |
| 277 | - $this->summary->memo['count ' . $templateName] = 1 + ($this->summary->memo['count ' . $templateName] ?? 0); |
|
| 277 | + $this->summary->memo['count '.$templateName] = 1 + ($this->summary->memo['count '.$templateName] ?? 0); |
|
| 278 | 278 | |
| 279 | 279 | return $template; |
| 280 | 280 | } |
@@ -330,7 +330,7 @@ discard block |
||
| 330 | 330 | $templateOptimized = $optimizer->getOptiTemplate(); |
| 331 | 331 | |
| 332 | 332 | $serialized = $templateOptimized->serialize(true); |
| 333 | - $this->log->info('Serialized 444: ' . $serialized . "\n"); |
|
| 333 | + $this->log->info('Serialized 444: '.$serialized."\n"); |
|
| 334 | 334 | return $serialized; |
| 335 | 335 | } |
| 336 | 336 | |
@@ -56,13 +56,13 @@ discard block |
||
| 56 | 56 | // idn_to_ascii('teßt.com',IDNA_NONTRANSITIONAL_TO_ASCII,INTL_IDNA_VARIANT_UTS46) |
| 57 | 57 | // checkdnsrr($string, "A") // check DNS record |
| 58 | 58 | if (!self::isHttpURL($url)) { |
| 59 | - throw new DomainException('URL not compatible : ' . $url); |
|
| 59 | + throw new DomainException('URL not compatible : '.$url); |
|
| 60 | 60 | } |
| 61 | 61 | $response = $this->client->get($url); |
| 62 | 62 | |
| 63 | 63 | if (200 !== $response->getStatusCode()) { |
| 64 | - echo 'HTTP error ' . $response->getStatusCode(); |
|
| 65 | - $this->log->error('HTTP error ' . $response->getStatusCode() . ' ' . $response->getReasonPhrase()); |
|
| 64 | + echo 'HTTP error '.$response->getStatusCode(); |
|
| 65 | + $this->log->error('HTTP error '.$response->getStatusCode().' '.$response->getReasonPhrase()); |
|
| 66 | 66 | |
| 67 | 67 | return null; |
| 68 | 68 | } |
@@ -85,7 +85,7 @@ discard block |
||
| 85 | 85 | */ |
| 86 | 86 | public static function isHttpURL(string $url): bool |
| 87 | 87 | { |
| 88 | - return (bool)preg_match('#^https?://[^ ]+$#i', $url); |
|
| 88 | + return (bool) preg_match('#^https?://[^ ]+$#i', $url); |
|
| 89 | 89 | } |
| 90 | 90 | |
| 91 | 91 | /** |
@@ -107,20 +107,20 @@ discard block |
||
| 107 | 107 | $charset = $this->extractCharset($html) ?? 'WINDOWS-1252'; |
| 108 | 108 | |
| 109 | 109 | if (empty($charset)) { |
| 110 | - throw new DomainException('normalized html error and no charset found : ' . $url); |
|
| 110 | + throw new DomainException('normalized html error and no charset found : '.$url); |
|
| 111 | 111 | } |
| 112 | 112 | try { |
| 113 | 113 | $html2 = iconv($charset, 'UTF-8//TRANSLIT', $html); |
| 114 | 114 | // PHP Notice: iconv(): Detected an illegal character in input string on line 107 |
| 115 | 115 | if (false === $html2) { |
| 116 | - throw new DomainException("error iconv : $charset to UTF-8 on " . $url); |
|
| 116 | + throw new DomainException("error iconv : $charset to UTF-8 on ".$url); |
|
| 117 | 117 | } |
| 118 | 118 | $html2 = Normalizer::normalize($html2); |
| 119 | 119 | if (!is_string($html2)) { |
| 120 | - throw new DomainException("error normalizer : $charset to UTF-8 on " . $url); |
|
| 120 | + throw new DomainException("error normalizer : $charset to UTF-8 on ".$url); |
|
| 121 | 121 | } |
| 122 | 122 | } catch (Throwable $e) { |
| 123 | - throw new DomainException("error converting : $charset to UTF-8 on " . $url, $e->getCode(), $e); |
|
| 123 | + throw new DomainException("error converting : $charset to UTF-8 on ".$url, $e->getCode(), $e); |
|
| 124 | 124 | } |
| 125 | 125 | |
| 126 | 126 | return $html2; |
@@ -52,7 +52,7 @@ discard block |
||
| 52 | 52 | $url, |
| 53 | 53 | (string) $archiveData['longformurl'], |
| 54 | 54 | $archiveData['timestamp'] |
| 55 | - ? DateTimeImmutable::createFromFormat('U', (string)$archiveData['timestamp']) |
|
| 55 | + ? DateTimeImmutable::createFromFormat('U', (string) $archiveData['timestamp']) |
|
| 56 | 56 | : null |
| 57 | 57 | ); // todo factory ? |
| 58 | 58 | } |
@@ -61,7 +61,7 @@ discard block |
||
| 61 | 61 | { |
| 62 | 62 | $response = $this->externHttpClient->getClient()->request( |
| 63 | 63 | 'GET', |
| 64 | - self::API_URL . urlencode($url) |
|
| 64 | + self::API_URL.urlencode($url) |
|
| 65 | 65 | ); |
| 66 | 66 | |
| 67 | 67 | if (!$response instanceof ResponseInterface || $response->getStatusCode() !== 200) { |
@@ -75,8 +75,8 @@ discard block |
||
| 75 | 75 | $data = json_decode($jsonString, true) ?? []; |
| 76 | 76 | |
| 77 | 77 | // check wikiwix archive status |
| 78 | - if (empty($data['status']) || (int)$data['status'] !== 200) { |
|
| 79 | - $this->log->debug('WikiwixAdapter incorrect response: ' . $jsonString); |
|
| 78 | + if (empty($data['status']) || (int) $data['status'] !== 200) { |
|
| 79 | + $this->log->debug('WikiwixAdapter incorrect response: '.$jsonString); |
|
| 80 | 80 | |
| 81 | 81 | return []; |
| 82 | 82 | } |
@@ -43,7 +43,7 @@ |
||
| 43 | 43 | $webarchive = $this->archiver->searchWebarchive($url); |
| 44 | 44 | if ($webarchive instanceof WebarchiveDTO) { |
| 45 | 45 | $this->log->notice(' |
@@ -32,7 +32,7 @@ |
||
| 32 | 32 | $this->summary->memo['sites'][] = $this->externalPage->getPrettyDomainName(); // ??? |
| 33 | 33 | } |
| 34 | 34 | if (isset($mapData['accès url'])) { |
| 35 | - $this->log->debug('accès |
|
@@ -26,7 +26,7 @@ |
||
| 26 | 26 | { |
| 27 | 27 | protected const USER_RC_LIMIT = 100; |
| 28 | 28 | protected const TASK_NAME = ' |
@@ -29,7 +29,7 @@ discard block |
||
| 29 | 29 | public const SLEEP_AFTER_EDITION = 15; // sec |
| 30 | 30 | public const MINUTES_DELAY_AFTER_LAST_HUMAN_EDIT = 10; // minutes |
| 31 | 31 | public const CHECK_EDIT_CONFLICT = true; |
| 32 | - public const ARTICLE_ANALYZED_FILENAME = __DIR__ . '/../resources/article_externRef_edited.txt'; |
|
| 32 | + public const ARTICLE_ANALYZED_FILENAME = __DIR__.'/../resources/article_externRef_edited.txt'; |
|
| 33 | 33 | public const SKIP_ADQ = false; |
| 34 | 34 | public const SKIP_LASTEDIT_BY_BOT = false; |
| 35 | 35 | public const CITATION_NUMBER_ON_FIRE = 15; |
@@ -61,7 +61,7 @@ discard block |
||
| 61 | 61 | try { |
| 62 | 62 | $result = $this->transformer->process($refContent, $this->summary); |
| 63 | 63 | } catch (Throwable $e) { |
| 64 | - $this->log->critical('Error patate34 '. $e->getMessage() . " " . $e->getFile() . ":" . $e->getLine()); |
|
| 64 | + $this->log->critical('Error patate34 '.$e->getMessage()." ".$e->getFile().":".$e->getLine()); |
|
| 65 | 65 | // TODO : parse $e->message -> variable process, taskName, botflag... |
| 66 | 66 | |
| 67 | 67 | return $refContent; |
@@ -132,10 +132,10 @@ discard block |
||
| 132 | 132 | $prefixSummary = ($this->summary->isBotFlag()) ? 'bot ' : ''; |
| 133 | 133 | $suffix = ''; |
| 134 | 134 | if (isset($this->summary->memo['count article'])) { |
| 135 | - $suffix .= ' ' . $this->summary->memo['count article'] . 'x {article}'; |
|
| 135 | + $suffix .= ' '.$this->summary->memo['count article'].'x {article}'; |
|
| 136 | 136 | } |
| 137 | 137 | if (isset($this->summary->memo['count lien web'])) { |
| 138 | - $suffix .= ' ' . $this->summary->memo['count lien web'] . 'x {lien web}'; |
|
| 138 | + $suffix .= ' '.$this->summary->memo['count lien web'].'x {lien web}'; |
|
| 139 | 139 | } |
| 140 | 140 | if (isset($this->summary->memo['presse'])) { |
| 141 | 141 | $suffix .= ' |
@@ -31,7 +31,7 @@ discard block |
||
| 31 | 31 | class WikiBotConfig |
| 32 | 32 | { |
| 33 | 33 | public const VERSION = '2.1'; |
| 34 | - public const WATCHPAGE_FILENAME = __DIR__ . '/resources/watch_pages.json'; |
|
| 34 | + public const WATCHPAGE_FILENAME = __DIR__.'/resources/watch_pages.json'; |
|
| 35 | 35 | public const EXIT_ON_CHECK_WATCHPAGE = false; |
| 36 | 36 | // do not stop if they play with {stop} on bot talk page |
| 37 | 37 | public const BLACKLIST_EDITOR = ['OrlodrimBot']; |
@@ -97,9 +97,8 @@ discard block |
||
| 97 | 97 | { |
| 98 | 98 | $text = WikiTextUtil::removeHTMLcomments($text); |
| 99 | 99 | $botName = $botName ?: self::getBotName(); |
| 100 | - $denyReg = (empty($botName)) ? '' : |
|
| 101 | - '|\{\{bots ?\| ?(optout|deny)\=[^\}]*' . preg_quote($botName, '#') . '[^\}]*\}\}'; |
|
| 102 | - return preg_match('#({{nobots}}|{{bots ?\| ?(optout|deny) ?= ?all ?}}' . $denyReg . ')#i', $text) > 0; |
|
| 100 | + $denyReg = (empty($botName)) ? '' : '|\{\{bots ?\| ?(optout|deny)\=[^\}]*'.preg_quote($botName, '#').'[^\}]*\}\}'; |
|
| 101 | + return preg_match('#({{nobots}}|{{bots ?\| ?(optout|deny) ?= ?all ?}}'.$denyReg.')#i', $text) > 0; |
|
| 103 | 102 | } |
| 104 | 103 | |
| 105 | 104 | protected static function getBotOwner() |
@@ -155,7 +154,7 @@ discard block |
||
| 155 | 154 | |
| 156 | 155 | protected function getBotTalkPageTitle(): string |
| 157 | 156 | { |
| 158 | - return self::TALK_PAGE_PREFIX . $this::getBotName(); |
|
| 157 | + return self::TALK_PAGE_PREFIX.$this::getBotName(); |
|
| 159 | 158 | } |
| 160 | 159 | |
| 161 | 160 | /** |
@@ -266,8 +265,8 @@ discard block |
||
| 266 | 265 | */ |
| 267 | 266 | public function minutesSinceLastEdit(string $title): int |
| 268 | 267 | { |
| 269 | - $time = $this->getTimestamp($title); // 2011-09-02T16:31:13Z |
|
| 268 | + $time = $this->getTimestamp($title); // 2011-09-02T16:31:13Z |
|
| 270 | 269 | |
| 271 | - return (int)round((time() - strtotime($time)) / 60); |
|
| 270 | + return (int) round((time() - strtotime($time)) / 60); |
|
| 272 | 271 | } |
| 273 | 272 | } |
@@ -78,7 +78,7 @@ |
||
| 78 | 78 | //echo count($titles)." titles\n"; |
| 79 | 79 | $edited = file(__DIR__.'/../resources/article_externRef_edited.txt', FILE_IGNORE_NEW_LINES | FILE_SKIP_EMPTY_LINES); |
| 80 | 80 | $filtered = array_diff($titles, $edited); |
| 81 | -$list = new PageList( $filtered ); |
|
| 81 | +$list = new PageList($filtered); |
|
| 82 | 82 | echo ">".$list->count()." dans liste\n"; |
| 83 | 83 | |
| 84 | 84 | new ExternRefWorker($botConfig, $wiki, $list, null, new InternetDomainParser()); |