Passed
Push — dev ( 8e8e3b...6bb8f6 )
by Dispositif
03:16 queued 15s
created

ExternRefTransformer::process()   B

Complexity

Conditions 11
Paths 21

Size

Total Lines 54
Code Lines 31

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 11
eloc 31
nc 21
nop 1
dl 0
loc 54
rs 7.3166
c 0
b 0
f 0

How to fix   Long Method    Complexity   

Long Method

Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.

For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.

Commonly applied refactorings include: Extract Method.

1
<?php
2
/**
3
 * This file is part of dispositif/wikibot application
4
 * 2019 © Philippe M. <[email protected]>
5
 * For the full copyright and MIT license information, please view the LICENSE file.
6
 */
7
8
declare(strict_types=1);
9
10
namespace App\Application;
11
12
use App\Domain\ExternDomains;
13
use App\Domain\ExternPageFactory;
14
use App\Domain\Models\Wiki\AbstractWikiTemplate;
15
use App\Domain\Models\Wiki\ArticleTemplate;
16
use App\Domain\Models\Wiki\LienWebTemplate;
17
use App\Domain\Models\Wiki\OuvrageTemplate;
18
use App\Domain\Publisher\ExternMapper;
19
use App\Domain\Utils\WikiTextUtil;
20
use App\Domain\WikiTemplateFactory;
21
use App\Infrastructure\Logger;
22
use Normalizer;
23
use Psr\Log\LoggerInterface;
24
use Symfony\Component\Yaml\Yaml;
25
26
/**
27
 * todo move Domain
28
 * Class ExternRefTransformer
29
 *
30
 * @package App\Application
31
 */
32
class ExternRefTransformer implements TransformerInterface
33
{
34
35
    /** Log file receiving one "domain,url" line each time the mapped data is incomplete. */
    const SKIPPED_FILE_LOG  = __DIR__.'/resources/external_skipped.log';
    /** Log file receiving the domain whose page request raised an exception. */
    const LOG_REQUEST_ERROR = __DIR__.'/resources/external_request_error.log';
    /**
     * When true, URLs whose domain has no entry in the YAML configuration are rejected.
     *
     * @var bool
     */
    public $skipUnauthorised = true;
    /**
     * One entry (site name, periodical name or "?") per successfully mapped URL.
     *
     * @var array
     */
    public $summaryLog = [];
    /**
     * Logger received by the constructor.
     *
     * @var LoggerInterface
     */
    protected $log;
    /**
     * Parsed content of resources/config_presse.yaml, indexed by domain.
     *
     * @var mixed
     */
    private $config;
    /**
     * Domain extracted from the URL currently being processed.
     *
     * @var string|string[]
     */
    private $domain;
    /**
     * URL currently being processed, exactly as received.
     *
     * @var string
     */
    private $url;
    /**
     * Mapper converting extracted page data into template parameters.
     *
     * @var ExternMapper
     */
    private $mapper;
    /**
     * JSON data sets, under the keys 'newspaper', 'scientific domain' and 'scientific wiki'.
     *
     * @var array
     */
    private $data = [];
    /**
     * Domains to skip: loaded from resources/config_skip_domain.txt, then extended
     * at runtime with every domain whose mapping turned out incomplete.
     *
     * @var array
     */
    private $skip_domain = [];
    /**
     * Page object built for the URL currently being processed.
     *
     * @var \App\Domain\ExternPage
     */
    private $externalPage;
71
72
    /**
     * ExternalRefTransformer constructor.
     *
     * Loads the per-domain YAML configuration, the list of domains to skip and the
     * three JSON data sets from the local "resources" directory, then creates the mapper.
     *
     * @param LoggerInterface $log
     */
    public function __construct(LoggerInterface $log)
    {
        $this->log = $log;

        // todo REFAC DataObject[]
        $this->config = Yaml::parseFile(__DIR__.'/resources/config_presse.yaml');

        // FILE_IGNORE_NEW_LINES matters: without it each entry keeps its trailing line
        // break and can never be equal to an extracted domain. trim() additionally
        // removes stray spaces or carriage returns around an entry.
        $skipFromFile = file(
            __DIR__.'/resources/config_skip_domain.txt',
            FILE_IGNORE_NEW_LINES | FILE_SKIP_EMPTY_LINES
        );
        $this->skip_domain = ($skipFromFile) ? array_map('trim', $skipFromFile) : [];

        $this->data['newspaper'] = json_decode(file_get_contents(__DIR__.'/resources/data_newspapers.json'), true);
        $this->data['scientific domain'] = json_decode(
            file_get_contents(__DIR__.'/resources/data_scientific_domain.json'),
            true
        );
        $this->data['scientific wiki'] = json_decode(
            file_get_contents(__DIR__.'/resources/data_scientific_wiki.json'),
            true
        );

        $this->mapper = new ExternMapper(new Logger());
    }
98
99
    /**
     * Converts a bare external URL into a serialized wiki template.
     *
     * The input string is returned unchanged when:
     *  - the URL is rejected by isURLAutorized();
     *  - fetching or parsing the page throws an exception;
     *  - the page exposes neither 'JSON-LD' nor 'meta' data;
     *  - the page 'robots' data contains 'noindex';
     *  - the mapped data lacks a 'url' or a 'titre' (the domain is then added to
     *    the in-memory skip list and recorded in SKIPPED_FILE_LOG).
     *
     * @param string $string
     *
     * @return string
     * @throws \Exception
     */
    public function process(string $string): string
    {
        if (!$this->isURLAutorized($string)) {
            return $string;
        }

        try {
            sleep(5); // NOTE(review): presumably throttles successive HTTP requests — confirm
            $this->externalPage = ExternPageFactory::fromURL($string, $this->log);
            $pageData = $this->externalPage->getData();
            $this->log->debug('metaData', $pageData);
        } catch (\Exception $e) {
            // Do not generate a {lien brisé} (broken link) template: the error may be a temporary 404.
            $this->log->notice('erreur sur extractWebData '.$e->getMessage());
            // Appended with a line break so earlier failures are kept, one domain per line.
            $this->appendToLogFile(self::LOG_REQUEST_ERROR, $this->domain."\n");

            return $string;
        }

        if (empty($pageData)
            || (empty($pageData['JSON-LD']) && empty($pageData['meta']))
        ) {
            // Site with unusable HTML: no metadata to work with.
            return $string;
        }

        if (isset($pageData['robots'])
            && is_string($pageData['robots'])
            && strpos($pageData['robots'], 'noindex') !== false
        ) {
            $this->log->notice('SKIP robots: noindex');

            return $string;
        }

        $mapData = $this->mapper->process($pageData);

        // A template needs at least a URL and a title.
        if (empty($mapData) || empty($mapData['url']) || empty($mapData['titre'])) {
            $this->skip_domain[] = $this->domain;
            $this->log->info('Mapping incomplet');
            $this->appendToLogFile(self::SKIPPED_FILE_LOG, $this->domain.",".$this->url."\n");

            return $string;
        }

        $this->tagAndLog($mapData);
        $this->addSummaryLog($mapData);

        $template = $this->chooseTemplateByData($mapData);

        $mapData = $this->replaceSitenameByConfig($mapData, $template);
        $mapData = $this->replaceURLbyOriginal($mapData);

        $template->hydrate($mapData, true);

        $serialized = $template->serialize(true);
        $this->log->info($serialized."\n");

        // Normalizer::normalize() returns false on failure (e.g. invalid UTF-8); returning
        // that value would violate the declared string return type under strict_types.
        $normalized = Normalizer::normalize($serialized);

        return is_string($normalized) ? $normalized : $serialized;
    }

    /**
     * Appends text to a log file; a failed write is reported through the logger
     * instead of being silently ignored. Logging stays best-effort: no exception is thrown.
     *
     * @param string $file
     * @param string $line
     */
    private function appendToLogFile(string $file, string $line)
    {
        if (@file_put_contents($file, $line, FILE_APPEND) === false) {
            $this->log->warning('Unable to write to log file '.$file);
        }
    }
160
161
    /**
     * Tells whether the given string is a URL this transformer is allowed to process.
     *
     * Side effects: stores the URL and its extracted domain in $this->url and
     * $this->domain, and normalises $this->config[<domain>] to an array.
     *
     * Returns false when the string is not a single http(s) URL without spaces,
     * when the domain is in the skip list, when the domain is not configured
     * (and $skipUnauthorised is true), or when the domain is marked 'desactived'.
     *
     * @param string $string
     *
     * @return bool
     * @throws \Exception
     */
    protected function isURLAutorized(string $string): bool
    {
        // "https?" accepts both http:// and https:// (the former "http?s" rejected plain http).
        if (!preg_match('#^https?://[^ ]+$#i', $string)) {
            return false;
        }

        $this->url = $string;
        $this->domain = ExternDomains::extractSubDomain($this->url);

        if (in_array($this->domain, $this->skip_domain, true)) {
            return false;
        }

        if (!isset($this->config[$this->domain])) {
            $this->log->info("Domain ".$this->domain." non configuré\n");
            if ($this->skipUnauthorised) {
                return false;
            }
        } else {
            echo "> Domaine ".Color::LIGHT_GREEN.$this->domain.Color::NORMAL." configuré\n";
        }

        $domainConfig = $this->config[$this->domain] ?? [];

        // The scalar form ("domain: desactived") must be tested BEFORE the entry is
        // normalised to an array, otherwise the string is lost and the test can never match.
        $isDisabled = $domainConfig === 'desactived'
            || (is_array($domainConfig) && isset($domainConfig['desactived']));

        $this->config[$this->domain] = is_array($domainConfig) ? $domainConfig : [];

        if ($isDisabled) {
            $this->log->info("Domain ".$this->domain." desactivé\n");

            return false;
        }

        return true;
    }
200
201
    /**
     * Logs the mapped data, then emits one notice per category the page falls into
     * (article data present, newspaper domain, scientific domain).
     *
     * @param array $mapData
     */
    private function tagAndLog(array $mapData)
    {
        $this->log->debug('mapData', $mapData);

        $notices = [];
        if (!empty($mapData['DATA-ARTICLE'])) {
            $notices[] = "Article OK";
        }
        if (isset($this->data['newspaper'][$this->domain])) {
            $notices[] = 'PRESSE';
        }
        if ($this->isScientificDomain()) {
            $notices[] = 'SCIENCE';
        }

        foreach ($notices as $notice) {
            $this->log->notice($notice);
        }
    }
215
216
    /**
     * Tells whether the current domain is a scientific one: either listed in the
     * 'scientific domain' data set, or a sub-domain of revues.org.
     *
     * @return bool
     */
    private function isScientificDomain(): bool
    {
        if (isset($this->data['scientific domain'][$this->domain])) {
            return true;
        }

        // strpos() takes the haystack first: look for '.revues.org' inside the domain.
        // (The arguments used to be swapped, so this test could never succeed for a
        // real sub-domain such as "xxx.revues.org".)
        return is_string($this->domain) && strpos($this->domain, '.revues.org') !== false;
    }
227
228
    /**
     * Records a short label for the processed page in the summary log: the 'site'
     * value if set, otherwise the 'périodique' value, otherwise '?'.
     *
     * @param array $mapData
     */
    private function addSummaryLog(array $mapData)
    {
        if (isset($mapData['site'])) {
            $label = $mapData['site'];
        } elseif (isset($mapData['périodique'])) {
            $label = $mapData['périodique'];
        } else {
            $label = '?';
        }

        $this->summaryLog[] = $label;
    }
232
233
    /**
     * Picks the wiki template ('article' or 'lien web') for the current page.
     *
     * 'lien web' is used when the domain configuration asks for it explicitly, or
     * when no rule selects 'article'. 'article' is selected when the configuration
     * says 'article', when article data is present and the configuration says 'auto'
     * or the domain is a known newspaper, or when the domain is scientific.
     *
     * todo: refactor for readability
     *
     * @param array $mapData
     *
     * @return AbstractWikiTemplate
     */
    private function chooseTemplateByData(array $mapData): AbstractWikiTemplate
    {
        // The configured value is written back so the key always exists afterwards.
        $configured = $this->config[$this->domain]['template'] ?? [];
        $this->config[$this->domain]['template'] = $configured;

        $hasArticleData = $mapData['DATA-ARTICLE'] ?? false;

        if ($configured === 'lien web') {
            // An explicit 'lien web' configuration wins over every other rule.
            $templateName = 'lien web';
        } elseif ($configured === 'article'
            || ($hasArticleData && ($configured === 'auto' || !empty($this->data['newspaper'][$this->domain])))
            || $this->isScientificDomain()
        ) {
            $templateName = 'article';
        } else {
            $templateName = 'lien web';
        }

        $template = WikiTemplateFactory::create($templateName);
        $template->userSeparator = " |";

        return $template;
    }
257
258
    /**
     * Replaces the periodical title ('périodique') or the site name ('site') of the
     * mapped data, applying in order: the newspaper data set, the scientific journal
     * data set, the YAML configuration of the domain, and finally a fallback on the
     * pretty domain name of the page.
     *
     * @param array $mapData
     * @param       $template
     *
     * @return array
     */
    private function replaceSitenameByConfig(array $mapData, $template): array
    {
        $isLienWeb = $template instanceof LienWebTemplate;

        // 1. Newspaper data set (described in the original code as coming from Wikidata).
        $newspaper = $this->data['newspaper'][$this->domain] ?? null;
        if (!empty($newspaper)) {
            $frwiki = $newspaper['frwiki'];
            $label = $newspaper['fr'];
            if ($isLienWeb || isset($mapData['site'])) {
                $mapData['site'] = WikiTextUtil::wikilink($label, $frwiki);
            }
            if ($template instanceof ArticleTemplate || isset($mapData['périodique'])) {
                $mapData['périodique'] = WikiTextUtil::wikilink($label, $frwiki);
            }
        }

        // 2. Scientific journal data set, looked up by the current periodical title.
        if (isset($mapData['périodique'], $this->data['scientific wiki'][$mapData['périodique']])) {
            $journalTitle = $mapData['périodique'];
            $mapData['périodique'] = WikiTextUtil::wikilink(
                $journalTitle,
                $this->data['scientific wiki'][$journalTitle]
            );
        }

        // 3. YAML configuration of the domain.
        $domainConfig = $this->config[$this->domain] ?? [];
        if ($isLienWeb && !empty($domainConfig['site'])) {
            $mapData['site'] = $domainConfig['site'];
        }
        // NOTE(review): the test is on OuvrageTemplate although step 1 tests
        // ArticleTemplate for the same parameter — confirm this is intended.
        if (!empty($domainConfig['périodique'])
            && ($template instanceof OuvrageTemplate || !empty($mapData['périodique']))
        ) {
            $mapData['périodique'] = $domainConfig['périodique'];
        }

        // 4. Fallback: a "lien web" template always gets a site name.
        if ($isLienWeb && empty($mapData['site'])) {
            $mapData['site'] = $this->externalPage->getPrettyDomainName();
        }

        return $mapData;
    }
306
307
    /**
     * Forces the 'url' entry of the mapped data to the URL originally received,
     * whatever URL the page metadata declared.
     *
     * @param array $mapData
     *
     * @return array
     */
    private function replaceURLbyOriginal(array $mapData): array
    {
        // array_replace() keeps every other key (including numeric ones) untouched.
        return array_replace($mapData, ['url' => $this->url]);
    }
312
313
}
314