Passed
Push — dev ( b8ced8...f7ea32 )
by Dispositif
07:14
created

CompleteProcess::skipGoogle()   A

Complexity

Conditions 5
Paths 2

Size

Total Lines 11
Code Lines 6

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 5
eloc 6
nc 2
nop 1
dl 0
loc 11
rs 9.6111
c 0
b 0
f 0
1
<?php
2
/**
3
 * This file is part of dispositif/wikibot application
4
 * 2019 © Philippe M. <[email protected]>
5
 * For the full copyright and MIT license information, please view the LICENSE file.
6
 */
7
8
declare(strict_types=1);
9
10
namespace App\Application;
11
12
use App\Domain\Models\Wiki\OuvrageTemplate;
13
use App\Domain\OuvrageComplete;
14
use App\Domain\OuvrageFactory;
15
use App\Domain\OuvrageOptimize;
16
use App\Domain\Publisher\Wikidata2Ouvrage;
17
use App\Domain\Utils\TemplateParser;
18
use App\Infrastructure\WikidataAdapter;
19
use GuzzleHttp\Client;
20
use Normalizer;
21
use Throwable;
22
23
/**
24
 * Class CompleteProcess
25
 */
26
class CompleteProcess
27
{
28
    /**
     * ISBNs excluded from the online lookups (BnF, Google, etc.).
     * Format: EAN or ISBN-10, without hyphens.
     */
    public const ISBN_EAN_SKIP
        = [
            '9782918758440', // Profils de lignes du réseau ferré français vol.2
            '9782918758341', // Profils de lignes du réseau ferré français vol.1
        ];
    /**
     * When true, progress details are dumped/echoed while processing.
     *
     * @var bool
     */
    public $verbose = false;

    /**
     * Source of the rows to complete and sink for the completed data.
     *
     * @var QueueInterface
     */
    private $queueAdapter;
    /**
     * Raw text of the row currently being processed.
     *
     * @var string
     */
    private $raw = '';
    /**
     * Article title of the row currently being processed
     * (presumably a string; taken as-is from the queue row).
     */
    private $page;

    /**
     * Log entries accumulated for the current row.
     *
     * @var array
     */
    private $log = [];
    /**
     * True once a non-cosmetic change was reported for the current row.
     *
     * @var bool
     */
    private $notCosmetic = false;
    /**
     * True once a major change was reported for the current row.
     *
     * @var bool
     */
    private $major = false;
    /**
     * Template being completed; reset to null at the start of each row.
     *
     * @var OuvrageTemplate|null
     */
    private $ouvrage;
59
60
    /**
     * Store the queue adapter and the verbosity flag.
     *
     * @param QueueInterface $queueAdapter queue used to fetch rows and to send results
     * @param bool|null      $verbose      null is treated as false
     */
    public function __construct(QueueInterface $queueAdapter, ?bool $verbose = false)
    {
        $this->verbose = (true === $verbose);
        $this->queueAdapter = $queueAdapter;
    }
65
66
    /**
     * Process up to $limit rows from the queue: parse the raw "ouvrage"
     * template, optimize it, complete it from online sources when it carries
     * a usable ISBN, then send the result back to the queue adapter.
     *
     * The loop also ends when getNewRow2Complete() throws because the queue
     * is empty; that exception is not caught here.
     *
     * @param int|null $limit maximum number of rows to process
     *
     * @return bool always true when the loop finishes normally
     * @throws \Exception
     */
    public function run(?int $limit = 10000)
    {
        $memory = new Memory();

        while ($limit > 0) {
            --$limit;
            sleep(1);

            $row = $this->getNewRow2Complete();
            $this->raw = $row['raw'];
            $this->page = $row['page'];

            printf(
                "-------------------------------\n%s [%s]\n%s\n%s\n",
                date("Y-m-d H:i:s"),
                WikiBotConfig::getGitVersion() ?? '',
                $this->page,
                $this->raw
            );
            if ($this->verbose) {
                $memory->echoMemory(true);
            }

            // Reset the per-row state
            $this->log = [];
            $this->ouvrage = null;
            $this->notCosmetic = false;
            $this->major = false;

            // A parsing failure leaves $origin null and is reported by the check below
            $origin = null;
            try {
                $parse = TemplateParser::parseAllTemplateByName('ouvrage', $this->raw);
                $origin = $parse['ouvrage'][0]['model'] ?? null;
            } catch (Throwable $e) {
                $origin = null;
            }

            if (!$origin instanceof OuvrageTemplate) {
                printf("*** ERREUR impossible de transformer en modèle %s \n", $this->raw);
                continue;
            }

            // Final optimizing (with online predictions)
            $optimizer = new OuvrageOptimize($origin, $this->page);
            $optimizer->doTasks();
            $this->ouvrage = $optimizer->getOuvrage();
            $this->log = array_merge($this->log, $optimizer->getLog());
            $this->notCosmetic = ($optimizer->notCosmetic || $this->notCosmetic);

            // Online search, driven by the ISBN values read from the original template
            $isbn = $origin->getParam('isbn');
            $isbn10 = $origin->getParam('isbn2') ?? $origin->getParam('isbn10');
            $isbnUsable = !empty($isbn)
                && !$origin->hasParamValue('isbn invalide')
                && !$origin->hasParamValue('isbn erroné');
            if ($isbnUsable) {
                $this->onlineIsbnSearch($isbn, $isbn10);
            }

            $this->sendCompleted();
            unset($optimizer, $parse, $origin);
        }

        return true;
    }
134
135
    /**
     * Fetch the next row to complete from the queue adapter.
     *
     * The returned row always has a non-empty 'raw' entry; run() also reads
     * its 'page' entry (article title).
     *
     * @return array
     * @throws \Exception when the adapter has no (usable) row left
     */
    private function getNewRow2Complete(): ?array
    {
        $row = $this->queueAdapter->getNewRaw();
        if (empty($row) || empty($row['raw'])) {
            echo "STOP: no more queue to process \n";
            throw new \Exception('no more queue to process');
        }

        return $row;
    }
151
152
    /**
     * Tell whether one of the given ISBNs is listed in ISBN_EAN_SKIP.
     *
     * Hyphens are removed before the lookup. The comparison is strict so that
     * loosely-equal numeric strings do not produce a false match.
     *
     * @param string      $isbn
     * @param string|null $isbn10
     *
     * @return bool
     */
    private function isIsbnSkipped(string $isbn, ?string $isbn10 = null): bool
    {
        if (in_array(str_replace('-', '', $isbn), self::ISBN_EAN_SKIP, true)) {
            return true;
        }

        return $isbn10 !== null
            && in_array(str_replace('-', '', $isbn10), self::ISBN_EAN_SKIP, true);
    }
169
170
    /**
     * Complete the current ouvrage from online sources, queried by ISBN.
     *
     * Order: BnF (then Wikidata when the BnF result carries infos), Google
     * unless skipGoogle() says otherwise, and OpenLibrary only when neither
     * BnF nor Google produced a result.
     *
     * @param string      $isbn
     * @param string|null $isbn10
     *
     * @throws Throwable when the Google lookup fails for a reason other than
     *                   an unresolved googleapis host
     */
    private function onlineIsbnSearch(string $isbn, ?string $isbn10 = null)
    {
        if ($this->isIsbnSkipped($isbn, $isbn10)) {
            echo "*** SKIP THAT ISBN ***\n";

            // TODO: check the logic of this early return
            return;
        }

        if ($this->verbose) {
            echo "sleep 10...\n";
        }
        sleep(10);

        // Explicitly initialised so the isset() checks below never rely on undefined variables
        $bnfOuvrage = null;
        $googleOuvrage = null;

        try {
            if ($this->verbose) {
                dump('BIBLIO NAT FRANCE...');
            }
            // BnF fails to find an old (ISBN-10) book from its ISBN-13: try the ISBN-10 first
            if ($isbn10) {
                $bnfOuvrage = OuvrageFactory::BnfFromIsbn($isbn10);
                sleep(2);
            }
            if (!$isbn10 || empty($bnfOuvrage) || empty($bnfOuvrage->getParam('titre'))) {
                $bnfOuvrage = OuvrageFactory::BnfFromIsbn($isbn);
            }
            if ($bnfOuvrage instanceof OuvrageTemplate) {
                $this->completeOuvrage($bnfOuvrage);

                // Wikidata requests from $infos (ISBN/ISNI)
                if (!empty($bnfOuvrage->getInfos())) {
                    if ($this->verbose) {
                        dump('WIKIDATA...');
                    }
                    // TODO move to factory
                    $wikidataAdapter = new WikidataAdapter(
                        new Client(['timeout' => 5, 'headers' => ['User-Agent' => getenv('USER_AGENT')]])
                    );
                    $wdComplete = new Wikidata2Ouvrage($wikidataAdapter, clone $bnfOuvrage, $this->page);
                    $this->completeOuvrage($wdComplete->getOuvrage());
                }
            }
        } catch (Throwable $e) {
            echo sprintf(
                "*** ERREUR BnF Isbn Search %s %s %s \n",
                $e->getMessage(),
                $e->getFile(),
                $e->getLine()
            );
        }

        if (!isset($bnfOuvrage) || !$this->skipGoogle($bnfOuvrage)) {
            try {
                if ($this->verbose) {
                    dump('GOOGLE...');
                }
                $googleOuvrage = OuvrageFactory::GoogleFromIsbn($isbn);
                $this->completeOuvrage($googleOuvrage);
            } catch (Throwable $e) {
                echo "*** ERREUR GOOGLE Isbn Search ***".$e->getMessage()."\n";
                // Only a DNS failure on the Google API host is tolerated; anything else is rethrown
                if (strpos($e->getMessage(), 'Could not resolve host: www.googleapis.com') === false) {
                    throw $e;
                }
            }
        }

        if (!isset($bnfOuvrage) && !isset($googleOuvrage)) {
            try {
                if ($this->verbose) {
                    dump('OpenLibrary...');
                }
                $openLibraryOuvrage = OuvrageFactory::OpenLibraryFromIsbn($isbn);
                if (!empty($openLibraryOuvrage)) {
                    $this->completeOuvrage($openLibraryOuvrage);
                }
            } catch (Throwable $e) {
                // Best-effort source: report the failure (with its cause) and carry on
                echo '**** ERREUR OpenLibrary Isbn Search '.$e->getMessage()."\n";
            }
        }
    }
252
253
    //    private function onlineQuerySearch(string $query)
254
    //    {
255
    //        echo "sleep 40...";
256
    //        sleep(20);
257
    //        onlineQuerySearch:
258
    //
259
    //        try {
260
    //            dump('GOOGLE SEARCH...');
261
    //            //            $googleOuvrage = OuvrageFactory::GoogleFromIsbn($isbn);
262
    //            $adapter = new GoogleBooksAdapter();
263
    //            $data = $adapter->search('blabla');
264
    //            dump($data);
265
    //            //die;
266
    //            //            return $import->getOuvrage();
267
    //            //            $this->completeOuvrage($googleOuvrage);
268
    //        } catch (Throwable $e) {
269
    //            echo "*** ERREUR GOOGLE QuerySearch *** ".$e->getMessage()."\n";
270
    //            echo "sleep 30min";
271
    //            sleep(60 * 30);
272
    //            echo "Wake up\n";
273
    //            goto onlineQuerySearch;
274
    //        }
275
    //    }
276
277
    /**
     * Merge an ouvrage obtained online into the current one.
     *
     * The online ouvrage is optimized, used to complete $this->ouvrage, and
     * the merged result is optimized again. The completer's log and its
     * major / notCosmetic flags are accumulated on this object.
     *
     * @param OuvrageTemplate $onlineOuvrage
     */
    private function completeOuvrage(OuvrageTemplate $onlineOuvrage)
    {
        if ($this->verbose) {
            dump($onlineOuvrage->serialize(true));
        }

        $onlineOptimizer = new OuvrageOptimize($onlineOuvrage, $this->page);
        $onlineOptimized = $onlineOptimizer->doTasks()->getOuvrage();

        $completer = new OuvrageComplete($this->ouvrage, $onlineOptimized);
        $this->ouvrage = $completer->getResult();

        // todo move that optimizing in OuvrageComplete ?
        $finalOptimizer = new OuvrageOptimize($this->ouvrage, $this->page);
        $this->ouvrage = $finalOptimizer->doTasks()->getOuvrage();

        if ($this->verbose) {
            dump($completer->getLog());
        }

        $this->major = ($this->major || $completer->major);
        $this->notCosmetic = ($this->notCosmetic || $completer->notCosmetic);
        $this->log = array_merge($this->log, $completer->getLog());

        unset($onlineOptimizer, $finalOptimizer, $completer);
    }
303
304
    /**
     * Build the result row for the current ouvrage and send it to the queue adapter.
     *
     * The 'modifs' entry is the comma-joined log cut to 250 characters and the
     * 'isbn' entry is cut to 20 bytes. A template without an 'isbn' parameter
     * yields an empty 'isbn' entry instead of a TypeError from substr()
     * (this file runs with strict_types).
     */
    private function sendCompleted()
    {
        $isbn13 = $this->ouvrage->getParam('isbn');

        $finalData = [
            //    'page' =>
            'raw' => $this->raw,
            'opti' => $this->serializeFinalOpti(),
            'optidate' => date("Y-m-d H:i:s"),
            'modifs' => mb_substr(implode(',', $this->log), 0, 250),
            'notcosmetic' => ($this->notCosmetic) ? 1 : 0,
            'major' => ($this->major) ? 1 : 0,
            'isbn' => (null === $isbn13) ? '' : substr($isbn13, 0, 20),
            'version' => WikiBotConfig::getGitVersion(),
        ];
        if ($this->verbose) {
            dump($finalData);
        }
        // Json ?
        $result = $this->queueAdapter->sendCompletedData($finalData);

        dump($result); // bool
    }
327
328
    /**
     * Final serialization of the completed OuvrageTemplate.
     *
     * The serialized text is Unicode-normalized. Normalizer::normalize() may
     * return false on failure; in that case the un-normalized text is
     * returned so the declared string return type always holds.
     *
     * @return string
     */
    private function serializeFinalOpti(): string
    {
        //        // Disabled idea: make the compact style more spaced out
        //        if ('|' === $this->ouvrage->userSeparator) {
        //            $this->ouvrage->userSeparator = ' |';
        //        }
        $serialized = $this->ouvrage->serialize(true);
        $normalized = Normalizer::normalize($serialized);

        return is_string($normalized) ? $normalized : $serialized;
    }
344
345
    /**
     * Decide whether the Google lookup can be skipped.
     *
     * It is skipped only when the BnF result is an OuvrageTemplate with a
     * 'titre' value and the current ouvrage already has a 'lire en ligne' or
     * 'présentation en ligne' value.
     *
     * @param mixed $bnfOuvrage result of the BnF lookup
     *
     * @return bool
     */
    private function skipGoogle($bnfOuvrage): bool
    {
        if (!$bnfOuvrage instanceof OuvrageTemplate) {
            return false;
        }
        if (!$bnfOuvrage->hasParamValue('titre')) {
            return false;
        }

        $hasOnlineLink = $this->ouvrage->hasParamValue('lire en ligne')
            || $this->ouvrage->hasParamValue('présentation en ligne');

        return $hasOnlineLink;
    }
357
}
358