Passed
Branch dev (8d7b92)
by Dispositif
03:12
created

OuvrageCompleteWorker::isIsbnSkipped()   A

Complexity

Conditions 4
Paths 2

Size

Total Lines 10
Code Lines 5

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 4
eloc 5
nc 2
nop 2
dl 0
loc 10
rs 10
c 0
b 0
f 0
1
<?php
2
/**
3
 * This file is part of dispositif/wikibot application
4
 * 2019 © Philippe M. <[email protected]>
5
 * For the full copyright and MIT license information, please view the LICENSE file.
6
 */
7
8
declare(strict_types=1);
9
10
namespace App\Application;
11
12
use App\Domain\Models\Wiki\OuvrageTemplate;
13
use App\Domain\OuvrageComplete;
14
use App\Domain\OuvrageFactory;
15
use App\Domain\OuvrageOptimize;
16
use App\Domain\Publisher\Wikidata2Ouvrage;
17
use App\Domain\Utils\TemplateParser;
18
use App\Infrastructure\Memory;
19
use App\Infrastructure\WikidataAdapter;
20
use Exception;
21
use GuzzleHttp\Client;
22
use Normalizer;
23
use Throwable;
24
25
/**
26
 * Class OuvrageCompleteWorker
27
 *
28
 * @package App\Application
29
 */
30
class OuvrageCompleteWorker
31
{
32
    /**
     * ISBNs excluded from the online requests (BnF, Google, etc.).
     * Values are written as EAN or ISBN-10, without hyphens.
     */
    const ISBN_EAN_SKIP = [
        '9782918758440', // Profils de lignes du réseau ferré français vol.2
        '9782918758341', // Profils de lignes du réseau ferré français vol.1
    ];

    /**
     * When true, extra debugging output is printed while rows are processed.
     *
     * @var bool
     */
    public $verbose = false;

    /**
     * Adapter used to fetch the rows to complete and to send the completed data.
     *
     * @var QueueInterface
     */
    private $queueAdapter;

    /**
     * Raw string of the row currently processed.
     *
     * @var string
     */
    private $raw = '';

    /**
     * Article title of the row currently processed.
     */
    private $page;

    /**
     * Log entries collected for the current row; joined into the 'modifs' field when sent.
     *
     * @var array
     */
    private $log = [];

    /**
     * Flag sent as 'notcosmetic' with the completed data.
     *
     * @var bool
     */
    private $notCosmetic = false;

    /**
     * Flag sent as 'major' with the completed data.
     *
     * @var bool
     */
    private $major = false;

    /**
     * Template being optimized and completed for the current row.
     *
     * @var OuvrageTemplate
     */
    private $ouvrage;
63
64
    /**
     * @param QueueInterface $queueAdapter adapter used to fetch rows and to send completed data
     * @param bool|null      $verbose      enables debugging output; null is treated as false
     */
    public function __construct(QueueInterface $queueAdapter, ?bool $verbose = false)
    {
        $this->verbose = (true === $verbose);
        $this->queueAdapter = $queueAdapter;
    }
69
70
    /**
     * Main loop of the worker.
     *
     * For each row: prints a header, parses the "ouvrage" template from the raw string,
     * optimizes it, completes it from online sources when an ISBN is present and not
     * flagged as invalid, then sends the completed data through the queue adapter.
     * A row whose raw string cannot be turned into an OuvrageTemplate is reported and skipped.
     *
     * @param int|null $limit maximum number of rows to handle (null handles no row)
     *
     * @return bool always true
     * @throws Exception when getNewRow2Complete() finds no more row to process
     * @throws Throwable when onlineIsbnSearch() rethrows a Google lookup error
     */
    public function run(?int $limit = 10000)
    {
        $memory = new Memory();
        for (; $limit > 0; $limit--) {
            sleep(1);
            $row = $this->getNewRow2Complete();
            $this->raw = $row['raw'];
            $this->page = $row['page'];

            printf(
                "-------------------------------\n%s [%s]\n%s\n%s\n",
                date("Y-m-d H:i:s"),
                WikiBotConfig::getGitVersion() ?? '',
                $this->page,
                $this->raw
            );
            if ($this->verbose) {
                $memory->echoMemory(true);
            }

            // reset the per-row state
            $this->log = [];
            $this->ouvrage = null;
            $this->notCosmetic = false;
            $this->major = false;

            try {
                $parse = TemplateParser::parseAllTemplateByName('ouvrage', $this->raw);
                $origin = $parse['ouvrage'][0]['model'] ?? null;
            } catch (Throwable $e) {
                // a parser failure is handled like a missing model, just below
                $origin = null;
            }

            if (!$origin instanceof OuvrageTemplate) {
                echo sprintf("*** ERREUR impossible de transformer en modèle %s \n", $this->raw);
                continue;
            }

            // Final optimizing (with online predictions)
            $optimizer = new OuvrageOptimize($origin, $this->page);
            $optimizer->doTasks();
            $this->ouvrage = $optimizer->getOuvrage();
            $this->log = array_merge($this->log, $optimizer->getLog());
            $this->notCosmetic = ($optimizer->notCosmetic || $this->notCosmetic);

            // ONLINE SEARCH
            // Original author's note: the isbn is read here before the EAN>ISBN formatting.
            $isbn = $origin->getParam('isbn');
            $isbn10 = $origin->getParam('isbn2') ?? $origin->getParam('isbn10');
            $isbnUsable = !empty($isbn)
                && !$origin->hasParamValue('isbn invalide')
                && !$origin->hasParamValue('isbn erroné');
            if ($isbnUsable) {
                $this->onlineIsbnSearch($isbn, $isbn10);
            }

            $this->sendCompleted();
            unset($optimizer);
            unset($parse);
            unset($origin);
        } // END OF LOOP

        return true;
    }
138
139
    /**
     * Fetches the next row (title + raw strings) to complete from the queue adapter.
     * The original notes name an AMQP queue, an SQL select or a file as possible back-ends.
     *
     * @return array row holding a non-empty 'raw' entry
     * @throws Exception when the adapter returns nothing or a row without 'raw' content
     */
    private function getNewRow2Complete(): array
    {
        $row = $this->queueAdapter->getNewRaw();
        if (!empty($row) && !empty($row['raw'])) {
            return $row;
        }

        echo "STOP: no more queue to process \n";
        throw new Exception('no more queue to process');
    }
155
156
    /**
     * Tells whether one of the given ISBNs belongs to the ISBN_EAN_SKIP exclusion list.
     *
     * Hyphens and spaces are removed before the lookup, and the lookup is strict
     * (string identity), so numeric strings are never compared as numbers.
     *
     * @param string      $isbn
     * @param string|null $isbn10
     *
     * @return bool true when $isbn or $isbn10 is in the exclusion list
     */
    private function isIsbnSkipped(string $isbn, ?string $isbn10 = null): bool
    {
        foreach ([$isbn, $isbn10] as $candidate) {
            if (null === $candidate) {
                continue;
            }
            // the list stores compact values: drop the usual ISBN separators
            $compact = str_replace(['-', ' '], '', $candidate);
            if (in_array($compact, self::ISBN_EAN_SKIP, true)) {
                return true;
            }
        }

        return false;
    }
173
174
    /**
     * Completes the current ouvrage from online sources queried by ISBN.
     *
     * Order of the lookups: BnF (followed by Wikidata when the BnF result carries infos),
     * then Google unless skipGoogle() returns true, then OpenLibrary only when neither
     * a BnF nor a Google result is set. Nothing is queried for an ISBN of the skip list.
     *
     * BnF and OpenLibrary failures are reported on the output and do not stop the worker.
     *
     * @param string      $isbn   ISBN used for the BnF, Google and OpenLibrary requests
     * @param string|null $isbn10 optional ISBN-10, tried first for the BnF request
     *
     * @throws Throwable any Google lookup error whose message is not a host resolution failure
     */
    private function onlineIsbnSearch(string $isbn, ?string $isbn10 = null)
    {
        if ($this->isIsbnSkipped($isbn, $isbn10)) {
            echo "*** SKIP THAT ISBN ***\n";

            // TODO (original author): check the logic of this early return
            return;
        }

        if ($this->verbose) {
            echo "sleep 10...\n";
        }
        sleep(10);

        try {
            if ($this->verbose) {
                dump('BIBLIO NAT FRANCE...');
            }
            // Original author's note: BnF cannot find an old (ISBN-10) book from its ISBN-13,
            // so the ISBN-10 is requested first when available.
            $bnfOuvrage = null;
            if ($isbn10) {
                $bnfOuvrage = OuvrageFactory::BnfFromIsbn($isbn10);
                sleep(2);
            }
            if (!$isbn10 || empty($bnfOuvrage) || empty($bnfOuvrage->getParam('titre'))) {
                $bnfOuvrage = OuvrageFactory::BnfFromIsbn($isbn);
            }
            if ($bnfOuvrage instanceof OuvrageTemplate) {
                $this->completeOuvrage($bnfOuvrage);

                // Wikidata requests from $infos (ISBN/ISNI)
                if (!empty($bnfOuvrage->getInfos())) {
                    if ($this->verbose) {
                        dump('WIKIDATA...');
                    }
                    // TODO move to factory
                    $wikidataAdapter = new WikidataAdapter(
                        new Client(['timeout' => 5, 'headers' => ['User-Agent' => getenv('USER_AGENT')]])
                    );
                    $wdComplete = new Wikidata2Ouvrage($wikidataAdapter, clone $bnfOuvrage, $this->page);
                    $this->completeOuvrage($wdComplete->getOuvrage());
                }
            }
        } catch (Throwable $e) {
            echo sprintf(
                "*** ERREUR BnF Isbn Search %s %s %s \n",
                $e->getMessage(),
                $e->getFile(),
                $e->getLine()
            );
        }

        if (!isset($bnfOuvrage) || !$this->skipGoogle($bnfOuvrage)) {
            try {
                if ($this->verbose) {
                    dump('GOOGLE...');
                }
                $googleOuvrage = OuvrageFactory::GoogleFromIsbn($isbn);
                $this->completeOuvrage($googleOuvrage);
            } catch (Throwable $e) {
                echo "*** ERREUR GOOGLE Isbn Search ***".$e->getMessage()."\n";
                // only a host resolution failure is tolerated; anything else stops the worker
                if (strpos($e->getMessage(), 'Could not resolve host: www.googleapis.com') === false) {
                    throw $e;
                }
                unset($e);
            }
        }

        if (!isset($bnfOuvrage) && !isset($googleOuvrage)) {
            try {
                if ($this->verbose) {
                    dump('OpenLibrary...');
                }
                $openLibraryOuvrage = OuvrageFactory::OpenLibraryFromIsbn($isbn);
                if (!empty($openLibraryOuvrage)) {
                    $this->completeOuvrage($openLibraryOuvrage);
                }
            } catch (Throwable $e) {
                // best effort: report the cause like the other handlers and go on
                echo '**** ERREUR OpenLibrary Isbn Search '.$e->getMessage()."\n";
            }
        }
    }
257
258
    //    private function onlineQuerySearch(string $query)
259
    //    {
260
    //        echo "sleep 40...";
261
    //        sleep(20);
262
    //        onlineQuerySearch:
263
    //
264
    //        try {
265
    //            dump('GOOGLE SEARCH...');
266
    //            //            $googleOuvrage = OuvrageFactory::GoogleFromIsbn($isbn);
267
    //            $adapter = new GoogleBooksAdapter();
268
    //            $data = $adapter->search('blabla');
269
    //            dump($data);
270
    //            //die;
271
    //            //            return $import->getOuvrage();
272
    //            //            $this->completeOuvrage($googleOuvrage);
273
    //        } catch (Throwable $e) {
274
    //            echo "*** ERREUR GOOGLE QuerySearch *** ".$e->getMessage()."\n";
275
    //            echo "sleep 30min";
276
    //            sleep(60 * 30);
277
    //            echo "Wake up\n";
278
    //            goto onlineQuerySearch;
279
    //        }
280
    //    }
281
282
    /**
     * Merges an ouvrage found online into the current ouvrage.
     *
     * The online ouvrage is optimized, merged with OuvrageComplete, and the merged result
     * is optimized again. The completer's log and its major/notCosmetic flags are then
     * accumulated on this worker.
     *
     * @param OuvrageTemplate $onlineOuvrage ouvrage built from an online source
     */
    private function completeOuvrage(OuvrageTemplate $onlineOuvrage)
    {
        if ($this->verbose) {
            dump($onlineOuvrage->serialize(true));
        }
        $onlineOptimizer = new OuvrageOptimize($onlineOuvrage, $this->page);
        $onlineOptimized = $onlineOptimizer->doTasks()->getOuvrage();

        $completer = new OuvrageComplete($this->ouvrage, $onlineOptimized);
        // stored before the second optimizing so the merge is kept even if that step throws
        $this->ouvrage = $completer->getResult();

        // todo move that optimizing in OuvrageComplete ?
        $finalOptimizer = new OuvrageOptimize($this->ouvrage, $this->page);
        $this->ouvrage = $finalOptimizer->doTasks()->getOuvrage();

        if ($this->verbose) {
            dump($completer->getLog());
        }
        $this->major = ($this->major || $completer->major);
        $this->notCosmetic = ($completer->notCosmetic || $this->notCosmetic);
        $this->log = array_merge($this->log, $completer->getLog());
        unset($onlineOptimizer, $finalOptimizer, $completer);
    }
308
309
    /**
     * Builds the completed data of the current row and sends it through the queue adapter.
     *
     * Fields sent: raw string, final serialization, date, joined log (250 characters max),
     * notcosmetic/major flags as 0|1, isbn (20 characters max) and the git version.
     * The adapter's result is dumped.
     */
    private function sendCompleted()
    {
        // The cast covers a null isbn param: under strict_types, truncating null
        // would throw a TypeError and the row would never be sent.
        $isbn13 = (string) $this->ouvrage->getParam('isbn');

        $finalData = [
            //    'page' =>
            'raw' => $this->raw,
            'opti' => $this->serializeFinalOpti(),
            'optidate' => date("Y-m-d H:i:s"),
            'modifs' => mb_substr(implode(',', $this->log), 0, 250),
            'notcosmetic' => ($this->notCosmetic) ? 1 : 0,
            'major' => ($this->major) ? 1 : 0,
            'isbn' => mb_substr($isbn13, 0, 20),
            'version' => WikiBotConfig::getGitVersion(),
        ];
        if ($this->verbose) {
            dump($finalData);
        }
        // Json ?
        $result = $this->queueAdapter->sendCompletedData($finalData);

        dump($result); // bool
    }
332
333
    /**
     * Final serialization of the completed OuvrageTemplate, Unicode-normalized.
     *
     * When the normalization fails, the non-normalized serialization is returned
     * rather than breaking the declared string return type.
     *
     * @return string
     */
    private function serializeFinalOpti(): string
    {
        $serialized = $this->ouvrage->serialize(true);
        $normalized = Normalizer::normalize($serialized);

        // Normalizer::normalize() returns false on failure
        return is_string($normalized) ? $normalized : $serialized;
    }
349
350
    /**
     * Tells whether the Google lookup can be skipped.
     *
     * True only when the BnF result is an OuvrageTemplate with a 'titre' value and the
     * current ouvrage already has a 'lire en ligne' or 'présentation en ligne' value.
     *
     * @param mixed $bnfOuvrage result of the BnF lookup
     *
     * @return bool
     */
    private function skipGoogle($bnfOuvrage): bool
    {
        if (!$bnfOuvrage instanceof OuvrageTemplate) {
            return false;
        }
        if (!$bnfOuvrage->hasParamValue('titre')) {
            return false;
        }

        return $this->ouvrage->hasParamValue('lire en ligne')
            || $this->ouvrage->hasParamValue('présentation en ligne');
    }
362
}
363