Passed
Branch master (309757)
by Dispositif
03:20 queued 54s
created

OuvrageCompleteWorker::sendCompleted()   A

Complexity

Conditions 4
Paths 4

Size

Total Lines 19
Code Lines 13

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
eloc 13
dl 0
loc 19
rs 9.8333
c 0
b 0
f 0
cc 4
nc 4
nop 0
1
<?php
2
/*
3
 * This file is part of dispositif/wikibot application (@github)
4
 * 2019-2023 © Philippe M./Irønie  <[email protected]>
5
 * For the full copyright and MIT license information, view the license file.
6
 */
7
8
declare(strict_types=1);
9
10
namespace App\Application\OuvrageComplete;
11
12
use App\Application\InfrastructurePorts\DbAdapterInterface;
13
use App\Application\InfrastructurePorts\MemoryInterface;
14
use App\Application\WikiBotConfig;
15
use App\Domain\InfrastructurePorts\WikidataAdapterInterface;
16
use App\Domain\Models\Wiki\OuvrageTemplate;
17
use App\Domain\OptimizerFactory;
18
use App\Domain\OuvrageComplete;
19
use App\Domain\OuvrageFactory;
20
use App\Domain\Publisher\Wikidata2Ouvrage;
21
use App\Domain\SummaryLogTrait;
22
use App\Domain\Utils\TemplateParser;
23
use Exception;
24
use Normalizer;
25
use Psr\Log\LoggerInterface;
26
use Psr\Log\NullLogger;
27
use Throwable;
28
29
/**
30
 * TODO Legacy class, to be refactored. Too big, too many responsibilities.
31
 * TODO use DTO from DbAdapter.
32
 */
33
class OuvrageCompleteWorker
34
{
35
    use SummaryLogTrait;
36
37
    /**
38
     * Exclusion requête BnF/Google/etc
39
     * Format EAN ou ISBN10 sans tiret.
40
     */
41
    public const ISBN_EAN_SKIP
42
        = [
43
            '9782918758440', // Profils de lignes du réseau ferré français vol.2
44
            '9782918758341', // Profils de lignes du réseau ferré français vol.1
45
            '285608043X', // Dictionnaire encyclopédique d'électronique (langue erronée)
46
            '9782021401196', // sous-titre erroné
47
        ];
48
49
    /**
50
     * @var MemoryInterface
51
     */
52
    protected $memory;
53
    /**
54
     * @var DbAdapterInterface
55
     */
56
    private $queueAdapter;
57
    /**
58
     * @var string
59
     */
60
    private $raw = '';
61
    private $page; // article title
62
63
    private $notCosmetic = false;
64
    private $major = false;
65
    /**
66
     * @var OuvrageTemplate
67
     */
68
    private $ouvrage;
69
    /**
70
     * @var LoggerInterface
71
     */
72
    private $logger;
73
    /**
74
     * @var WikidataAdapterInterface
75
     */
76
    private $wikidataAdapter;
77
78
    /**
     * Wire the worker with its collaborators.
     *
     * @param DbAdapterInterface       $queueAdapter    provides the rows to complete and receives the completed data
     * @param WikidataAdapterInterface $wikidataAdapter adapter handed to Wikidata2Ouvrage during the online search
     * @param MemoryInterface          $memory          memory reporter whose output is logged at debug level
     * @param LoggerInterface|null     $logger          optional logger; a NullLogger is used when none is given
     */
    public function __construct(
        DbAdapterInterface $queueAdapter,
        WikidataAdapterInterface $wikidataAdapter,
        MemoryInterface $memory,
        ?LoggerInterface $logger = null
    ) {
        if (null === $logger) {
            $logger = new NullLogger();
        }

        $this->memory = $memory;
        $this->wikidataAdapter = $wikidataAdapter;
        $this->queueAdapter = $queueAdapter;
        $this->logger = $logger;
    }
90
91
    /**
     * Main loop: fetch a row from the queue, parse its {{ouvrage}} template, optimize it,
     * complete it from online sources when it carries a usable ISBN, then send the result back.
     *
     * Rows whose raw text cannot be turned into an OuvrageTemplate are flagged as skipped
     * and the loop moves on to the next row.
     *
     * @param int|null $limit maximum number of rows to fetch; null or a non-positive value processes nothing
     *
     * @return bool always true when the loop ends normally
     * @throws Exception when the queue has no more row to process
     * @throws Throwable when an online lookup rethrows an error
     */
    public function run(?int $limit = 10000): bool
    {
        while ($limit > 0) {
            $limit--;
            sleep(1);
            $row = $this->getNewRow2Complete();
            $this->raw = $row['raw'];
            $this->page = $row['page'];
            // Note: $row['id'] is also defined at this point.

            printf(
                "-------------------------------\n%s [%s]\n%s\n%s\n",
                date("Y-m-d H:i:s"),
                WikiBotConfig::VERSION ?? '',
                $this->page,
                $this->raw
            );

            $this->logger->debug($this->memory->getMemory(true));

            // Reset the per-row state.
            $this->resetSummaryLog();
            $this->ouvrage = null;
            $this->notCosmetic = false;
            $this->major = false;

            try {
                $parse = TemplateParser::parseAllTemplateByName('ouvrage', $this->raw);
                $origin = $parse['ouvrage'][0]['model'] ?? null;
            } catch (Throwable $e) {
                // Parser crashed: keep the cause in the log context instead of discarding it.
                $this->skipUnparsableRow($row, 432, $e);
                continue;
            }

            if (!$origin instanceof OuvrageTemplate) {
                // Parser ran but produced no usable template model.
                $this->skipUnparsableRow($row, 433);
                continue;
            }

            // Final optimizing (with online predictions)
            $optimizer = OptimizerFactory::fromTemplate($origin, $this->page, $this->logger);
            $optimizer->doTasks();
            $this->ouvrage = $optimizer->getOptiTemplate();
            $this->summaryLog = array_merge($this->getSummaryLog(), $optimizer->getSummaryLog());
            $this->notCosmetic = ($optimizer->notCosmetic || $this->notCosmetic);

            /**
             * ONLINE SEARCH
             */
            $isbn = $origin->getParam('isbn'); // read before the EAN>ISBN formatting
            $isbn10 = $origin->getParam('isbn2') ?? $origin->getParam('isbn10');
            if (!empty($isbn)
                && !$origin->hasParamValue('isbn invalide')
                && !$origin->hasParamValue('isbn erroné')
            ) {
                $this->onlineIsbnSearch($isbn, $isbn10);
            }

            $this->sendCompleted();
            unset($optimizer, $parse, $origin);
        } // END WHILE

        return true;
    }

    /**
     * Log a template-parsing failure, flag the row as skipped in the queue, then pause 10 seconds.
     *
     * @param array          $row       queue row; its 'id' entry identifies the row to skip
     * @param int            $errorCode numeric code inserted in the log message (432 or 433)
     * @param Throwable|null $cause     error that triggered the skip, forwarded in the log context when given
     */
    private function skipUnparsableRow(array $row, int $errorCode, ?Throwable $cause = null): void
    {
        $this->logger->warning(
            sprintf(
                "*** ERREUR %d impossible de transformer en modèle => skip %s : %s \n",
                $errorCode,
                $row['id'],
                $this->raw
            ),
            null === $cause ? [] : ['exception' => $cause]
        );
        $this->queueAdapter->skipRow((int) $row['id']);
        sleep(10);
    }
174
175
    /**
     * Fetch the next row to complete from the queue adapter.
     *
     * @return array the row as returned by the adapter, with a non-empty 'raw' entry
     * @throws Exception when the adapter returns nothing or a row whose 'raw' entry is empty
     */
    private function getNewRow2Complete(): array
    {
        $candidate = $this->queueAdapter->getNewRaw();
        $hasRaw = !empty($candidate) && !empty($candidate['raw']);
        if ($hasRaw) {
            return $candidate;
        }

        echo "STOP: no more queue to process \n";
        throw new Exception('no more queue to process');
    }
191
192
    /**
     * Tell whether one of the given ISBNs belongs to the ISBN_EAN_SKIP exclusion list.
     *
     * Each value is normalised before the lookup (hyphens and spaces removed, check digit
     * 'x' upper-cased) and compared strictly, so numeric strings are never loosely equated.
     *
     * @param string      $isbn   main ISBN of the template
     * @param string|null $isbn10 optional ISBN-10 of the template
     *
     * @return bool true when the online search must be skipped for these ISBNs
     */
    private function isIsbnSkipped(string $isbn, ?string $isbn10 = null): bool
    {
        foreach ([$isbn, $isbn10] as $candidate) {
            if (null === $candidate) {
                continue;
            }

            $normalized = strtoupper(str_replace(['-', ' '], '', $candidate));
            if (in_array($normalized, self::ISBN_EAN_SKIP, true)) {
                return true;
            }
        }

        return false;
    }
204
205
    /**
     * Complete the working template from online sources, looked up by ISBN.
     *
     * Order of the lookups: BnF (ISBN-10 first when available, then the main ISBN), Wikidata
     * when the BnF record carries extra infos, Google unless skipGoogle() says otherwise,
     * and OpenLibrary only when neither BnF nor Google produced a record.
     *
     * @param string      $isbn   main ISBN of the template
     * @param string|null $isbn10 optional ISBN-10, tried first against the BnF
     *
     * @throws Throwable a BnF error mentioning 'Could not resolve host', or any Google error
     *                   other than a DNS failure for www.googleapis.com
     */
    private function onlineIsbnSearch(string $isbn, ?string $isbn10 = null): void
    {
        if ($this->isIsbnSkipped($isbn, $isbn10)) {
            echo "*** SKIP THAT ISBN ***\n";

            // TODO(review): check that returning here is the intended logic.
            return;
        }

        $this->logger->info("sleep 10...\n");
        sleep(10);

        try {
            $this->logger->debug('BIBLIO NAT FRANCE...');
            // The BnF cannot find an old (ISBN-10 era) book from its ISBN-13, so ISBN-10 is tried first.
            $bnfOuvrage = null;
            if ($isbn10) {
                $bnfOuvrage = OuvrageFactory::BnfFromIsbn($isbn10);
                sleep(2);
            }
            if (!$isbn10 || null === $bnfOuvrage || empty($bnfOuvrage->getParam('titre'))) {
                $bnfOuvrage = OuvrageFactory::BnfFromIsbn($isbn);
            }
            if ($bnfOuvrage instanceof OuvrageTemplate) {
                $this->completeOuvrage($bnfOuvrage);

                // Wikidata requests from $infos (ISBN/ISNI)
                if (!empty($bnfOuvrage->getInfos())) {
                    $this->logger->info('WIKIDATA...');

                    // TODO move to factory
                    $wdComplete = new Wikidata2Ouvrage($this->wikidataAdapter, clone $bnfOuvrage, $this->page);
                    $this->completeOuvrage($wdComplete->getOuvrage());
                }
            }
        } catch (Throwable $e) {
            // A host resolution failure is rethrown; other BnF errors are logged and ignored.
            if (strpos($e->getMessage(), 'Could not resolve host') !== false) {
                throw $e;
            }
            $this->logger->error(
                sprintf(
                    "*** ERREUR BnF Isbn Search %s %s %s \n",
                    $e->getMessage(),
                    $e->getFile(),
                    $e->getLine()
                )
            );
        }

        if (!isset($bnfOuvrage) || !$this->skipGoogle($bnfOuvrage)) {
            try {
                $this->logger->info('GOOGLE...');

                $googleOuvrage = OuvrageFactory::GoogleFromIsbn($isbn);
                $this->completeOuvrage($googleOuvrage);
            } catch (Throwable $e) {
                $this->logger->warning("*** ERREUR GOOGLE Isbn Search ***".$e->getMessage());
                // NOTE(review): unlike the BnF branch, every error EXCEPT a googleapis.com DNS
                // failure is rethrown here — confirm this asymmetry is intended.
                if (strpos($e->getMessage(), 'Could not resolve host: www.googleapis.com') === false) {
                    throw $e;
                }
                unset($e);
            }
        }

        if (!isset($bnfOuvrage) && !isset($googleOuvrage)) {
            try {
                $this->logger->info('OpenLibrary...');
                $openLibraryOuvrage = OuvrageFactory::OpenLibraryFromIsbn($isbn);
                if (!empty($openLibraryOuvrage)) {
                    $this->completeOuvrage($openLibraryOuvrage);
                }
            } catch (Throwable $e) {
                // Best effort: the failure is not fatal, but keep its cause in the log context.
                $this->logger->warning('**** ERREUR OpenLibrary Isbn Search', ['exception' => $e]);
            }
        }
    }
282
283
    //    private function onlineQuerySearch(string $query)
284
    //    {
285
    //        echo "sleep 40...";
286
    //        sleep(20);
287
    //        onlineQuerySearch:
288
    //
289
    //        try {
290
    //            dump('GOOGLE SEARCH...');
291
    //            //            $googleOuvrage = OuvrageFactory::GoogleFromIsbn($isbn);
292
    //            $adapter = new GoogleBooksAdapter();
293
    //            $data = $adapter->search('blabla');
294
    //            dump($data);
295
    //            //die;
296
    //            //            return $import->getOuvrage();
297
    //            //            $this->completeOuvrage($googleOuvrage);
298
    //        } catch (Throwable $e) {
299
    //            echo "*** ERREUR GOOGLE QuerySearch *** ".$e->getMessage()."\n";
300
    //            echo "sleep 30min";
301
    //            sleep(60 * 30);
302
    //            echo "Wake up\n";
303
    //            goto onlineQuerySearch;
304
    //        }
305
    //    }
306
307
    /**
     * Merge a record obtained online into the working template.
     *
     * The online record is optimized, merged into $this->ouvrage through OuvrageComplete,
     * and the merged template is optimized again. The merge's summary log and its
     * major / notCosmetic flags are accumulated on the worker.
     *
     * @param OuvrageTemplate $onlineOuvrage record built from an online source
     */
    private function completeOuvrage(OuvrageTemplate $onlineOuvrage)
    {
        $this->logger->info($onlineOuvrage->serialize(true));

        // Optimize the online record on its own before merging it.
        $onlineOptimizer = OptimizerFactory::fromTemplate($onlineOuvrage, $this->page, $this->logger);
        $onlineDone = $onlineOptimizer->doTasks();
        $onlineOptimized = $onlineDone->getOptiTemplate();

        $merger = new OuvrageComplete($this->ouvrage, $onlineOptimized, $this->logger);
        $this->ouvrage = $merger->getResult();

        // todo move that optimizing in OuvrageComplete ?
        $mergedOptimizer = OptimizerFactory::fromTemplate($this->ouvrage, $this->page, $this->logger);
        $mergedDone = $mergedOptimizer->doTasks();
        $this->ouvrage = $mergedDone->getOptiTemplate();

        $this->logger->info('Summary', $merger->getSummaryLog());

        // Both flags only ever switch from false to true for the current row.
        $this->major = ($merger->major || $this->major);
        $this->notCosmetic = ($merger->notCosmetic || $this->notCosmetic);
        $this->summaryLog = array_merge($this->getSummaryLog(), $merger->getSummaryLog());

        unset($onlineOptimizer, $onlineDone, $mergedOptimizer, $mergedDone, $merger);
    }
330
331
    /**
     * Send the completed template and its metadata back through the queue adapter.
     *
     * The payload holds the original raw text, the final serialization, the summary of
     * modifications (truncated to 250 characters), the cosmetic/major flags, the ISBN
     * (truncated to 19 characters) and the bot version.
     *
     * @throws Exception when the final serialization cannot be produced
     */
    private function sendCompleted(): void
    {
        // The template may have no 'isbn' parameter (static analysis reports a possible null);
        // passing null to a string function is a TypeError under strict_types, hence the cast.
        $isbn = (string) $this->ouvrage->getParam('isbn');

        $finalData = [
            //    'page' =>
            'raw' => $this->raw,
            'opti' => $this->serializeFinalOpti(),
            'optidate' => date("Y-m-d H:i:s"),
            'modifs' => mb_substr(implode(',', $this->getSummaryLog()), 0, 250),
            'notcosmetic' => ($this->notCosmetic) ? 1 : 0,
            'major' => ($this->major) ? 1 : 0,
            // mb_substr, as for 'modifs': never cuts a multibyte character and returns '' for ''.
            'isbn' => mb_substr($isbn, 0, 19),
            'version' => WikiBotConfig::VERSION ?? null,
        ];
        $this->logger->info('finalData', $finalData);
        // Json ?
        $result = $this->queueAdapter->sendCompletedData($finalData);

        $this->logger->debug($result ? 'OK DB' : 'erreur sendCompletedData()');
    }
351
352
    /**
     * Serialize the completed OuvrageTemplate and pass the result through Normalizer::normalize().
     *
     * @return string the non-empty normalized serialization
     * @throws Exception when normalization does not yield a non-empty string
     */
    private function serializeFinalOpti(): string
    {
        // Disabled tweak kept for reference — make the compact style more spaced:
        //        if ('|' === $this->ouvrage->userSeparator) {
        //            $this->ouvrage->userSeparator = ' |';
        //        }
        $normalized = Normalizer::normalize($this->ouvrage->serialize(true));

        if (is_string($normalized) && !empty($normalized)) {
            return $normalized;
        }

        throw new Exception('normalized $finalOpti serialize in OuvrageComplete is not a string');
    }
369
370
    /**
     * Tell whether the Google lookup can be skipped.
     *
     * It is skipped only when the BnF lookup produced a template with a 'titre' value and
     * the working template already has a 'lire en ligne' or 'présentation en ligne' value.
     *
     * @param mixed $bnfOuvrage result of the BnF lookup; anything but an OuvrageTemplate never skips
     *
     * @return bool true when Google does not need to be queried
     */
    private function skipGoogle($bnfOuvrage): bool
    {
        if (!$bnfOuvrage instanceof OuvrageTemplate) {
            return false;
        }

        if (!$bnfOuvrage->hasParamValue('titre')) {
            return false;
        }

        if ($this->ouvrage->hasParamValue('lire en ligne')) {
            return true;
        }

        return (bool) $this->ouvrage->hasParamValue('présentation en ligne');
    }
377
}
378