GoogleTransformer::processRef()   A
last analyzed

Complexity

Conditions 4
Paths 5

Size

Total Lines 29
Code Lines 18

Duplication

Lines 0
Ratio 0 %

Importance

Changes 1
Bugs 0 Features 0
Metric Value
eloc 18
c 1
b 0
f 0
dl 0
loc 29
rs 9.6666
cc 4
nc 5
nop 2
1
<?php
2
/*
3
 * This file is part of dispositif/wikibot application (@github)
4
 * 2019-2023 © Philippe M./Irønie  <[email protected]>
5
 * For the full copyright and MIT license information, view the license file.
6
 */
7
8
declare(strict_types=1);
9
10
namespace App\Domain\Transformers;
11
12
use App\Domain\InfrastructurePorts\GoogleApiQuotaInterface;
13
use App\Domain\InfrastructurePorts\GoogleBooksInterface;
14
use App\Domain\Models\Wiki\OuvrageTemplate;
15
use App\Domain\Publisher\GoogleBookMapper;
16
use App\Domain\Publisher\GoogleBooksUtil;
17
use App\Domain\Utils\NumberUtil;
18
use App\Domain\Utils\WikiTextUtil;
19
use App\Domain\WikiOptimizer\OptimizerFactory;
20
use App\Domain\WikiTemplateFactory;
21
use App\Infrastructure\Monitor\NullLogger;
22
use DomainException;
23
use Exception;
24
use Psr\Log\LoggerInterface;
25
use Scriptotek\GoogleBooks\Volume;
26
use Throwable;
27
28
/**
29
 * TODO REFAC : duplicate, extract methods in trait or in AbstractRefBotWorker + ExternBotWorker
30
 * --
31
 * Transform <ref>https://books.google...</ref> to <ref>{{Ouvrage|...}}.</ref>
32
 * in an article wikitext.
33
 */
34
class GoogleTransformer
35
{
36
    /**
     * Pause, in seconds, observed after each converted link before moving on to the next one.
     */
    final public const SLEEP_GOOGLE_API_INTERVAL = 5;

    /**
     * Templates already generated by this instance, keyed by the identifier used for the lookup.
     *
     * @var OuvrageTemplate[]
     */
    protected array $cacheOuvrageTemplate = [];

    /**
     * @param GoogleApiQuotaInterface $quota              Consulted before each Google Books lookup
     * @param GoogleBooksInterface    $googleBooksAdapter Source of the volume data (by Google ID or by ISBN)
     * @param LoggerInterface         $logger             Receives progress and error messages; a NullLogger by default
     */
    public function __construct(
        protected readonly GoogleApiQuotaInterface $quota,
        protected readonly GoogleBooksInterface $googleBooksAdapter,
        protected LoggerInterface $logger = new NullLogger(),
    ) {
    }
50
51
    /**
     * Convert the bare Google Books links of a page's wikitext into {{Ouvrage}} citations.
     *
     * Two shapes are handled, in this order: links wrapped in <ref>…</ref> or {{ref|…}},
     * then links standing alone on a "* " bullet line.
     *
     * @param string $text Page wikitext
     *
     * @return string The wikitext with the <ref> and bullet links converted
     *
     * @throws DomainException When the Google API quota is reached
     * @throws Throwable       Anything the bullet-link conversion does not catch itself
     */
    public function process(string $text): string
    {
        // NOTE(review): static analysis reports that isQuotaReached() is not declared on
        // GoogleApiQuotaInterface itself — confirm the interface exposes it.
        $quotaExhausted = $this->quota->isQuotaReached();
        if ($quotaExhausted) {
            $this->logger->debug('Quota Google atteint');
            throw new DomainException('Quota Google atteint');
        }

        // First pass: links inside references.
        $googleRefs = $this->extractAllGoogleRefs($text);
        $refCount = count($googleRefs);
        $this->logger->debug('GoogleTransformer: refs found: ' . $refCount);
        if ($refCount > 0) {
            $text = $this->processRef($text, $googleRefs);
        }

        // Second pass: links on bullet lines, searched in the already-updated text.
        $bulletLinks = $this->extractGoogleExternalBullets($text);
        $linkCount = count($bulletLinks);
        $this->logger->debug('GoogleTransformer: links found: ' . $linkCount);
        if ($linkCount > 0) {
            $text = $this->processExternLinks($text, $bulletLinks);
        }

        return $text;
    }
75
76
    /**
     * Find every <ref>…</ref> or {{ref|…}} whose whole content is a Google Books URL.
     * Todo: strip the trailing full stop of the URL.
     *
     * @param string $text Page wikitext
     *
     * @return array One entry per match, in PREG_SET_ORDER form:
     *               [0 => ['<ref>http...</ref>', 'http://'], 1 => ...]
     */
    public function extractAllGoogleRefs(string $text): array
    {
        // Opening <ref ...> or {{ref|, the captured URL, then the closing </ref> or }}.
        // See also GoogleLivresTemplate::GOOGLEBOOK_URL_PATTERN.
        $pattern = '#(?:<ref[^>]*>|{{ref\|) ?('
            . GoogleBooksUtil::GOOGLEBOOKS_START_URL_PATTERN
            . '[^>\]} \n]+) ?(?:</ref>|}})#i';

        $found = preg_match_all($pattern, $text, $matches, PREG_SET_ORDER);

        // $found is 0 when nothing matched and false on a PCRE error: both give an empty list.
        return $found ? $matches : [];
    }
100
101
    /**
     * Replace each matched Google Books reference of $text by an {{Ouvrage}} citation.
     *
     * A link whose conversion fails is logged and left untouched. The loop pauses after
     * every lookup and after every replacement.
     *
     * @param string $text     Page wikitext
     * @param array  $refsData Matches as returned by extractAllGoogleRefs():
     *                         index 0 is the whole reference, index 1 the URL
     *
     * @return string The updated wikitext
     *
     * @throws DomainException When the Google API quota is reached
     */
    protected function processRef(string $text, array $refsData): string
    {
        foreach ($refsData as $match) {
            $googleUrl = $match[1];
            $this->logger->info('Process GoogleBooks ref: ' . $googleUrl);

            // The quota is re-checked before each lookup, not only once per page.
            if ($this->quota->isQuotaReached()) {
                $this->logger->debug('Quota Google atteint');
                throw new DomainException('Quota Google atteint');
            }

            try {
                $citation = $this->convertGBurl2OuvrageCitation(WikiTextUtil::stripFinalPoint($googleUrl));
                sleep(2);
            } catch (Throwable $failure) {
                // Best effort: skip this reference and keep going with the others.
                $this->logger->debug("Exception " . $failure->getMessage());
                continue;
            }

            // A full stop is appended to close the reference; the URL is swapped inside
            // the matched reference first, then the reference is swapped inside the page.
            $updatedRef = str_replace($googleUrl, $citation . '.', (string)$match[0]);
            $this->logger->info($updatedRef);
            $text = str_replace($match[0], $updatedRef, $text);

            $this->logger->notice("sleep " . self::SLEEP_GOOGLE_API_INTERVAL);
            sleep(self::SLEEP_GOOGLE_API_INTERVAL);
        }

        return $text;
    }
131
132
    /**
     * TODO : extract. TODO private ?
     * Convert a Google Books URL into a serialized wiki-template {ouvrage} citation.
     * Needs the GoogleBooksAdapter injection.
     *
     * When the lookup fails with Google's "volume ID could not be found" message, a
     * {{lien brisé}} template is returned instead of an {ouvrage} citation.
     *
     * @param string $url Google Books URL
     *
     * @return string Serialized template
     *
     * @throws DomainException When $url is not a Google Books URL, carries neither a Google ID
     *                         nor an ISBN, or when the generated template has no title
     * @throws Throwable       Any other failure raised while fetching or building the template
     */
    public function convertGBurl2OuvrageCitation(string $url): string
    {
        if (!GoogleBooksUtil::isGoogleBookURL($url)) {
            throw new DomainException('Pas de URL Google Books');
        }

        $gooDat = GoogleBooksUtil::parseGoogleBookQuery($url);
        $hasGoogleId = !empty($gooDat['id']);
        if (!$hasGoogleId && empty($gooDat['isbn'])) {
            throw new DomainException('Pas de ISBN ou ID Google Books');
        }

        try {
            // The Google ID is preferred when present. The ISBN flag must describe the
            // identifier actually passed: it used to be set whenever an ISBN existed, so a
            // URL carrying both sent the Google ID to the ISBN lookup, and an empty 'id'
            // was passed instead of falling back to the ISBN.
            $identifiant = $hasGoogleId ? $gooDat['id'] : $gooDat['isbn'];
            $ouvrage = $this->generateOuvrageFromGoogleData($identifiant, !$hasGoogleId);
        } catch (Throwable $e) {
            // The ID does not exist on Google Books.
            // str_contains() rather than a truthy strpos(): offset 0 is a valid match.
            if (str_contains($e->getMessage(), '"message": "The volume ID could n')) {
                return sprintf(
                    '{{lien brisé |url= %s |titre=%s |brisé le=%s}}',
                    $url,
                    'Ouvrage inexistant sur Google Books',
                    date('d-m-Y')
                );
            }
            throw $e;
        }

        $cleanUrl = GoogleBooksUtil::simplifyGoogleUrl($url);
        $ouvrage->unsetParam('présentation en ligne');
        $ouvrage->setParam('lire en ligne', $cleanUrl);
        $ouvrage->userSeparator = ' |';

        // A citation without a title is rejected.
        if (!$ouvrage->hasParamValue('titre')) {
            throw new DomainException("Ouvrage sans titre (data Google?)");
        }

        // Google page => 'passage'
        $page = null;
        if (!empty($gooDat['pg'])) {
            $pg = (string)$gooDat['pg'];

            // Pages 1 and 2 are excluded (default view on Google Books).
            if (preg_match('#(?:PA|PT)(\d+)$#', $pg, $matches) && (int)$matches[1] >= 3) {
                $page = $matches[1];
            }
            // PR pages are converted to Roman numerals, with the same exclusion of pages 1 and 2.
            if (preg_match('#PR(\d+)$#', $pg, $matches) && (int)$matches[1] >= 3) {
                $page = NumberUtil::arab2roman((int)$matches[1], true);
            }
        }
        if (!empty($page)) {
            $ouvrage->setParam('passage', $page);
            // add a '<!-- utile? -->' comment?
        }

        $optimizer = OptimizerFactory::fromTemplate($ouvrage, null, $this->logger);
        $optimizer->doTasks();

        return $optimizer->getOptiTemplate()->serialize();
    }
200
201
    /**
     * todo: move (injection) to other class.
     * Generate a wiki-template {ouvrage} from a Google Books ID or an ISBN.
     *
     * Lookups by Google ID are served from the in-memory cache when possible; the caller
     * always receives its own copy, never the cached instance.
     *
     * @param string    $id     Google Books ID, or the ISBN when $isISBN is true
     * @param bool|null $isISBN True to look $id up as an ISBN; false or null for a Google ID
     *
     * @throws DomainException When no volume is found or the template cannot be created
     * @throws Exception
     */
    protected function generateOuvrageFromGoogleData(string $id, ?bool $isISBN = false): OuvrageTemplate
    {
        // Return a copy of the cached OuvrageTemplate (the cache is only read for Google IDs).
        if (!$isISBN && isset($this->cacheOuvrageTemplate[$id])) {
            return clone $this->cacheOuvrageTemplate[$id];
        }

        // Get Google data, e.g. by ID ZvhBAAAAcAAJ
        // NOTE(review): static analysis reports getDataByIsbn()/getDataByGoogleId() as not
        // declared on GoogleBooksInterface itself — confirm the interface exposes them.
        $volume = $isISBN === true
            ? $this->googleBooksAdapter->getDataByIsbn($id)
            : $this->googleBooksAdapter->getDataByGoogleId($id);
        if (!$volume instanceof Volume) {
            throw new DomainException('googleBooks Volume not found for that GB-id/isbn');
        }

        $mapper = new GoogleBookMapper(); // todo inject
        $mapper->mapLanguageData(true);
        $data = $mapper->process($volume);

        // Generate wiki-template {ouvrage}
        $ouvrage = WikiTemplateFactory::create('ouvrage');
        // The factory is reported as able to return null: fail with a catchable domain error
        // instead of an Error on the hydrate() call below.
        if (!$ouvrage instanceof OuvrageTemplate) {
            throw new DomainException('Unable to create the {ouvrage} wiki-template');
        }
        $ouvrage->hydrate($data);
        $ouvrage->setParam('consulté le', date('d-m-Y'));

        // Cache a copy, so later changes made by the caller do not leak into the cache.
        $this->cacheOuvrageTemplate[$id] = clone $ouvrage;

        return $ouvrage;
    }
238
239
    /**
     * todo move
     * Find the bullet lines made of nothing but a Google Books URL,
     * e.g. "* https://books.google.fr/...".
     *
     * @param string $text Page wikitext
     *
     * @return array One entry per match, in PREG_SET_ORDER form:
     *               index 0 is the whole line, index 1 the URL
     */
    public function extractGoogleExternalBullets(string $text): array
    {
        // Multiline mode: ^ and $ anchor on each line of the wikitext.
        $pattern = '#^\* *('
            . GoogleBooksUtil::GOOGLEBOOKS_START_URL_PATTERN
            . '[^ <{\]}\n\r]+) *$#im';

        $found = preg_match_all($pattern, $text, $matches, PREG_SET_ORDER);

        // $found is 0 when nothing matched and false on a PCRE error: both give an empty list.
        return $found ? $matches : [];
    }
257
258
    /**
     * TODO: duplicates processRef().
     * Replace each matched Google Books bullet link of $text by an {{Ouvrage}} citation.
     *
     * A link whose conversion raises an Exception is logged and left untouched; other
     * Throwables are not caught here.
     *
     * @param string $text  Page wikitext
     * @param array  $links Matches as returned by extractGoogleExternalBullets():
     *                      index 0 is the whole line, index 1 the URL
     *
     * @return string|string[] The updated wikitext
     *
     * @throws DomainException When the Google API quota is reached
     * @throws Throwable
     */
    protected function processExternLinks(string $text, array $links): string|array
    {
        foreach ($links as $bullet) {
            $googleUrl = $bullet[1];
            $this->logger->info('Process links: ' . $googleUrl);

            // The quota is re-checked before each lookup.
            if ($this->quota->isQuotaReached()) {
                $this->logger->debug('Quota Google atteint');
                throw new DomainException('Quota Google atteint');
            }

            try {
                $citation = $this->convertGBurl2OuvrageCitation(WikiTextUtil::stripFinalPoint($googleUrl));
            } catch (Exception $failure) {
                // Skip this link and keep going with the others.
                $this->logger->debug("Exception " . $failure->getMessage());
                continue;
            }

            // todo: is the final full stop wanted here, as it is for references?
            $updatedLine = str_replace($googleUrl, $citation . '.', (string)$bullet[0]);
            $this->logger->info($updatedLine);
            $text = str_replace($bullet[0], $updatedLine, $text);

            $this->logger->info("Sleep " . self::SLEEP_GOOGLE_API_INTERVAL);
            sleep(self::SLEEP_GOOGLE_API_INTERVAL);
        }

        return $text;
    }
292
293
}
294