Passed
Branch master (fd6b1a)
by Dispositif
03:51 queued 01:13
created

GoogleTransformer::extractAllGoogleRefs()   A

Complexity

Conditions 2
Paths 2

Size

Total Lines 15
Code Lines 7

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
eloc 7
c 0
b 0
f 0
dl 0
loc 15
rs 10
cc 2
nc 2
nop 1
1
<?php
2
/*
3
 * This file is part of dispositif/wikibot application (@github)
4
 * 2019-2023 © Philippe M./Irønie  <[email protected]>
5
 * For the full copyright and MIT license information, view the license file.
6
 */
7
8
declare(strict_types=1);
9
10
namespace App\Domain\Transformers;
11
12
use App\Domain\InfrastructurePorts\GoogleApiQuotaInterface;
13
use App\Domain\InfrastructurePorts\GoogleBooksInterface;
14
use App\Domain\Models\Wiki\OuvrageTemplate;
15
use App\Domain\Publisher\GoogleBookMapper;
16
use App\Domain\Publisher\GoogleBooksUtil;
17
use App\Domain\Utils\NumberUtil;
18
use App\Domain\Utils\WikiTextUtil;
19
use App\Domain\WikiOptimizer\OptimizerFactory;
20
use App\Domain\WikiTemplateFactory;
21
use DomainException;
22
use Exception;
23
use Psr\Log\LoggerInterface;
24
use Scriptotek\GoogleBooks\Volume;
25
use Throwable;
26
27
/**
28
 * TODO REFAC : duplicate, extract methods in trait or in AbstractRefBotWorker + ExternBotWorker
29
 * --
30
 * Transform <ref>https://books.google...</ref> to <ref>{{Ouvrage|...}}.</ref>
31
 * in an article wikitext.
32
 * Class GoogleTransformer
33
 *
34
 * @package App\Domain\Transformers
35
 */
36
class GoogleTransformer
37
{
38
    public const SLEEP_GOOGLE_API_INTERVAL = 5;
39
40
    /**
41
     * @var OuvrageTemplate[] Templates already generated, keyed by the Google Books ID or ISBN used for the lookup
42
     */
43
    private $cacheOuvrageTemplate = [];
44
    /**
45
     * @var GoogleApiQuotaInterface
46
     */
47
    private $quota;
48
    /**
49
     * @var GoogleBooksInterface
50
     */
51
    private $googleBooksAdapter;
52
    /**
53
     * @var LoggerInterface|null
54
     */
55
    private $logger;
56
57
    /**
     * GoogleTransformer constructor.
     * todo dependency injection
     *
     * @param GoogleApiQuotaInterface $googleApiQuota     Quota tracker, queried through isQuotaReached()
     * @param GoogleBooksInterface    $googleBooksAdapter Adapter used to fetch Google Books volume data
     * @param LoggerInterface|null    $logger             Optional logger
     */
    public function __construct(
        GoogleApiQuotaInterface $googleApiQuota,
        GoogleBooksInterface $googleBooksAdapter,
        ?LoggerInterface $logger = null
    ) {
        // Explicit "?LoggerInterface": implicit nullability through a "= null" default is deprecated since PHP 8.4.
        $this->quota = $googleApiQuota;
        $this->googleBooksAdapter = $googleBooksAdapter;
        $this->logger = $logger;
    }
70
71
    /**
     * Convert the Google Books links of a page: first the <ref>/{{ref}} ones,
     * then the bare bullet-list ones ("* https://books.google...").
     *
     * @param string $text Page wikitext
     *
     * @return string New wikitext
     * @throws DomainException When the Google API quota is reached
     * @throws Throwable
     */
    public function process(string $text): string
    {
        if ($this->quota->isQuotaReached()) {
            throw new DomainException('Quota Google atteint');
        }

        // Step 1: references containing only a Google Books URL.
        $googleRefs = $this->extractAllGoogleRefs($text);
        $text = ($googleRefs === []) ? $text : $this->processRef($text, $googleRefs);

        // Step 2: bullet lines containing only a Google Books URL (searched in the updated text).
        $bulletLinks = $this->extractGoogleExternalBullets($text);

        return ($bulletLinks === []) ? $text : $this->processExternLinks($text, $bulletLinks);
    }
97
98
    /**
     * Find every <ref>...</ref> or {{ref|...}} whose content is only a Google Books URL.
     * Todo: strip the trailing full stop from the URL.
     *
     * @param string $text Page wikitext
     *
     * @return array [0 => ['<ref>http...</ref>', 'http://'], 1 => ...] (empty array when nothing matches)
     */
    private function extractAllGoogleRefs(string $text): array
    {
        // Opening <ref ...> or {{ref| , optional space, the URL, optional space, closing </ref> or }}
        // See also GoogleLivresTemplate::GOOGLEBOOK_URL_PATTERN
        $regex = '#(?:<ref[^>]*>|{{ref\|) ?('.GoogleBooksUtil::GOOGLEBOOKS_START_URL_PATTERN.'[^>\]} \n]+) ?(?:</ref>|}})#i';

        $found = preg_match_all($regex, $text, $matches, PREG_SET_ORDER);

        // preg_match_all() yields 0 (no match) or false (error): both mean "nothing to return".
        return $found ? $matches : [];
    }
122
123
    /**
     * Replace each matched Google Books reference with its {{Ouvrage}} citation.
     *
     * References whose conversion fails are reported on stdout and left untouched.
     *
     * @param string $text     Page wikitext
     * @param array  $refsData Matches where [0] is the whole reference and [1] the URL
     *
     * @return string Wikitext with the converted references
     * @throws DomainException When the Google API quota is reached
     */
    private function processRef(string $text, array $refsData): string
    {
        foreach ($refsData as $match) {
            if ($this->quota->isQuotaReached()) {
                throw new DomainException('Quota Google atteint');
            }

            $wholeRef = $match[0];
            $googleUrl = $match[1];

            try {
                $citation = $this->convertGBurl2OuvrageCitation(WikiTextUtil::stripFinalPoint($googleUrl));
                sleep(2);
            } catch (Throwable $error) {
                echo 'Exception '.$error->getMessage();
                continue;
            }

            // A reference ends with a full stop.
            $convertedRef = str_replace($googleUrl, $citation.'.', $wholeRef);
            echo $convertedRef."\n";

            $text = str_replace($wholeRef, $convertedRef, $text);

            // Pause between two Google API calls.
            echo 'sleep '.self::SLEEP_GOOGLE_API_INTERVAL."\n";
            sleep(self::SLEEP_GOOGLE_API_INTERVAL);
        }

        return $text;
    }
151
152
    /**
     * TODO : extract. TODO private ?
     * Convert GoogleBooks URL to wiki-template {ouvrage} citation.
     * Need GoogleBooksAdapter injection.
     *
     * When Google reports that the volume ID does not exist, a {{lien brisé}} template
     * is returned instead of an {{Ouvrage}} citation.
     *
     * @param string $url Google Books URL
     *
     * @return string Serialized wiki-template
     * @throws DomainException When the URL is not a Google Books URL, carries neither ID nor ISBN,
     *                         or when the generated template has no title
     * @throws Throwable       Any other error raised while fetching or mapping the Google data
     */
    public function convertGBurl2OuvrageCitation(string $url): string
    {
        if (!GoogleBooksUtil::isGoogleBookURL($url)) {
            throw new DomainException('Pas de URL Google Books');
        }

        $gooDat = GoogleBooksUtil::parseGoogleBookQuery($url);
        if (empty($gooDat['isbn']) && empty($gooDat['id'])) {
            throw new DomainException('Pas de ISBN ou ID Google Books');
        }

        try {
            // Prefer the Google volume ID; fall back to the ISBN only when no usable ID exists.
            // The ISBN flag must describe the identifier actually sent, otherwise an ID
            // would be looked up as an ISBN when the URL carries both.
            $hasGoogleId = !empty($gooDat['id']);
            $identifiant = $hasGoogleId ? $gooDat['id'] : $gooDat['isbn'];
            $isISBN = !$hasGoogleId;
            $ouvrage = $this->generateOuvrageFromGoogleData($identifiant, $isISBN);
        } catch (Throwable $e) {
            // ID does not exist on Google Books.
            // Strict comparison: strpos() returns 0 (falsy) when the needle starts the message.
            if (strpos($e->getMessage(), '"message": "The volume ID could n') !== false) {
                return sprintf(
                    '{{lien brisé |url= %s |titre=%s |brisé le=%s}}',
                    $url,
                    'Ouvrage inexistant sur Google Books',
                    date('d-m-Y')
                );
            }
            throw $e;
        }

        $cleanUrl = GoogleBooksUtil::simplifyGoogleUrl($url);
        $ouvrage->unsetParam('présentation en ligne');
        $ouvrage->setParam('lire en ligne', $cleanUrl);
        $ouvrage->userSeparator = ' |';

        // Title missing
        if (!$ouvrage->hasParamValue('titre')) {
            throw new DomainException("Ouvrage sans titre (data Google?)");
        }

        // Google page => 'passage'
        if (!empty($gooDat['pg'])) {
            $page = null;

            // Pages 1 and 2 are skipped (default view on Google Books)
            if (preg_match('#(?:PA|PT)(\d+)$#', $gooDat['pg'], $matches) && (int) $matches[1] >= 3) {
                $page = $matches[1];
            }
            // PR pages are converted to Roman numerals
            // Pages 1 and 2 are skipped (default view on Google Books)
            if (preg_match('#PR(\d+)$#', $gooDat['pg'], $matches) && (int) $matches[1] >= 3) {
                $page = NumberUtil::arab2roman((int) $matches[1], true);
            }

            if (!empty($page)) {
                $ouvrage->setParam('passage', $page);
                // add a '<!-- utile? -->' comment ?
            }
        }

        // Pass the injected logger as-is: the former "$this->logger = null" argument was an
        // assignment that both passed null and wiped the logger for every later call.
        $optimizer = OptimizerFactory::fromTemplate($ouvrage, null, $this->logger);
        $optimizer->doTasks();
        $ouvrage2 = $optimizer->getOptiTemplate();

        return $ouvrage2->serialize();
    }
221
222
    /**
     * todo: move (injection) to other class.
     * Build an {ouvrage} wiki-template from the Google Books data of a volume.
     *
     * Generated templates are stored in an in-memory cache keyed by $id; the cache is
     * only read for lookups that are not ISBN lookups. Clones are stored and returned
     * so that callers cannot alter the cached instance.
     *
     * @param string    $id     GoogleBooks ID, or ISBN when $isISBN is true
     * @param bool|null $isISBN True to query Google by ISBN instead of by volume ID
     *
     * @return OuvrageTemplate
     * @throws DomainException When the adapter does not return a Volume
     * @throws Exception
     */
    private function generateOuvrageFromGoogleData(string $id, ?bool $isISBN = false): OuvrageTemplate
    {
        $lookupByIsbn = ($isISBN === true);

        // Serve a copy of the cached template when available (ID lookups only).
        if (!$lookupByIsbn && isset($this->cacheOuvrageTemplate[$id])) {
            return clone $this->cacheOuvrageTemplate[$id];
        }

        // Fetch Google data, e.g. by ID ZvhBAAAAcAAJ
        if ($lookupByIsbn) {
            $volume = $this->googleBooksAdapter->getDataByIsbn($id);
        } else {
            $volume = $this->googleBooksAdapter->getDataByGoogleId($id);
        }

        if (!($volume instanceof Volume)) {
            throw new DomainException('googleBooks Volume not found for that GB-id/isbn');
        }

        $bookMapper = new GoogleBookMapper();
        $bookMapper->mapLanguageData(true);
        $mappedData = $bookMapper->process($volume);

        // Generate wiki-template {ouvrage}
        $template = WikiTemplateFactory::create('ouvrage');
        $template->hydrate($mappedData);
        $template->setParam('consulté le', date('d-m-Y'));

        // Keep a private copy for later calls.
        $this->cacheOuvrageTemplate[$id] = clone $template;

        return $template;
    }
261
262
    /**
     * todo move
     * Find the bullet lines made only of a Google Books URL ("* https://books.google.fr/...").
     *
     * @param string $text Page wikitext
     *
     * @return array Matches in PREG_SET_ORDER: [0] the whole bullet line, [1] the URL (empty array when none)
     */
    public function extractGoogleExternalBullets(string $text): array
    {
        // Multiline mode: "^" and "$" anchor on each line of the wikitext.
        $regex = '#^\* *('.GoogleBooksUtil::GOOGLEBOOKS_START_URL_PATTERN.'[^ <{\]}\n\r]+) *$#im';

        $found = preg_match_all($regex, $text, $matches, PREG_SET_ORDER);

        // preg_match_all() yields 0 (no match) or false (error): both mean "nothing to return".
        return $found ? $matches : [];
    }
284
285
    /**
     * Replace each bullet-list Google Books URL with its {{Ouvrage}} citation.
     * TODO: duplicates the <ref> processing logic.
     *
     * Links whose conversion throws an Exception are reported on stdout and left untouched;
     * only Exception is caught here, so any other Throwable propagates to the caller.
     *
     * @param string $text  Page wikitext
     * @param array  $links Matches where [0] is the whole bullet line and [1] the URL
     *
     * @return string Wikitext with the converted links
     * @throws DomainException When the Google API quota is reached
     * @throws Throwable
     */
    private function processExternLinks(string $text, array $links): string
    {
        foreach ($links as $pattern) {
            if ($this->quota->isQuotaReached()) {
                throw new DomainException('Quota Google atteint');
            }
            try {
                $citation = $this->convertGBurl2OuvrageCitation(WikiTextUtil::stripFinalPoint($pattern[1]));
            } catch (Exception $e) {
                echo "Exception ".$e->getMessage();
                continue;
            }

            // todo: should a final full stop really be added here, as for references?
            $citation .= '.';

            $newRef = str_replace($pattern[1], $citation, $pattern[0]);
            echo $newRef."\n";

            // All arguments are strings, so str_replace() returns a string.
            $text = str_replace($pattern[0], $newRef, $text);

            // Pause between two Google API calls.
            echo "sleep ".self::SLEEP_GOOGLE_API_INTERVAL."\n";
            sleep(self::SLEEP_GOOGLE_API_INTERVAL);
        }

        return $text;
    }
321
322
}
323