Passed
Push — master ( 5eccc7...1faa52 )
by Dispositif
02:45
created

GoogleTransformer::extractAllGoogleRefs()   A

Complexity

Conditions 2
Paths 2

Size

Total Lines 15
Code Lines 7

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 0
CRAP Score 6

Importance

Changes 0
Metric Value
cc 2
eloc 7
nc 2
nop 1
dl 0
loc 15
ccs 0
cts 8
cp 0
crap 6
rs 10
c 0
b 0
f 0
1
<?php
2
/**
3
 * This file is part of dispositif/wikibot application (@github)
4
 * 2019/2020 © Philippe M. <[email protected]>
5
 * For the full copyright and MIT license information, please view the license file.
6
 */
7
8
declare(strict_types=1);
9
10
namespace App\Domain;
11
12
use App\Domain\Models\Wiki\OuvrageTemplate;
13
use App\Domain\Publisher\GoogleBookMapper;
14
use App\Domain\Publisher\GoogleBooksUtil;
15
use App\Domain\Utils\NumberUtil;
16
use App\Domain\Utils\WikiTextUtil;
17
use App\Infrastructure\GoogleApiQuota;
18
use App\Infrastructure\GoogleBooksAdapter;
19
use App\Infrastructure\Logger;
20
use DomainException;
21
use Exception;
22
use Throwable;
23
24
/**
25
 * TODO REFAC : duplicate, extract methods in trait or in RefBotWorker + ExternBotWorker
26
 * --
27
 * Transform <ref>https://books.google...</ref> to <ref>{{Ouvrage|...}}.</ref>
28
 * in an article wikitext.
29
 * Class GoogleTransformer
30
 *
31
 * @package App\Domain
32
 */
33
class GoogleTransformer
34
{
35
    const SLEEP_GOOGLE_API_INTERVAL = 5;
36
37
    /**
38
     * @var array OuvrageTemplate[]
39
     */
40
    private $cacheOuvrageTemplate = [];
41
    /**
42
     * @var GoogleApiQuota
43
     */
44
    private $quota;
45
46
    /**
     * Build the transformer together with its Google API quota object.
     *
     * todo dependency injection: the quota object is instantiated here
     * instead of being handed in by the caller.
     */
    public function __construct()
    {
        $googleQuota = new GoogleApiQuota();
        $this->quota = $googleQuota;
    }
54
55
    /**
     * Convert the Google Books links of a page into {{Ouvrage}} citations.
     *
     * Two passes run in order: first the <ref>/{{ref}} footnotes that hold a
     * Google Books URL, then the bullet-list lines ("* https://books.google...").
     * The second pass works on the text produced by the first one.
     *
     * @param string $text Page wikitext
     *
     * @return string Wikitext after conversion
     * @throws Throwable DomainException when the quota check reports the Google
     *                   quota as reached, plus anything the two passes let through
     */
    public function process(string $text): string
    {
        // Refuse to start any work once the quota is reported as reached.
        if ($this->quota->isQuotaReached()) {
            throw new DomainException('Quota Google atteint');
        }

        // Pass 1: footnotes.
        $googleRefs = $this->extractAllGoogleRefs($text);
        if (count($googleRefs) > 0) {
            $text = $this->processRef($text, $googleRefs);
        }

        // Pass 2: bullet-list links, searched in the already converted text.
        $bulletLinks = $this->extractGoogleExternal($text);

        return count($bulletLinks) > 0
            ? $this->processExternLinks($text, $bulletLinks)
            : $text;
    }
81
82
    /**
     * Find the bullet-list lines made of a single Google Books URL.
     *
     * A line qualifies when it starts with "*", optional spaces, then a URL
     * beginning with GoogleBooksUtil::GOOGLEBOOKS_START_URL_PATTERN and running
     * up to the end of the line (trailing spaces allowed). Matching is
     * case-insensitive and line-based.
     *
     * todo move
     *
     * @param string $text Page wikitext
     *
     * @return array One entry per match: [0 => whole matched line, 1 => URL];
     *               empty array when nothing matches
     */
    public function extractGoogleExternal(string $text): array
    {
        // match "* https://books.google.fr/..."
        $bulletUrlRegex = '#^\* *('.GoogleBooksUtil::GOOGLEBOOKS_START_URL_PATTERN.'[^ <{\]}\n\r]+) *$#im';

        $found = preg_match_all($bulletUrlRegex, $text, $matches, PREG_SET_ORDER);
        if (!$found) {
            // Zero matches, or a PCRE failure.
            return [];
        }

        return $matches;
    }
104
105
    /**
     * Replace each bullet-list Google Books URL by its {{Ouvrage}} citation.
     *
     * TODO: near-duplicate of processRef().
     *
     * For every match the URL is converted through convertGBurl2OuvrageCitation();
     * a link whose conversion throws an Exception is reported on stdout and left
     * untouched. After each converted link the method sleeps
     * SLEEP_GOOGLE_API_INTERVAL seconds.
     *
     * @param string $text  Page wikitext
     * @param array  $links Matches as returned by extractGoogleExternal():
     *                      [0 => whole matched line, 1 => URL]
     *
     * @return string Wikitext with the converted lines
     * @throws Throwable DomainException when the quota is reached; any
     *                   non-Exception Throwable raised by the conversion
     */
    private function processExternLinks(string $text, array $links): string
    {
        foreach ($links as $pattern) {
            if ($this->quota->isQuotaReached()) {
                throw new DomainException('Quota Google atteint');
            }
            try {
                $citation = $this->convertGBurl2OuvrageCitation(WikiTextUtil::stripFinalPoint($pattern[1]));
            } catch (Exception $e) {
                echo "Exception ".$e->getMessage();
                continue;
            }

            // todo: is the final period wanted for a bullet-list link ???
            $citation .= '.';

            $newRef = str_replace($pattern[1], $citation, $pattern[0]);
            echo $newRef."\n";

            // Replace whole lines only. A plain str_replace() would also hit
            // the matched line where it is merely a substring of another line
            // (e.g. "* url" inside "* url&pg=PA5" or "** url") and corrupt it.
            $replaced = preg_replace_callback(
                '#^'.preg_quote($pattern[0], '#').'$#m',
                static function () use ($newRef): string {
                    // Callback form: "$" or "\" in the citation stay literal.
                    return $newRef;
                },
                $text
            );
            // On a PCRE failure fall back to the historical substring replacement.
            $text = is_string($replaced) ? $replaced : str_replace($pattern[0], $newRef, $text);

            echo "sleep ".self::SLEEP_GOOGLE_API_INTERVAL."\n";
            sleep(self::SLEEP_GOOGLE_API_INTERVAL);
        }

        return $text;
    }
141
142
    /**
     * TODO : extract
     * Convert a GoogleBooks URL to a wiki-template {{ouvrage}} citation.
     *
     * When the Google lookup fails with a message containing
     * '"message": "The volume ID could n', a {{lien brisé}} template is returned
     * instead of a citation; any other lookup failure is re-thrown.
     *
     * @param string $url GoogleBooks URL
     *
     * @return string Serialized {{ouvrage}} (or {{lien brisé}}) wikitext
     * @throws DomainException If $url is not a Google Books URL, carries no
     *                         volume id, or yields a book without title
     * @throws Exception
     * @throws Throwable Any other failure raised while fetching the Google data
     */
    public function convertGBurl2OuvrageCitation(string $url): string
    {
        if (!GoogleBooksUtil::isGoogleBookURL($url)) {
            throw new DomainException('Pas de URL Google Books');
        }

        $gooDat = GoogleBooksUtil::parseGoogleBookQuery($url);
        if (empty($gooDat['id'])) {
            throw new DomainException('Pas de ID Google Books');
        }

        try {
            $ouvrage = $this->generateOuvrageFromGoogleData($gooDat['id']);
        } catch (Throwable $e) {
            // The ID does not exist on Google Books.
            // strpos() returns 0 when the needle starts the message, so the
            // result must be compared against false, not used as a boolean.
            if (strpos($e->getMessage(), '"message": "The volume ID could n') !== false) {
                return sprintf(
                    '{{lien brisé |url= %s |titre= %s |brisé le=%s}}',
                    $url,
                    'Ouvrage inexistant sur Google Books',
                    date('d-m-Y')
                );
            }
            throw $e;
        }

        // The simplified Google URL becomes the "read online" link.
        $cleanUrl = GoogleBooksUtil::simplifyGoogleUrl($url);
        $ouvrage->unsetParam('présentation en ligne');
        $ouvrage->setParam('lire en ligne', $cleanUrl);
        $ouvrage->userSeparator = ' |';

        // Missing title
        if (!$ouvrage->hasParamValue('titre')) {
            throw new DomainException("Ouvrage sans titre (data Google?)");
        }

        // Google page => 'passage'
        $page = null;
        if (!empty($gooDat['pg'])) {
            if (preg_match('#(?:PA|PT)([0-9]+)$#', $gooDat['pg'], $matches)) {
                // Skip page=1 and page=2 (default view on Google Books)
                if (intval($matches[1]) >= 3) {
                    $page = $matches[1];
                }
            }
            // "PR" pages are converted to Roman numerals
            if (preg_match('#PR([0-9]+)$#', $gooDat['pg'], $matches)) {
                // Skip page=1 and page=2 (default view on Google Books)
                if (intval($matches[1]) >= 3) {
                    // NOTE(review): meaning of the second argument (true) is
                    // defined in NumberUtil::arab2roman — confirm there.
                    $page = NumberUtil::arab2roman(intval($matches[1]), true);
                }
            }

            if (!empty($page)) {
                $ouvrage->setParam('passage', $page);
                // add a '<!-- utile? -->' comment ?
            }
        }

        // Run the template through the optimizer before serializing it.
        $optimizer = new OuvrageOptimize($ouvrage, null, new Logger());
        $optimizer->doTasks();
        $ouvrage2 = $optimizer->getOuvrage();

        return $ouvrage2->serialize();
    }
216
217
    /**
     * Build a wiki-template {{ouvrage}} from a GoogleBooks volume ID.
     *
     * Results are memoized per ID in $this->cacheOuvrageTemplate. Both the
     * stored template and the one handed back on a cache hit are clones, so
     * callers can modify what they receive without altering the cached copy.
     *
     * todo: move (injection) to other class.
     *
     * @param string $id GoogleBooks ID (e.g. ZvhBAAAAcAAJ)
     *
     * @return OuvrageTemplate
     * @throws Exception
     */
    private function generateOuvrageFromGoogleData(string $id): OuvrageTemplate
    {
        // Cache hit: hand out a copy of the stored template.
        if (isset($this->cacheOuvrageTemplate[$id])) {
            $cached = $this->cacheOuvrageTemplate[$id];

            return clone $cached;
        }

        // Fetch the Google volume for this ID.
        $googleVolume = (new GoogleBooksAdapter())->getDataByGoogleId($id);

        // Map the Google volume to template data, language data included.
        $bookMapper = new GoogleBookMapper();
        $bookMapper->mapLanguageData(true);
        $templateData = $bookMapper->process($googleVolume);

        // Generate and fill the wiki-template {ouvrage}.
        $template = WikiTemplateFactory::create('ouvrage');
        $template->hydrate($templateData);

        // Keep an independent copy for later requests with the same ID.
        $this->cacheOuvrageTemplate[$id] = clone $template;

        return $template;
    }
250
251
    /**
     * Collect every <ref>…</ref> or {{ref|…}} whose content is a GoogleBooks URL.
     *
     * The URL must begin with GoogleBooksUtil::GOOGLEBOOKS_START_URL_PATTERN;
     * one optional space is tolerated on each side of it. Matching is
     * case-insensitive.
     * Todo : strip the final period of the URL
     *
     * @param string $text Page wikitext
     *
     * @return array [0 => ['<ref>http...</ref>', 'http://'], 1 => ...];
     *               empty array when nothing matches
     */
    private function extractAllGoogleRefs(string $text): array
    {
        // <ref>...</ref> or {{ref|...}}
        // GoogleLivresTemplate::GOOGLEBOOK_URL_PATTERN
        $googleRefRegex = '#(?:<ref[^>]*>|{{ref\|) ?('.GoogleBooksUtil::GOOGLEBOOKS_START_URL_PATTERN.'[^>\]} \n]+) ?(?:</ref>|}})#i';

        $found = preg_match_all($googleRefRegex, $text, $matches, PREG_SET_ORDER);
        if (!$found) {
            // Zero matches, or a PCRE failure.
            return [];
        }

        return $matches;
    }
275
276
    /**
     * Replace the URL of each Google Books reference by an {{Ouvrage}} citation.
     *
     * For every match the URL is converted through convertGBurl2OuvrageCitation();
     * a reference whose conversion throws is reported on stdout and left
     * untouched. The method pauses 2 seconds after each successful conversion
     * and SLEEP_GOOGLE_API_INTERVAL more seconds after the replacement.
     *
     * @param string $text     Page wikitext
     * @param array  $refsData Matches as returned by extractAllGoogleRefs():
     *                         [0 => whole reference, 1 => URL]
     *
     * @return string Wikitext with the converted references
     * @throws DomainException When the quota check reports the quota as reached
     */
    private function processRef(string $text, array $refsData): string
    {
        foreach ($refsData as $match) {
            if ($this->quota->isQuotaReached()) {
                throw new DomainException('Quota Google atteint');
            }

            try {
                $strippedUrl = WikiTextUtil::stripFinalPoint($match[1]);
                $citation = $this->convertGBurl2OuvrageCitation($strippedUrl);
                sleep(2);
            } catch (Throwable $failure) {
                // Best effort: report the failure and go on with the next reference.
                echo 'Exception ', $failure->getMessage();
                continue;
            }

            // The citation gets a final period inside the reference.
            $convertedRef = str_replace($match[1], $citation.'.', $match[0]);
            echo $convertedRef, "\n";

            $text = str_replace($match[0], $convertedRef, $text);

            echo 'sleep ', self::SLEEP_GOOGLE_API_INTERVAL, "\n";
            sleep(self::SLEEP_GOOGLE_API_INTERVAL);
        }

        return $text;
    }
304
305
}
306