Passed
Push — master ( 26e53d...a73e13 )
by Dispositif
09:44
created

GoogleTransformer::processRef()   A

Complexity

Conditions 4
Paths 5

Size

Total Lines 27
Code Lines 16

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 0
CRAP Score 20

Importance

Changes 1
Bugs 0 Features 0
Metric Value
cc 4
eloc 16
c 1
b 0
f 0
nc 5
nop 2
dl 0
loc 27
ccs 0
cts 12
cp 0
crap 20
rs 9.7333
1
<?php
2
/**
3
 * This file is part of dispositif/wikibot application (@github)
4
 * 2019/2020 © Philippe M. <[email protected]>
5
 * For the full copyright and MIT license information, please view the license file.
6
 */
7
8
declare(strict_types=1);
9
10
namespace App\Domain;
11
12
use App\Domain\Models\Wiki\OuvrageTemplate;
13
use App\Domain\Publisher\GoogleBookMapper;
14
use App\Domain\Publisher\GoogleBooksUtil;
15
use App\Domain\Utils\NumberUtil;
16
use App\Domain\Utils\WikiTextUtil;
17
use App\Infrastructure\GoogleApiQuota;
18
use App\Infrastructure\GoogleBooksAdapter;
19
use App\Infrastructure\Logger;
20
use DomainException;
21
use Exception;
22
use Throwable;
23
24
/**
25
 * TODO REFAC : duplicate, extract methods in trait or in RefBotWorker + ExternBotWorker
26
 * --
27
 * Transform <ref>https://books.google...</ref> to <ref>{{Ouvrage|...}}.</ref>
28
 * in an article wikitext.
29
 * Class GoogleTransformer
30
 *
31
 * @package App\Domain
32
 */
33
class GoogleTransformer
34
{
35
    const SLEEP_GOOGLE_API_INTERVAL = 5;
36
37
    /**
38
     * @var array OuvrageTemplate[]
39
     */
40
    private $cacheOuvrageTemplate = [];
41
    /**
42
     * @var GoogleApiQuota
43
     */
44
    private $quota;
45
46
    /**
     * Create the transformer with a fresh GoogleApiQuota instance.
     *
     * TODO: receive the quota object through dependency injection.
     */
    public function __construct()
    {
        $apiQuota = new GoogleApiQuota();
        $this->quota = $apiQuota;
    }
54
55
    /**
     * Convert the bare Google Books links of a page's wikitext.
     *
     * Footnotes (<ref>/{{ref}}) are converted first; bulleted external links
     * are then looked up in the already-updated text.
     *
     * @param string $text Page wikitext
     *
     * @return string New wikitext
     * @throws DomainException When the Google API quota is already reached
     * @throws Throwable
     */
    public function process(string $text): string
    {
        if ($this->quota->isQuotaReached()) {
            throw new DomainException('Quota Google atteint');
        }

        $googleRefs = $this->extractAllGoogleRefs($text);
        if ([] !== $googleRefs) {
            $text = $this->processRef($text, $googleRefs);
        }

        // Bullets are searched after the footnotes have been rewritten.
        $bulletLinks = $this->extractGoogleExternalBullets($text);

        return [] === $bulletLinks
            ? $text
            : $this->processExternLinks($text, $bulletLinks);
    }
81
82
    /**
     * Find the <ref>...</ref> and {{ref|...}} footnotes whose only content is
     * a Google Books URL (optionally surrounded by a single space).
     * TODO: strip the final point of the URL.
     *
     * @param string $text Page wikitext
     *
     * @return array One entry per match (PREG_SET_ORDER): index 0 is the whole
     *               footnote, index 1 the URL — assuming the URL-start pattern
     *               adds no capture group of its own (TODO confirm).
     */
    private function extractAllGoogleRefs(string $text): array
    {
        // <ref>...</ref> or {{ref|...}}
        // See also GoogleLivresTemplate::GOOGLEBOOK_URL_PATTERN
        $refPattern = '#(?:<ref[^>]*>|{{ref\|) ?('
            .GoogleBooksUtil::GOOGLEBOOKS_START_URL_PATTERN
            .'[^>\]} \n]+) ?(?:</ref>|}})#i';

        $found = preg_match_all($refPattern, $text, $matches, PREG_SET_ORDER);

        // 0 matches and a PCRE failure (false) both yield an empty list.
        return $found ? $matches : [];
    }
106
107
    /**
     * Replace each matched Google Books footnote by a generated citation.
     *
     * A footnote whose conversion fails is reported on stdout and left untouched.
     *
     * @param string $text     Page wikitext
     * @param array  $refsData Matches from extractAllGoogleRefs(): [0] whole footnote, [1] URL
     *
     * @return string Updated wikitext
     * @throws DomainException When the Google API quota is reached
     */
    private function processRef(string $text, array $refsData): string
    {
        foreach ($refsData as $match) {
            if ($this->quota->isQuotaReached()) {
                throw new DomainException('Quota Google atteint');
            }

            try {
                $urlWithoutPoint = WikiTextUtil::stripFinalPoint($match[1]);
                $citation = $this->convertGBurl2OuvrageCitation($urlWithoutPoint);
                sleep(2);
            } catch (Throwable $failure) {
                echo 'Exception '.$failure->getMessage();
                continue;
            }

            // A reference ends with a full stop.
            $citation .= '.';

            // Swap the URL for the citation inside the footnote, then the
            // footnote inside the page text.
            $convertedRef = str_replace($match[1], $citation, $match[0]);
            echo $convertedRef, "\n";

            $text = str_replace($match[0], $convertedRef, $text);

            echo 'sleep ', self::SLEEP_GOOGLE_API_INTERVAL, "\n";
            sleep(self::SLEEP_GOOGLE_API_INTERVAL);
        }

        return $text;
    }
135
136
    /**
     * TODO : extract
     * Convert a Google Books URL into a serialized {{ouvrage}} wiki-template citation.
     *
     * When the Google API error says the volume ID could not be found, a
     * {{lien brisé}} template is returned instead of a citation.
     *
     * @param string $url GoogleBooks URL
     *
     * @return string {{ouvrage}} (or {{lien brisé}}) wikitext
     * @throws DomainException If the URL is not a Google Books URL, has no ID,
     *                         or the generated template has no title
     * @throws Exception
     * @throws Throwable
     */
    public function convertGBurl2OuvrageCitation(string $url): string
    {
        if (!GoogleBooksUtil::isGoogleBookURL($url)) {
            throw new DomainException('Pas de URL Google Books');
        }

        $gooDat = GoogleBooksUtil::parseGoogleBookQuery($url);
        if (empty($gooDat['id'])) {
            throw new DomainException('Pas de ID Google Books');
        }

        try {
            $ouvrage = $this->generateOuvrageFromGoogleData($gooDat['id']);
        } catch (Throwable $e) {
            // The ID does not exist on Google Books.
            // strpos() returns 0 (falsy) for a match at the very start of the
            // message, so it must be compared against false explicitly.
            if (false !== strpos($e->getMessage(), '"message": "The volume ID could n')) {
                return sprintf(
                    '{{lien brisé |url= %s |titre=%s |brisé le=%s}}',
                    $url,
                    'Ouvrage inexistant sur Google Books',
                    date('d-m-Y')
                );
            }
            throw $e;
        }

        $cleanUrl = GoogleBooksUtil::simplifyGoogleUrl($url);
        $ouvrage->unsetParam('présentation en ligne');
        $ouvrage->setParam('lire en ligne', $cleanUrl);
        $ouvrage->userSeparator = ' |';

        // Missing title
        if (!$ouvrage->hasParamValue('titre')) {
            throw new DomainException("Ouvrage sans titre (data Google?)");
        }

        // Google page => 'passage'
        if (!empty($gooDat['pg'])) {
            $page = null;

            if (preg_match('#(?:PA|PT)([0-9]+)$#', $gooDat['pg'], $matches)) {
                // Skip page=1 and page=2 (default view on Google Books)
                if (intval($matches[1]) >= 3) {
                    $page = $matches[1];
                }
            }
            // "PR" pages are converted to Roman numerals
            if (preg_match('#PR([0-9]+)$#', $gooDat['pg'], $matches)) {
                // Skip page=1 and page=2 (default view on Google Books)
                if (intval($matches[1]) >= 3) {
                    $page = NumberUtil::arab2roman(intval($matches[1]), true);
                }
            }

            if (!empty($page)) {
                $ouvrage->setParam('passage', $page);
                // add a '<!-- utile? -->' comment?
            }
        }

        $optimizer = OptimizerFactory::fromTemplate($ouvrage, null, new Logger());
        $optimizer->doTasks();
        $ouvrage2 = $optimizer->getOptiTemplate();

        return $ouvrage2->serialize();
    }
210
211
    /**
     * todo: move (injection) to other class.
     * Build an {ouvrage} wiki-template from a GoogleBooks ID, with a
     * per-instance cache keyed by ID.
     *
     * @param string $id GoogleBooks ID
     *
     * @return OuvrageTemplate A template distinct from the cached copy
     * @throws Exception
     */
    private function generateOuvrageFromGoogleData(string $id): OuvrageTemplate
    {
        if (!isset($this->cacheOuvrageTemplate[$id])) {
            // Fetch Google data by ID (e.g. ZvhBAAAAcAAJ)
            $googleAdapter = new GoogleBooksAdapter();
            $googleVolume = $googleAdapter->getDataByGoogleId($id);

            $bookMapper = new GoogleBookMapper();
            $bookMapper->mapLanguageData(true);
            $templateData = $bookMapper->process($googleVolume);

            // Generate wiki-template {ouvrage}
            $template = WikiTemplateFactory::create('ouvrage');
            $template->hydrate($templateData);

            // Keep a separate copy in the cache; the caller gets the original.
            $this->cacheOuvrageTemplate[$id] = clone $template;

            return $template;
        }

        // Cache hit: hand out a copy so the cached template is not modified.
        return clone $this->cacheOuvrageTemplate[$id];
    }
244
245
    /**
     * todo move
     * Find the bullet lines made only of a Google Books URL,
     * e.g. "* https://books.google.fr/...".
     *
     * @param string $text Page wikitext
     *
     * @return array One entry per match (PREG_SET_ORDER): index 0 is the whole
     *               line, index 1 the URL — assuming the URL-start pattern
     *               adds no capture group of its own (TODO confirm).
     */
    public function extractGoogleExternalBullets(string $text): array
    {
        $bulletPattern = '#^\* *('
            .GoogleBooksUtil::GOOGLEBOOKS_START_URL_PATTERN
            .'[^ <{\]}\n\r]+) *$#im';

        $found = preg_match_all($bulletPattern, $text, $matches, PREG_SET_ORDER);

        // 0 matches and a PCRE failure (false) both yield an empty list.
        return $found ? $matches : [];
    }
267
268
    /**
     * TODO: duplicates processRef().
     * Replace each matched bulleted Google Books link by a generated citation.
     *
     * A link whose conversion throws an Exception is reported on stdout and
     * left untouched.
     * NOTE(review): processRef() catches Throwable whereas this method only
     * catches Exception — confirm which behavior is intended.
     *
     * @param string $text  Page wikitext
     * @param array  $links Matches from extractGoogleExternalBullets(): [0] whole line, [1] URL
     *
     * @return string Updated wikitext
     * @throws DomainException When the Google API quota is reached
     * @throws Throwable
     */
    private function processExternLinks(string $text, array $links): string
    {
        foreach ($links as $pattern) {
            if ($this->quota->isQuotaReached()) {
                throw new DomainException('Quota Google atteint');
            }
            try {
                $citation = $this->convertGBurl2OuvrageCitation(WikiTextUtil::stripFinalPoint($pattern[1]));
            } catch (Exception $e) {
                echo "Exception ".$e->getMessage();
                continue;
            }

            // todo: is the final full stop wanted for a bulleted link?
            $citation .= '.';

            $newRef = str_replace($pattern[1], $citation, $pattern[0]);
            echo $newRef."\n";

            // Both search and subject are strings, so str_replace() returns a string.
            $text = str_replace($pattern[0], $newRef, $text);

            echo "sleep ".self::SLEEP_GOOGLE_API_INTERVAL."\n";
            sleep(self::SLEEP_GOOGLE_API_INTERVAL);
        }

        return $text;
    }
304
305
}
306