Passed
Push — dev ( 043eb4...bf3609 )
by Dispositif
06:23
created

RefGoogleBook   A

Complexity

Total Complexity 22

Size/Duplication

Total Lines 190
Duplicated Lines 0 %

Importance

Changes 4
Bugs 0 Features 0
Metric Value
eloc 73
dl 0
loc 190
rs 10
c 4
b 0
f 0
wmc 22

5 Methods

Rating   Name   Duplication   Size   Complexity  
A process() 0 30 4
A extractAllGoogleRefs() 0 15 2
C convertGBurl2OuvrageCitation() 0 66 13
A __construct() 0 1 1
A generateOuvrageFromGoogleData() 0 23 2
1
<?php
2
/**
3
 * This file is part of dispositif/wikibot application
4
 * 2019 © Philippe M. <[email protected]>
5
 * For the full copyright and MIT license information, please view the LICENSE file.
6
 */
7
8
declare(strict_types=1);
9
10
namespace App\Domain;
11
12
use App\Domain\Models\Wiki\GoogleLivresTemplate;
13
use App\Domain\Models\Wiki\OuvrageTemplate;
14
use App\Domain\Publisher\GoogleBookMapper;
15
use App\Domain\Utils\NumberUtil;
16
use App\Domain\Utils\WikiTextUtil;
17
use App\Infrastructure\GoogleBooksAdapter;
18
19
/**
20
 * Transform <ref>https://books.google...</ref> to <ref>{{Ouvrage|...}}.</ref>
21
 * in an article wikitext.
22
 * Class RefGoogleBook
23
 *
24
 * @package App\Domain
25
 */
26
class RefGoogleBook
27
{
28
    /**
     * Number of seconds passed to sleep() after each converted reference
     * in process(). Presumably a throttle for the Google Books API — confirm.
     */
    const SLEEP_GOOGLE_API_INTERVAL = 8;

    /**
     * Templates already generated, indexed by GoogleBooks ID.
     *
     * @var OuvrageTemplate[]
     */
    private $cacheOuvrageTemplate = [];

    /**
     * RefGoogleBook constructor.
     * todo dependency injection
     */
    public function __construct()
    {
    }
40
41
    /**
     * Convert every GoogleBooks-only reference of a page wikitext.
     *
     * Each matching reference has its URL replaced by the citation returned by
     * convertGBurl2OuvrageCitation(), followed by a final point. A reference
     * whose conversion throws an \Exception is reported on stdout and left
     * untouched. After each successful conversion the method sleeps
     * SLEEP_GOOGLE_API_INTERVAL seconds.
     *
     * @param string $text Page wikitext
     *
     * @return string New wikitext (the input unchanged when no reference matches)
     * @throws \Throwable Errors that are not \Exception instances are not caught here
     */
    public function process(string $text): string
    {
        $googleRefs = $this->extractAllGoogleRefs($text);
        if ([] === $googleRefs) {
            echo "Pas d'URL GB trouvée\n";

            return $text;
        }

        foreach ($googleRefs as $match) {
            // $match[0]: whole reference markup, $match[1]: captured URL
            $wholeRef = $match[0];
            $rawUrl = $match[1];

            try {
                $template = $this->convertGBurl2OuvrageCitation(WikiTextUtil::stripFinalPoint($rawUrl));
            } catch (\Exception $exception) {
                echo "Exception ".$exception->getMessage();
                continue;
            }

            // the citation is followed by a final point inside the reference
            $convertedRef = str_replace($rawUrl, $template.'.', $wholeRef);
            echo $convertedRef."\n";

            $text = str_replace($wholeRef, $convertedRef, $text);

            echo "sleep ".self::SLEEP_GOOGLE_API_INTERVAL."\n";
            sleep(self::SLEEP_GOOGLE_API_INTERVAL);
        }

        return $text;
    }
80
81
    /**
     * TODO : extract
     * Convert GoogleBooks URL to wiki-template {ouvrage} citation.
     *
     * When fetching the volume fails with an error whose message contains both
     * '404 Not Found' and the "volume ID could n..." marker, a {{lien brisé}}
     * template is returned instead of an {{ouvrage}}. Any other failure is
     * rethrown unchanged.
     *
     * @param string $url GoogleBooks URL
     *
     * @return string Serialized {{ouvrage}}, or {{lien brisé}} for an unknown volume ID
     * @throws \DomainException When $url is not a GoogleBooks URL, carries no
     *                          volume ID, or the generated template has no title
     * @throws \Exception
     * @throws \Throwable
     */
    public function convertGBurl2OuvrageCitation(string $url): string
    {
        if (!GoogleLivresTemplate::isGoogleBookURL($url)) {
            throw new \DomainException('Pas de URL Google Books');
        }

        $gooDat = GoogleLivresTemplate::parseGoogleBookQuery($url);
        if (empty($gooDat['id'])) {
            throw new \DomainException('Pas de ID Google Books');
        }

        try {
            $ouvrage = $this->generateOuvrageFromGoogleData($gooDat['id']);
        } catch (\Throwable $e) {
            $message = $e->getMessage();
            // The ID does not exist on Google Books.
            // strpos() returns 0 (falsy) for a match at offset 0, so the result
            // must be compared against false explicitly.
            if (false !== strpos($message, '404 Not Found')
                && false !== strpos($message, '"message": "The volume ID could n')
            ) {
                return sprintf(
                    '{{lien brisé |url= %s |titre= %s |brisé le=%s}}',
                    $url,
                    'Ouvrage inexistant sur Google Books',
                    date('d-m-Y')
                );
            }
            throw $e;
        }

        $cleanUrl = GoogleLivresTemplate::simplifyGoogleUrl($url);
        $ouvrage->unsetParam('présentation en ligne');
        $ouvrage->setParam('lire en ligne', $cleanUrl);
        $ouvrage->userSeparator = ' |';

        // Missing title
        if (empty($ouvrage->getParam('titre'))) {
            throw new \DomainException("Ouvrage sans titre (data Google?)");
        }

        // Google page => 'passage'
        $page = null;
        if (!empty($gooDat['pg'])) {
            // Pages 1 and 2 are skipped in both branches (default view on Google Books)
            if (preg_match('#(?:PA|PT)([0-9]+)$#', $gooDat['pg'], $matches)) {
                if (intval($matches[1]) >= 3) {
                    $page = $matches[1];
                }
            } elseif (preg_match('#PR([0-9]+)$#', $gooDat['pg'], $matches)) {
                // PR pages are converted to Roman numerals
                if (intval($matches[1]) >= 3) {
                    $page = NumberUtil::arab2roman(intval($matches[1]), true);
                }
            }
        }

        if (!empty($page)) {
            $ouvrage->setParam('passage', $page);
            // add a '<!-- utile? -->' comment ?
        }

        $optimizer = new OuvrageOptimize($ouvrage);
        $optimizer->doTasks();
        $ouvrage2 = $optimizer->getOuvrage();

        return $ouvrage2->serialize();
    }
158
159
    /**
     * todo: move (injection) to other class.
     * Build the wiki-template {ouvrage} matching a GoogleBooks ID.
     *
     * The first call for a given ID fetches the volume through
     * GoogleBooksAdapter, maps it with GoogleBookMapper and stores a clone of
     * the hydrated template in the cache. Later calls for the same ID return a
     * clone of the cached template, so callers never share an instance with
     * the cache.
     *
     * @param string $id GoogleBooks ID
     *
     * @return OuvrageTemplate
     * @throws \Exception
     */
    private function generateOuvrageFromGoogleData(string $id): OuvrageTemplate
    {
        // cache hit: hand out a copy
        $cached = $this->cacheOuvrageTemplate[$id] ?? null;
        if (null !== $cached) {
            return clone $cached;
        }

        // Fetch Google data by ID (e.g. ZvhBAAAAcAAJ)
        $googleVolume = (new GoogleBooksAdapter())->getDataByGoogleId($id);

        $bookMapper = new GoogleBookMapper();
        $bookMapper->mapLanguageData(true);
        $mappedData = $bookMapper->process($googleVolume);

        // Generate wiki-template {ouvrage}
        $template = \App\Domain\WikiTemplateFactory::create('ouvrage');
        $template->hydrate($mappedData);

        // keep a copy in the cache, return the fresh instance
        $this->cacheOuvrageTemplate[$id] = clone $template;

        return $template;
    }
192
193
    /**
     * Find every <ref>/{{ref|}} whose whole content is a GoogleBooks URL.
     *
     * The URL may be surrounded by one optional space on each side. It stops
     * at the first '>', ']', '}', space or newline. Matching is
     * case-insensitive.
     * Todo : strip the final point of the URL
     *
     * @param string $text Page wikitext
     *
     * @return array [0 => ['<ref>http...</ref>', 'http://'], 1 => ...]
     */
    private function extractAllGoogleRefs(string $text): array
    {
        // <ref>...</ref> or {{ref|...}} around GoogleLivresTemplate::GOOGLEBOOK_URL_PATTERN
        $pattern = '#(?:<ref[^>]*>|{{ref\|) ?('
            .GoogleLivresTemplate::GOOGLEBOOK_URL_PATTERN
            .'[^>\]} \n]+) ?(?:</ref>|}})#i';

        $found = preg_match_all($pattern, $text, $matches, PREG_SET_ORDER);

        // no match (0) and regex failure (false) both yield an empty list
        return $found ? $matches : [];
    }
217
}
218