Test Setup Failed
Push — master ( aa2bf5...c5c2a9 )
by Dispositif
04:06
created

RefGoogleBook   A

Complexity

Total Complexity 19

Size/Duplication

Total Lines 169
Duplicated Lines 0 %

Importance

Changes 1
Bugs 0 Features 0
Metric Value
eloc 62
c 1
b 0
f 0
dl 0
loc 169
rs 10
wmc 19

5 Methods

Rating   Name   Duplication   Size   Complexity  
A process() 0 30 4
A extractAllGoogleRefs() 0 15 2
B convertGBurl2OuvrageCitation() 0 49 10
A __construct() 0 1 1
A generateOuvrageFromGoogleData() 0 22 2
1
<?php
2
/**
3
 * This file is part of dispositif/wikibot application
4
 * 2019 © Philippe M. <[email protected]>
5
 * For the full copyright and MIT license information, please view the LICENSE file.
6
 */
7
8
declare(strict_types=1);
9
10
namespace App\Domain;
11
12
use App\Domain\Models\Wiki\GoogleLivresTemplate;
13
use App\Domain\Models\Wiki\OuvrageTemplate;
14
use App\Domain\Publisher\GoogleBookMapper;
15
use App\Domain\Utils\NumberUtil;
16
use App\Domain\Utils\TextUtil;
17
use App\Domain\Utils\WikiTextUtil;
18
use App\Infrastructure\GoogleBooksAdapter;
19
20
/**
 * Converts bare Google Books links used as references into {{Ouvrage}} citations.
 *
 * Rewrites <ref>https://books.google...</ref> (and {{ref|https://books.google...}})
 * into <ref>{{Ouvrage|...}}.</ref> inside an article wikitext.
 *
 * @package App\Domain
 */
class RefGoogleBook
{
    /**
     * Pause, in seconds, observed after each ref whose processing queried the Google Books API.
     */
    const SLEEP_GOOGLE_API_INTERVAL = 8;

    /**
     * @var OuvrageTemplate[] Templates already built, indexed by Google Books ID
     */
    private $cacheOuvrageTemplate = [];

    /**
     * @var bool Raised by generateOuvrageFromGoogleData() when a Google Books API request is attempted
     */
    private $googleApiRequested = false;

    /**
     * RefGoogleBook constructor.
     * todo dependency injection
     */
    public function __construct() { }

    /**
     * Process page wikitext. Return wikitext with the Google Books refs converted.
     *
     * Refs that cannot be converted are reported on standard output and left untouched.
     * The pause of SLEEP_GOOGLE_API_INTERVAL seconds is applied after every ref that
     * triggered a Google Books API request, whether the conversion succeeded or failed;
     * refs served from the cache do not pause.
     *
     * @param string $text Page wikitext
     *
     * @return string New wikitext
     */
    public function process(string $text): string
    {
        $refsData = $this->extractAllGoogleRefs($text);
        if (empty($refsData)) {
            echo "Pas d'URL GB trouvée";

            return $text;
        }

        $converted = [];
        foreach ($refsData as $ref) {
            list($rawRef, $rawUrl) = $ref;

            // str_replace() below rewrites every occurrence of a ref at once, so an
            // identical ref that was already converted has nothing left to replace.
            if (isset($converted[$rawRef])) {
                continue;
            }

            $this->googleApiRequested = false;
            try {
                // A sentence-ending period glued to the URL is not part of the link.
                $citation = $this->convertGBurl2OuvrageCitation(rtrim($rawUrl, '.'));

                // The raw URL (with its optional period) is swapped for the citation
                // followed by the final period that closes the reference.
                $newRef = str_replace($rawUrl, $citation.'.', $rawRef);
                echo $newRef."\n";

                $text = str_replace($rawRef, $newRef, $text);
                $converted[$rawRef] = true;
            } catch (\Exception $e) {
                echo "Exception ".$e->getMessage()."\n";
            }

            // Throttle only when the API was really queried, including on failure.
            if ($this->googleApiRequested) {
                echo "sleep ".self::SLEEP_GOOGLE_API_INTERVAL."\n";
                sleep(self::SLEEP_GOOGLE_API_INTERVAL);
            }
        }

        return $text;
    }

    /**
     * Convert GoogleBooks URL to wiki-template {ouvrage} citation.
     *
     * @param string $url GoogleBooks URL
     *
     * @return string {{ouvrage}}
     * @throws \DomainException When the URL is not a Google Books URL, has no volume ID,
     *                          or the generated template has no title
     * @throws \Exception
     */
    private function convertGBurl2OuvrageCitation(string $url): string
    {
        if (!GoogleLivresTemplate::isGoogleBookURL($url)) {
            throw new \DomainException('Pas de URL Google Books');
        }

        $gooDat = GoogleLivresTemplate::parseGoogleBookQuery($url);
        if (empty($gooDat['id'])) {
            throw new \DomainException('Pas de ID Google Books');
        }

        $ouvrage = $this->generateOuvrageFromGoogleData($gooDat['id']);
        $cleanUrl = GoogleLivresTemplate::simplifyGoogleUrl($url);
        $ouvrage->unsetParam('présentation en ligne');
        $ouvrage->setParam('lire en ligne', $cleanUrl);
        $ouvrage->userSeparator = ' |';

        // A citation without title is rejected
        if (empty($ouvrage->getParam('titre'))) {
            throw new \DomainException("Ouvrage sans titre (data Google?)");
        }

        // Google "pg" query parameter => 'passage'
        $page = empty($gooDat['pg']) ? null : $this->extractPassage($gooDat['pg']);
        if (null !== $page) {
            $ouvrage->setParam('passage', $page);
            // todo: add a '<!-- utile? -->' comment?
        }

        $optimizer = new OuvrageOptimize($ouvrage);
        $optimizer->doTasks();

        return $optimizer->getOuvrage()->serialize();
    }

    /**
     * Translate a Google Books "pg" value (PA12, PT12, PR12...) into a 'passage' value.
     *
     * PA/PT numbers are kept as written; PR numbers are converted with NumberUtil::arab2roman().
     * Pages 1 and 2 are ignored (default view on Google Books).
     *
     * @param string $pg Value of the "pg" query parameter
     *
     * @return string|null Page to cite, or null when there is none worth citing
     */
    private function extractPassage(string $pg)
    {
        if (!preg_match('#(PA|PT|PR)([0-9]+)$#', $pg, $matches)) {
            return null;
        }

        $number = intval($matches[2]);
        if ($number < 3) {
            return null;
        }

        if ('PR' === $matches[1]) {
            return NumberUtil::arab2roman($number);
        }

        return $matches[2];
    }

    /**
     * todo: move (injection) to other class.
     * Generate wiki-template {ouvrage} from GoogleBook ID.
     *
     * Templates are cached per ID; a clone is handed out so that callers can
     * modify their copy without altering the cached one.
     *
     * @param string $id GoogleBooks ID
     *
     * @return OuvrageTemplate
     * @throws \Exception
     */
    private function generateOuvrageFromGoogleData(string $id): OuvrageTemplate
    {
        // return cached OuvrageTemplate
        if (isset($this->cacheOuvrageTemplate[$id])) {
            return clone $this->cacheOuvrageTemplate[$id];
        }

        // Raised before the request so that a failing request is throttled too.
        $this->googleApiRequested = true;

        // Get Google data by ID (example: ZvhBAAAAcAAJ)
        $adapter = new GoogleBooksAdapter();
        $volume = $adapter->getDataByGoogleId($id);

        $mapper = new GoogleBookMapper();
        $data = $mapper->process($volume);

        // Generate wiki-template {ouvrage}
        $ouvrage = new OuvrageTemplate();
        $ouvrage->hydrate($data);

        // cache
        $this->cacheOuvrageTemplate[$id] = clone $ouvrage;

        return $ouvrage;
    }

    /**
     * Extract all <ref>/{{ref}} containing only a GoogleBooks URL.
     *
     * The captured URL is returned exactly as written in the wikitext.
     *
     * @param string $text Page wikitext
     *
     * @return array [0 => ['<ref>http...</ref>', 'http://'], 1 => ...]
     */
    private function extractAllGoogleRefs(string $text): array
    {
        // <ref>...</ref> or {{ref|...}}
        $pattern = '#(?:<ref[^>]*>|{{ref\|) ?(https?://(?:books|play)\.google\.[a-z]{2,3}/(?:books)?(?:/reader)?\?id=[^>\]} \n]+) ?(?:</ref>|}})#i';

        // preg_match_all() yields 0 (no match) or false (regex error): both mean "nothing found"
        $found = preg_match_all($pattern, $text, $matches, PREG_SET_ORDER);

        return $found ? $matches : [];
    }
}
198