Passed
Push — master ( 5eccc7...1faa52 )
by Dispositif
02:45
created

TemplateParser   A

Complexity

Total Complexity 36

Size/Duplication

Total Lines 290
Duplicated Lines 0 %

Test Coverage

Coverage 87%

Importance

Changes 2
Bugs 0 Features 0
Metric Value
eloc 106
dl 0
loc 290
ccs 87
cts 100
cp 0.87
rs 9.52
c 2
b 0
f 0
wmc 36

8 Methods

Rating   Name   Duplication   Size   Complexity  
B parseDataFromTemplate() 0 43 6
A findFirstTemplateInText() 0 32 2
A findUserStyleSeparator() 0 16 4
B parseAllTemplateByName() 0 39 7
B explodeParameterValue() 0 41 10
A findAllTemplatesByName() 0 14 2
A encodeTemplatePipes() 0 10 3
A isMultispacedTemplate() 0 8 2
1
<?php
2
/**
3
 * This file is part of dispositif/wikibot application
4
 * 2019 : Philippe M. <[email protected]>
5
 * For the full copyright and MIT license information, please view the LICENSE file.
6
 */
7
8
declare(strict_types=1);
9
10
namespace App\Domain\Utils;
11
12
use App\Domain\Models\Wiki\AbstractWikiTemplate;
13
use App\Domain\WikiTemplateFactory;
14
use Exception;
15
use LogicException;
16
use Throwable;
17
18
/**
19
 * todo legacy.
20
 * Class TemplateParser.
21
 */
22
abstract class TemplateParser extends WikiTextUtil
23
{
24
    /**
     * Find every occurrence of a template in a text and describe each one.
     *
     * The result has a single key (the template name) pointing to a list of
     * entries. Every entry holds the raw wikitext under 'raw'. A 'model' key
     * with the hydrated template object is added only when the factory call
     * succeeds and yields a subclass of AbstractWikiTemplate; otherwise the
     * entry keeps 'raw' alone.
     *
     * todo : simplify array if only one occurrence ?
     * todo refac extract/logic.
     *
     * @param string $tplName
     * @param string $text
     *
     * @return array [] when no template is found, else [$tplName => [['raw' => ..., 'model' => ...], ...]]
     * @throws Exception exceptions raised while parsing/hydrating are not caught here
     */
    public static function parseAllTemplateByName(string $tplName, string $text): array
    {
        // Extract the wikitext of each occurrence of that template
        $rawTemplates = self::findAllTemplatesByName($tplName, $text);

        // empty() on index 0 covers both "no match" and "empty first match"
        if (empty($rawTemplates[0])) {
            return [];
        }

        $parsed = [$tplName => []];
        foreach ($rawTemplates as $position => $rawTemplate) {
            // the raw text is stored even when no model can be built
            $parsed[$tplName][$position] = ['raw' => $rawTemplate];

            try {
                /** @var AbstractWikiTemplate $template */
                $template = WikiTemplateFactory::create($tplName);
            } catch (Throwable $ignored) {
                // factory failure : keep the raw-only entry and go on
                continue;
            }

            if (is_object($template) && is_subclass_of($template, AbstractWikiTemplate::class)) {
                $template->hydrate(self::parseDataFromTemplate($tplName, $rawTemplate));
                $template->detectUserSeparator($rawTemplate);

                $parsed[$tplName][$position]['model'] = $template;
            }
        }

        return $parsed;
    }
74
75
    /**
     * Find all the occurrences of a wiki template in a text.
     * Compatible with one level of included sub-templates.
     * Example :
     * {{Infobox |pays={{pays|France}} }}
     * returns an array of {{template|...}} strings.
     *
     * The template name is trimmed and matched case-insensitively.
     *
     * @param string $templateName
     * @param string $text
     *
     * @return array [ 0=>{{bla|...}}, 1=>{{bla|...}} ], or [] when nothing matches or the regex fails
     */
    public static function findAllTemplatesByName(string $templateName, string $text): array
    {
        // TODO check {{fr}}
        $quotedName = preg_quote(trim($templateName), '#');
        $pattern = "#{{[ \n]*".$quotedName."[ \t \n\r]*\|[^{}]*(?:{{[^{}]+}}[^{}]*)*}}#i";

        $found = [];
        if (false === preg_match_all($pattern, $text, $found)) {
            return [];
        }

        // index 0 holds the full matches
        return $found[0];
    }
103
104
    /**
     * Parse the parameters of a wiki template from a text and a template name.
     * Only the first {{template}} definition found in the text is used.
     * Line feeds are removed from the text before searching.
     *
     * todo refactor + check if @notused
     * todo legacy.
     *
     * @param string $tplName
     * @param string $text
     *
     * @return array parameter name => value, as built by explodeParameterValue()
     * @throws LogicException when the template is not found or its parameters can't be parsed
     */
    public static function parseDataFromTemplate(string $tplName, string $text): array
    {
        $flatText = str_replace("\n", '', $text); // todo WTF ?

        // check {{template}} in text
        // $found[0] : {{template|...}} ; $found[1] : url=blabla|titre=Popo
        $found = self::findFirstTemplateInText($tplName, $flatText);
        if (null === $found) {
            throw new LogicException("Template $tplName not found in text");
        }
        if (false === $found[1]) {
            throw new LogicException("No parameters found in $tplName");
        }

        // x flag => "\ " for space
        $paramPattern = "/
			(
	  			[^|=]*=?                          # parameter name (or nothing)
		 		(
					[^|{}\[\]<>]*               # reject <i>,<ref>
					(?:\[[^\[\]]+])?              # [url text] or [text]
					(?:<!--[^<>]+-->)?    # commentary <!-- -->
					(?:{{[^}{]+}})?          # {{template}} but KO with {{tmp|...}}
					                               # test : {{bla@PIPE@bla}}
					(?:\[\[[^]]+]])?            # [[fu|bar]]
					[^|{}\[\]]*                 # accept <i>,<ref>
		 		)*
	 		)\|?
		/x";

        // sub-template pipes are encoded first so that they don't split parameters
        $encodedParams = self::encodeTemplatePipes($found[1]);

        $matchCount = preg_match_all($paramPattern, $encodedParams, $wikiParams);
        if (!$matchCount || empty($wikiParams[1])) {
            throw new LogicException("Parameters from template '$tplName' can't be parsed");
        }

        return self::explodeParameterValue($wikiParams[1]);
    }
159
160
    /**
     * Find the first occurrence of a template in a text.
     * For multiple occurrences see findAllTemplatesByName().
     *
     * @param string $templateName
     * @param string $text
     *
     * @return array|null regex groups (0 => whole {{template|...}}, 1 => parameters text), null when not found
     */
    private static function findFirstTemplateInText(string $templateName, string $text): ?array
    {
        // BUG: does not work with:
        //        $text = '{{Ouvrage|auteur1 = Clément|titre = Les Borgia {{nobr|Alexandre {{VI}}}}}}'; // to debug
        //        $templateName = 'ouvrage'; // to debug

        //        $text = str_replace("\n", '', $text); // ??? todo regex multiline or encode char

        // todo: replace <!-- --> by encode char and memorize in var

        $placeholders = ['CURLYBRACKETO', 'CURLYBRACKETC'];
        $braces = ['{', '}'];

        // hack : solitary { and } are masked by placeholders so that they don't break the template regex
        $masked = preg_replace('#([^{]){([^{])#', '${1}CURLYBRACKETO$2', $text);
        $masked = preg_replace('#([^}])}([^}])#', '${1}CURLYBRACKETC$2', $masked);

        // TODO: implement better regex :(
        $pattern = '~{{ ?'.preg_quote($templateName, '~')."[ \t \n\r]*\|([^{}]*(?:{{[^{}]+}}[^{}]*)*)}}~i";
        if (1 !== preg_match($pattern, $masked, $groups)) {
            return null;
        }

        // put the original braces back in every captured group
        return str_replace($placeholders, $braces, $groups);
    }
201
202
    /**
     * Replace the pipes | of sub-templates by @PIPE@ in a text.
     * Only innermost {{...}} groups (without braces inside) are encoded.
     *
     * @param string $text
     *
     * @return string the text with encoded sub-template pipes (unchanged when there is no sub-template)
     */
    protected static function encodeTemplatePipes(string $text): string
    {
        $encoded = preg_replace_callback(
            '#{{(?:[^{}]+)}}#m',
            static function (array $subTemplate): string {
                return str_replace('|', '@PIPE@', $subTemplate[0]);
            },
            $text
        );

        // null means a regex failure : the text is returned untouched
        return $encoded ?? $text;
    }
220
221
    /**
     * Turn a list of "name=value" lines into an associative array.
     *
     * From ['fr', 'url=blabla', 'titre=popo']
     * To [ '1'=> 'fr', 'url' => 'blabla', 'titre' => 'popo' ].
     *
     * Rules :
     * - a line is split on its first "=" (so "fu=bar=coco" gives fu => "bar=coco") ;
     * - parameter names are lower-cased and trimmed, values are trimmed ;
     * - a line without "=" is a positional parameter, numbered from 1 ;
     * - empty lines and parameters with a blank value are skipped ;
     * - tabs, line feeds and carriage returns are removed, non-breaking spaces
     *   become regular spaces ;
     * - @PIPE@ (see encodeTemplatePipes()) is decoded back to "|" in values.
     *
     * @param array $wikiLines ['url=blabla', 'titre=popo']
     *
     * @return array parameter name => value
     * @throws LogicException when a line starts with "=" (empty parameter name)
     */
    protected static function explodeParameterValue(array $wikiLines): array
    {
        $data = [];
        $keyNum = 1;
        foreach ($wikiLines as $line) {
            // empty('0') is true : the explicit test keeps a positional value "0"
            // instead of dropping it and shifting the following positional keys
            if (empty($line) && '0' !== $line) {
                continue;
            }

            // cosmetic loss (is it OK ?) : control chars removed, non-breaking space
            // replaced by a regular space. The NBSP is written as an escape because
            // an invisible literal is easily degraded into a plain space.
            $line = str_replace(
                ["\t", "\n", "\r", "\u{00A0}"],
                ['', '', '', ' '],
                $line
            );

            // $line : fu = bar (OK : fu=bar=coco)
            $pos = strpos($line, '=');
            if (false === $pos) {
                // No param name => take $keyNum as param name
                $param = (string) $keyNum;
                $value = $line;
                ++$keyNum;
            } else {
                // "=" is a single ASCII byte, so byte-wise substr() is safe here
                $param = mb_strtolower(substr($line, 0, $pos), 'UTF-8');
                $value = substr($line, $pos + 1);
            }

            // strict comparison : a parameter literally named "0" is valid
            if ('' === $param) {
                throw new LogicException('param/value variable not defined');
            }

            // TODO : accept empty value ?
            if ('' === trim($value)) {
                continue;
            }

            // reverse the sub-template pipe encoding
            $data[trim($param)] = trim(str_replace('@PIPE@', '|', $value));
        }

        return $data;
    }
271
272
    /**
     * Find the separator style used in a template text :
     * only pipe, space-pipe-space, pipe-space, return-pipe, etc.
     *
     * @param string $tplText
     *
     * @return string the detected separator, ' |' when nothing is recognized
     */
    public static function findUserStyleSeparator(string $tplText): string
    {
        $hit = [];

        // Fixed : {{fu\n    | bar}} => the indentation is normalized to a single space
        if (1 === preg_match('#{{[^}|]+\n +\|( ?)[^}]+}}#i', $tplText, $hit)) {
            return "\n |".$hit[1];
        }

        $inlinePatterns = [
            // {{fu | bar}} (duplicate : because [^}|\n]+ allows final space...)
            '#{{[^}|\n]+([ \n]\|[ \n]?)[^}]+}}#i',
            // others : {{fu|bar}} ; {{fu\n|bar}} ; {{fu |bar}} ...
            '#{{[^}|\n]+([ \n]?\|[ \n]?)[^}]+}}#i',
        ];
        foreach ($inlinePatterns as $pattern) {
            if (1 === preg_match($pattern, $tplText, $hit)) {
                return $hit[1];
            }
        }

        return ' |';
    }
296
297
    /**
     * Detect a template written with aligned parameters ("param     = bla"),
     * i.e. containing a run of at least 4 spaces inside the {{...}}.
     *
     * @param string $tplText
     *
     * @return bool
     */
    public static function isMultispacedTemplate(string $tplText): bool
    {
        // detect 4 spaces chars
        return 1 === preg_match('#{{[^}]+ {4}[^}]+}}#i', $tplText);
    }
313
}
314