Passed
Push — dev ( eba0dc...b8ced8 )
by Dispositif
02:52
created

TemplateParser::findUserStyleSeparator()   A

Complexity

Conditions 4
Paths 4

Size

Total Lines 16
Code Lines 7

Duplication

Lines 0
Ratio 0 %

Importance

Changes 1
Bugs 0 Features 0
Metric Value
cc 4
eloc 7
c 1
b 0
f 0
nc 4
nop 1
dl 0
loc 16
rs 10
1
<?php
2
/**
3
 * This file is part of dispositif/wikibot application
4
 * 2019 : Philippe M. <[email protected]>
5
 * For the full copyright and MIT license information, please view the LICENSE file.
6
 */
7
8
declare(strict_types=1);
9
10
namespace App\Domain\Utils;
11
12
use App\Domain\Models\Wiki\AbstractWikiTemplate;
13
use App\Domain\WikiTemplateFactory;
14
use Exception;
15
use LogicException;
16
17
/**
18
 * todo legacy.
19
 *
20
 * Class TemplateParser.
21
 */
22
abstract class TemplateParser extends WikiTextUtil
23
{
24
    /**
     * Parse every occurrence of the template $tplName found in $text.
     *
     * Each occurrence yields an entry that always holds the raw wikitext under
     * the 'raw' key. A 'model' key is added only when WikiTemplateFactory could
     * create an object that is a subclass of AbstractWikiTemplate; that object is
     * hydrated with the parsed parameters and asked to detect the user separator.
     *
     * todo : simplify array if only one occurrence ?
     * todo refac extract/logic.
     *
     * @param string $tplName
     * @param string $text
     *
     * @return array [] when no template is found, otherwise
     *               [ $tplName => [ 0 => ['raw' => string, 'model' => object], ... ] ]
     *
     * @throws Exception
     */
    public static function parseAllTemplateByName(string $tplName, string $text): array
    {
        // Extract wikiText from that template
        $arrayTplText = self::findAllTemplatesByName($tplName, $text);

        if (empty($arrayTplText) || empty($arrayTplText[0])) {
            return [];
        }

        // Explicit initialisation rather than relying on array auto-vivification.
        $result = [$tplName => []];

        // array_values() guarantees the 0..n-1 numbering the manual counter used to provide.
        foreach (array_values($arrayTplText) as $inc => $tplText) {
            // store the raw text of the template
            $result[$tplName][$inc] = ['raw' => $tplText];

            // create an object of the template
            try {
                $tplObject = WikiTemplateFactory::create($tplName);
            } catch (\Throwable $e) {
                // Best effort: when no model can be built, the entry keeps only its raw text.
                continue;
            }

            if (!is_object($tplObject) || !is_subclass_of($tplObject, AbstractWikiTemplate::class)) {
                continue;
            }

            $data = self::parseDataFromTemplate($tplName, $tplText);
            $tplObject->hydrate($data);
            $tplObject->detectUserSeparator($tplText);

            $result[$tplName][$inc]['model'] = $tplObject;
        }

        return $result;
    }
75
76
    /**
     * Collect every occurrence of a wiki template in a text.
     * Sub-templates without further nesting are supported inside the parameters.
     * Example :
     * {{Infobox |pays={{pays|France}} }}
     *
     * The name is trimmed, regex-quoted and matched case-insensitively. The
     * pattern requires a pipe after the name, so a bare {{fr}} is not matched.
     *
     * @param string $templateName
     * @param string $text
     *
     * @return array [ 0=>{{bla|...}}, 1=>{{bla|...}} ], empty when nothing matches
     *               or when the regex engine reports an error
     */
    public static function findAllTemplatesByName(string $templateName, string $text): array
    {
        // TODO check {{fr}}
        $quotedName = preg_quote(trim($templateName), '#');
        $pattern = "#{{[ \n]*".$quotedName."[ \t \n\r]*\|[^{}]*(?:{{[^{}]+}}[^{}]*)*}}#i";

        $found = [];
        $count = preg_match_all($pattern, $text, $found);

        return false === $count ? [] : $found[0];
    }
105
106
    /**
     * Parse the parameters of the first {{$tplName|...}} definition found in $text.
     *
     * Newlines are removed from the text first. Pipes inside sub-templates are
     * encoded before the parameters are split, and the resulting segments are
     * handed to explodeParameterValue() which builds the returned array.
     * todo legacy.
     *
     * @param string $tplName
     * @param string $text
     *
     * @return array parameter name (or positional number) => value
     *
     * @throws LogicException when the template is not found in the text or when
     *                        its parameters can't be parsed
     */
    public static function parseDataFromTemplate(string $tplName, string $text): array
    {
        $text = str_replace("\n", '', $text);

        // check {{template}} in text
        $tplFound = self::findFirstTemplateInText($tplName, $text);

        if (null === $tplFound) {
            throw new LogicException("Template $tplName not found in text");
        }

        // $tplFound[0] : {{template|...}}
        // $tplFound[1] : url=blabla|titre=Popo
        // Group 1 is mandatory in the search pattern, so it is always a string
        // (possibly empty) once a match exists : no extra guard is needed here.

        // sub-template pipe | encoding
        $rawParams = self::encodeTemplatePipes($tplFound[1]);

        // x flag => "\ " for space
        $paramPattern = "/
            (
                [^|=]*=?                    # parameter name (or nothing)
                (
                    [^|{}\[\]<>]*           # reject <i>,<ref>
                    (?:\[[^\[\]]+])?        # [url text] or [text]
                    (?:<!--[^<>]+-->)?      # commentary <!-- -->
                    (?:{{[^}{]+}})?         # {{template}} but KO with {{tmp|...}}
                                            # test : {{bla@PIPE@bla}}
                    (?:\[\[[^]]+]])?        # [[fu|bar]]
                    [^|{}\[\]]*             # accept <i>,<ref>
                )*
            )\|?
            /x";

        $res = preg_match_all($paramPattern, $rawParams, $wikiParams);

        if (false === $res || 0 === $res || empty($wikiParams[1])) {
            throw new LogicException("Parameters from template '$tplName' can't be parsed");
        }

        return self::explodeParameterValue($wikiParams[1]);
    }
160
161
    /**
     * Locate the first {{templateName|...}} definition in a text.
     * For multiple occurrences see findAllTemplatesByName().
     *
     * @param string $templateName
     * @param string $text
     *
     * @return array|null [ 0 => whole template, 1 => parameters string ],
     *                    or null when the template is not found
     */
    private static function findFirstTemplateInText(string $templateName, string $text): ?array
    {
        // BUG : does not work with :
        //        $text = '{{Ouvrage|auteur1 = Clément|titre = Les Borgia {{nobr|Alexandre {{VI}}}}}}'; // to debug
        //        $templateName = 'ouvrage'; // to debug

        //        $text = str_replace("\n", '', $text); // ??? todo regex multiline or encode char

        // todo: replace <!-- --> by encode char and memorize in var

        // hack : replace solitary { and } by encoded string CURLYBRACKET
        $text = preg_replace('#([^{]){([^{])#', '${1}CURLYBRACKETO$2', $text);
        $text = preg_replace('#([^}])}([^}])#', '${1}CURLYBRACKETC$2', $text);

        // TODO: implement better regex :(
        $pattern = '~{{ ?'.preg_quote($templateName, '~')."[ \t \n\r]*\|([^{}]*(?:{{[^{}]+}}[^{}]*)*)}}~i";

        if (1 !== preg_match($pattern, $text, $matches)) {
            return null;
        }

        // Restore the solitary curly brackets in every captured group.
        return str_replace(['CURLYBRACKETO', 'CURLYBRACKETC'], ['{', '}'], $matches);
    }
202
203
    /**
     * Encode as @PIPE@ every pipe | located inside a sub-template of the text.
     * Only {{...}} spans that contain no other curly bracket are rewritten.
     *
     * @param string $text
     *
     * @return string the text with encoded sub-template pipes, or the text
     *                unchanged when no sub-template is found
     */
    protected static function encodeTemplatePipes(string $text): string
    {
        $encoded = preg_replace_callback(
            '#{{(?:[^{}]+)}}#m',
            static function (array $subTmpl): string {
                return str_replace('|', '@PIPE@', $subTmpl[0]);
            },
            $text
        );

        // On a regex error the input is returned untouched.
        return $encoded ?? $text;
    }
221
222
    /**
     * Turn raw parameter segments into a name => value array.
     *
     * From ['fr', 'url=blabla', 'titre=popo']
     * To [ '1' => 'fr', 'url' => 'blabla', 'titre' => 'popo' ].
     *
     * Segments without "=" are positional and receive an incrementing number as
     * name. Names are lower-cased; names and values are trimmed. Only the first
     * "=" splits name and value (fu=bar=coco gives fu => bar=coco). Segments
     * whose value is blank are dropped, and the @PIPE@ encoding of sub-template
     * pipes is reversed in the values.
     *
     * @param array $wikiLines ['url=blabla', 'titre=popo']
     *
     * @return array
     */
    protected static function explodeParameterValue(array $wikiLines): array
    {
        $data = [];
        $keyNum = 1;
        foreach ($wikiLines as $line) {
            // Skip only missing/empty segments : empty() would also discard the valid value '0'.
            if (!is_string($line) || '' === $line) {
                continue;
            }

            // Cosmetic loss : tabs and line breaks are removed, and the non-breaking
            // space (UTF-8 bytes C2 A0, written explicitly) becomes a regular space.
            $line = str_replace(
                ["\t", "\n", "\r", "\xC2\xA0"],
                ['', '', '', ' '],
                $line
            );

            // $line : fu = bar (OK : fu=bar=coco)
            $pos = strpos($line, '=');
            if (false === $pos) {
                // No param name => take $keyNum as param name
                $param = (string) $keyNum;
                $value = $line;
                ++$keyNum;
            } else {
                $param = mb_strtolower(substr($line, 0, $pos), 'UTF-8');
                $value = substr($line, $pos + 1);
            }

            // TODO : accept empty value ?
            if ('' === trim($value)) {
                continue;
            }

            // reverse the sub-template pipe encoding
            $value = str_replace('@PIPE@', '|', $value);
            $data[trim($param)] = trim($value);
        }

        return $data;
    }
272
273
    /**
     * Detect how the user writes the parameter separator of a template :
     * only pipe, space-pipe-space, pipe-space, return-pipe, etc.
     *
     * @param string $tplText
     *
     * @return string the separator found, or ' |' when no pattern matches
     */
    public static function findUserStyleSeparator(string $tplText): string
    {
        $captured = [];

        // Fixed : {{fu\n    | bar}} => the indentation is normalised to one space
        if (1 === preg_match('#{{[^}|]+\n +\|( ?)[^}]+}}#i', $tplText, $captured)) {
            return "\n |".$captured[1];
        }

        // Tried in order, the first match wins.
        $inlinePatterns = [
            // {{fu | bar}} (duplicate : because [^}|\n]+ allows final space...)
            '#{{[^}|\n]+([ \n]\|[ \n]?)[^}]+}}#i',
            // others : {{fu|bar}} ; {{fu\n|bar}} ; {{fu |bar}} ...
            '#{{[^}|\n]+([ \n]?\|[ \n]?)[^}]+}}#i',
        ];

        foreach ($inlinePatterns as $pattern) {
            if (1 === preg_match($pattern, $tplText, $captured)) {
                return $captured[1];
            }
        }

        return ' |';
    }
297
298
    /**
     * Tell whether the template text uses aligned parameters such as
     * "param     = bla", i.e. contains a run of 4 spaces inside {{...}}.
     *
     * @param string $tplText
     *
     * @return bool
     */
    public static function isMultispacedTemplate(string $tplText): bool
    {
        // detect 4 spaces chars
        return 1 === preg_match('#{{[^}]+ {4}[^}]+}}#i', $tplText);
    }
313
}
314