Passed
Push — master ( d644e9...e142f8 )
by Dispositif
15:55
created

WikiTextUtil::fixConcatenatedRefsSyntax()   A

Complexity

Conditions 1
Paths 1

Size

Total Lines 5
Code Lines 2

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 0
CRAP Score 2

Importance

Changes 0
Metric Value
eloc 2
c 0
b 0
f 0
dl 0
loc 5
ccs 0
cts 0
cp 0
rs 10
cc 1
nc 1
nop 1
crap 2
1
<?php
2
/*
3
 * This file is part of dispositif/wikibot application (@github)
4
 * 2019/2020 © Philippe/Irønie  <[email protected]>
5
 * For the full copyright and MIT license information, view the license file.
6
 */
7
8
declare(strict_types=1);
9
10
namespace App\Domain\Utils;
11
12
class WikiTextUtil extends TextUtil
{
    /**
     * Extract the <ref>…</ref> tags of a wikitext and, when at least one ref
     * exists, the bare external links written as bullet-list items.
     *
     * Note: when the text contains no <ref> at all, an empty array is returned
     * even if bullet-list links are present.
     *
     * TODO: also handle the {{ref}} template.
     *
     * @return array list of match sets [0 => full match, 1 => captured content],
     *               e.g. [0 => ['<ref>fu</ref>', 'fu'], 1 => ...]
     */
    public static function extractRefsAndListOfLinks(string $text): array
    {
        // Flags: i = case-insensitive, s = "." also matches "\n", m = ^/$ match per line.
        // "[^>/]*" skips self-closing tags (<ref name="x" />) and the negative
        // lookahead stops at the first "</ref>", so two refs are never merged.
        if (!preg_match_all('#<ref[^>/]*>((?:(?!</ref>).)*)</ref>#ism', $text, $refs, PREG_SET_ORDER)) {
            return [];
        }
        $result = $refs;

        // External links alone on a bullet line ("* http://…"), without the
        // optional final point. The line must be terminated by "\n".
        if (preg_match_all('#^\* *(https?://[^ \n]+[^ \n.])\.? *\n#im', $text, $liensExternes, PREG_SET_ORDER)) {
            $result = [...$result, ...$liensExternes];
        }

        return $result;
    }

    /**
     * Remove wiki encoding: italic, bold, links [ ] and [[fu|bar]] => bar,
     * replace non-breaking spaces by regular spaces,
     * replace {{lang|en|fubar}} => fubar and {{lien|fubar}} => fubar,
     * drop HTML entities and every HTML tag except <sup> and <sub>.
     *
     * @param string    $text         wikitext to clean
     * @param bool|null $stripcomment when strictly true (default), HTML comments
     *                                are removed first
     *
     * @return string
     */
    public static function unWikify(string $text, ?bool $stripcomment = true): string
    {
        if (true === $stripcomment) {
            $text = self::removeHTMLcomments($text);
        }

        $text = str_replace(
            // The non-breaking space is written as an explicit escape so that it
            // cannot be silently turned into a regular space by an editor.
            ['[', ']', "'''", "''", "\u{00A0}"],
            ['', '', '', '', ' '],
            preg_replace(
                [
                    // [[page|label]] => label
                    "#\[\[[^|\]]*\|([^]]*)]]#",
                    // {{lang|xx|text}} / {{langue|xx|texte=text|dir=rtl}} => text
                    '#{{ ?(?:lang|langue) ?\|[^|]+\| ?(?:texte=)?([^{}=]+)(?:\|dir=rtl)?}}#i',
                    // HTML entities such as &nbsp; are dropped
                    "#&[\w\d]{2,7};#",
                ],
                ['$1', '$1', ''],
                $text
            )
        );
        // {{Lien|Jeffrey Robinson}} => Jeffrey Robinson
        $text = preg_replace('#{{ ?lien ?\| ?([^|}]+) ?}}#i', '${1}', $text);

        return strip_tags($text, '<sup><sub>');
    }

    /**
     * Tell whether unWikify() would change the text, i.e. whether the text
     * contains something unWikify() strips or rewrites.
     */
    public static function isWikify(string $text): bool
    {
        return self::unWikify($text) !== $text;
    }

    /**
     * Generate wikilink from string.
     * The label and the page are un-wikified first; underscores of the label
     * become spaces.
     *
     * @return string "[[label]]" or "[[Page|label]]"
     */
    public static function wikilink(string $label, ?string $page = null): string
    {
        $label = trim(str_replace('_', ' ', self::unWikify($label)));
        $page = ($page) ? trim(self::unWikify($page)) : null;

        // fu_bar => [[fu_bar]] / Fu, fu => [[fu]]
        // No piped link is needed when there is no page, or when label and page
        // resolve to the same wiki title.
        if (empty($page) || self::str2WikiTitle($label) === self::str2WikiTitle($page)) {
            return '[['.$label.']]';
        }

        // fu, bar => [[Bar|fu]]
        return sprintf(
            '[[%s|%s]]',
            self::str2WikiTitle($page),
            $label
        );
    }

    /**
     * "fu_bar_ " => "Fu bar".
     * Underscores become spaces, the result is trimmed and passed to
     * TextUtil::mb_ucfirst().
     *
     * @return string
     */
    private static function str2WikiTitle(string $str): string
    {
        return TextUtil::mb_ucfirst(trim(str_replace('_', ' ', $str)));
    }

    /**
     * Get page titles from wiki encoded links.
     * Links whose target contains ":" (other projects or namespaces, like
     * [[wikt:bla]]) are ignored.
     *
     * @return array|null list of link targets, or null when no link is found
     */
    public static function getWikilinkPages(string $text): ?array
    {
        if (preg_match_all('#\[\[([^:|\]]+)(?:\|[^|\]]*)?]]#', $text, $matches) > 0) {
            return $matches[1];
        }

        return null;
    }

    /**
     * Strip external links (http://) from wiki text.
     * "[http://google.fr Google]" => "Google"
     * "bla [http://google.fr]" => "bla"
     *
     * @return string the trimmed text, each link replaced by its label (if any)
     */
    public static function stripExternalLink(string $text): string
    {
        // Group 2 is the label; the lookbehind requires a space between the URL
        // and the label, otherwise the label is empty.
        $text = preg_replace('#\[(https?://[^][<>\s"]+) *((?<= )[^\n\]]*|)\]#i', '${2}', $text);

        return trim($text);
    }

    /**
     * Tell whether the text contains an HTML comment, ignoring the
     * '<!-- Paramètre obligatoire -->' marker.
     *
     * The pattern is a cheap approximation: a comment whose content includes
     * ">" is not detected.
     *
     * @return bool
     */
    public static function isCommented(string $text): bool
    {
        $text = str_replace('<!-- Paramètre obligatoire -->', '', $text);

        // Alternative: preg_match('#<\!--(?!-->).*-->#s', $text) — heavier but more precise.
        return preg_match('#<!--[^>]*-->#', $text) > 0;
    }

    /**
     * Remove '<!--', '-->', and everything between.
     * To avoid leaving blank lines, when a comment is both preceded
     * and followed by a newline (ignoring spaces), trim leading and
     * trailing spaces and one of the newlines.
     * (c) WikiMedia /includes/parser/Sanitizer.php.
     *
     * All offsets are byte offsets: the delimiters are pure ASCII, so a
     * byte-wise search cannot match inside a multibyte UTF-8 character, and
     * substr()/substr_replace() expect byte offsets. Mixing in mb_strpos()
     * (character offsets) would cut at the wrong position as soon as a
     * multibyte character precedes the comment.
     *
     * @return string
     */
    public static function removeHTMLcomments(string $text)
    {
        while (false !== ($start = strpos($text, '<!--'))) {
            $end = strpos($text, '-->', $start + 4);
            if (false === $end) {
                // Unterminated comment; bail out
                break;
            }
            $end += 3;
            // Trim space and newline if the comment is both
            // preceded and followed by a newline
            $spaceStart = max($start - 1, 0);
            $spaceLen = $end - $spaceStart;
            while (' ' === substr($text, $spaceStart, 1) && $spaceStart > 0) {
                --$spaceStart;
                ++$spaceLen;
            }
            while (' ' === substr($text, $spaceStart + $spaceLen, 1)) {
                ++$spaceLen;
            }
            if ("\n" === substr($text, $spaceStart, 1)
                && "\n" === substr($text, $spaceStart + $spaceLen, 1)
            ) {
                // Remove the comment, leading and trailing
                // spaces, and leave only one newline.
                $text = substr_replace($text, "\n", $spaceStart, $spaceLen + 1);
            } else {
                // Remove just the comment.
                $text = substr_replace($text, '', $start, $end - $start);
            }
        }

        return (string) $text;
    }

    /**
     * Strip the final point (".") as in <ref> ending.
     * Only one trailing point is removed.
     *
     * @return string
     */
    public static function stripFinalPoint(string $str): string
    {
        if (str_ends_with($str, '.')) {
            // "." is a single byte, so a byte-wise cut is safe.
            return substr($str, 0, -1);
        }

        return $str;
    }

    /**
     * Normalize URL for inclusion as a wiki-template value.
     * https://en.wikipedia.org/wiki/Template:Citation_Style_documentation/url
     *
     * Characters that would break the template or the wiki syntax are
     * percent-encoded; everything else is left untouched.
     *
     * @return string
     */
    public static function normalizeUrlForTemplate(string $url): string
    {
        // Order matters: "'''" must be handled before "''".
        $searchReplace = [
            ' ' => '%20',
            '"' => '%22',
            "'''" => '%27%27%27',
            "''" => '%27%27',
            '<' => '%3c',
            '>' => '%3e',
            '[' => '%5b',
            ']' => '%5d',
            '{{' => '%7b%7b',
            '|' => '%7c',
            '}}' => '%7d%7d',
        ];

        return str_replace(array_keys($searchReplace), array_values($searchReplace), $url);
    }

    /**
     * Add reference separator {{,}} between reference tags.
     * Example :
     * "<ref>A</ref><ref>B</ref>" => "<ref>A</ref>{{,}}<ref>B</ref>".
     * "<ref name="A" /> <ref>" => "<ref name="A" />{{,}}<ref>".
     *
     * Any whitespace (including line breaks) between the two tags is removed.
     *
     * TODO : allow carriage return between refs ? See https://w.wiki/8n7K
     */
    public static function fixConcatenatedRefsSyntax(string $wikiText): string
    {
        // Closing tag directly followed by a new ref.
        $wikiText = preg_replace('#</ref>[\n\r\s]*<ref#', '</ref>{{,}}<ref', $wikiText);

        // Self-closing named ref followed by a new ref.
        return preg_replace('#(<ref name=[^\/\>\r\n]+ ?/>)[\n\r\s]*<ref#', '$1{{,}}<ref', $wikiText);
    }
}
255