Passed
Push — master ( b8fbf6...e42360 )
by Dispositif
06:49
created

WikiTextUtil::extractRefsAndListOfLinks()   A

Complexity

Conditions 3
Paths 3

Size

Total Lines 16
Code Lines 6

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 0
CRAP Score 12

Importance

Changes 0
Metric Value
cc 3
eloc 6
nc 3
nop 1
dl 0
loc 16
ccs 0
cts 4
cp 0
crap 12
rs 10
c 0
b 0
f 0
1
<?php
2
/**
3
 * This file is part of dispositif/wikibot application
4
 * 2019 : Philippe M. <[email protected]>
5
 * For the full copyright and MIT license information, please view the LICENSE file.
6
 */
7
8
declare(strict_types=1);
9
10
namespace App\Domain\Utils;
11
12
class WikiTextUtil extends TextUtil
13
{
14
    /**
     * Extract the <ref>...</ref> footnotes and the bulleted raw external links of a wiki text.
     *
     * Every entry uses the PREG_SET_ORDER layout: index 0 is the whole match,
     * index 1 the captured content (the footnote body, or the URL of a "* http://..." line).
     * Footnotes are listed first, followed by the bulleted links.
     * An empty array is returned when neither is found.
     *
     * todo {{ref}}
     *
     * @param string $text
     *
     * @return array [0=>['<ref>fu</ref>', 'fu'], 1=> ...]
     */
    public static function extractRefsAndListOfLinks(string $text): array
    {
        $result = [];

        // s = "." also matches "\n" // m = ^multiline$
        // The negative lookahead stops at the first </ref> (no nesting).
        // "\b" keeps "<references>" from being taken for an opening <ref> tag.
        if (preg_match_all('#<ref\b[^>/]*>((?:(?!</ref>).)*)</ref>#ism', $text, $refs, PREG_SET_ORDER)) {
            $result = $refs;
        }

        // Bulleted external links, one per line: "* http://example.com"
        // An optional final "." is left out of the captured URL.
        // "(?:\n|\z)" also accepts a link sitting on the very last line of the text.
        if (preg_match_all('#^\* *(https?://[^ \n]+[^ \n.])\.? *(?:\n|\z)#im', $text, $liensExternes, PREG_SET_ORDER)) {
            $result = array_merge($result, $liensExternes);
        }

        return $result;
    }
38
39
    /**
     * Remove wiki encoding from a text.
     *
     * - [[fu|bar]] => bar, then every remaining "[" and "]" is dropped
     * - bold (''') and italic ('') markers are dropped
     * - {{lang|en|fubar}} / {{langue|en|texte=fubar}} => fubar
     * - {{Lien|Jeffrey Robinson}} => Jeffrey Robinson
     * - HTML entities such as "&nbsp;" are deleted
     * - non-breaking spaces (U+00A0) become regular spaces
     * - HTML tags are stripped, except <sup> and <sub>
     *
     * @param string    $text
     * @param bool|null $stripcomment HTML comments are removed first only when strictly true
     *
     * @return string
     */
    public static function unWikify(string $text, ?bool $stripcomment = true): string
    {
        if (true === $stripcomment) {
            $text = self::removeHTMLcomments($text);
        }

        $unlinked = preg_replace(
            [
                "#\[\[[^|\]]*\|([^]]*)]]#",
                '#{{ ?(?:lang|langue) ?\|[^|]+\| ?(?:texte=)?([^{}=]+)(?:\|dir=rtl)?}}#i',
                "#&[\w\d]{2,7};#",
            ],
            ['$1', '$1', ''],
            $text
        );

        // preg_replace() returns null on a PCRE failure: keep the current text in that case.
        // The non-breaking space is written as an escape so it cannot be mistaken
        // for (or silently turned into) a regular space in the source file.
        $text = str_replace(
            ['[', ']', "'''", "''", "\u{00A0}"],
            ['', '', '', '', ' '],
            $unlinked ?? $text
        );

        // {{Lien|Jeffrey Robinson}} => Jeffrey Robinson
        $text = preg_replace('#{{ ?lien ?\| ?([^|}]+) ?}}#i', '${1}', $text) ?? $text;

        return strip_tags($text, '<sup><sub>');
    }
75 21
76
    /**
     * Tell whether a text contains wiki encoding, i.e. whether unWikify() would alter it.
     *
     * @param string $text
     *
     * @return bool
     */
    public static function isWikify(string $text): bool
    {
        return $text !== self::unWikify($text);
    }
84
85
    /**
     * Generate wikilink from string.
     *
     * Both arguments are un-wikified and trimmed first; underscores in the
     * label are turned into spaces. When no page is given, or when the page
     * and the label normalize to the same wiki title, a simple [[label]] link
     * is produced. Otherwise the result is a piped link [[Page|label]].
     *
     * @param string      $label
     * @param string|null $page
     *
     * @return string
     */
    public static function wikilink(string $label, ?string $page = null): string
    {
        $label = trim(str_replace('_', ' ', self::unWikify($label)));
        // Explicit null/'' checks: a page titled "0" is a valid target and must
        // not be discarded by a truthiness test.
        $page = (null !== $page) ? trim(self::unWikify($page)) : '';

        // fu_bar => [[fu bar]] / Fu, fu => [[fu]]
        if ('' === $page || self::str2WikiTitle($label) === self::str2WikiTitle($page)) {
            return '[['.$label.']]';
        }

        // fu, bar => [[Bar|fu]]
        return sprintf(
            '[[%s|%s]]',
            self::str2WikiTitle($page),
            $label
        );
    }
110
111
    /**
     * Normalize a string into a wiki page title: underscores become spaces,
     * surrounding whitespace is trimmed, then TextUtil::mb_ucfirst() is applied.
     * "fu_bar_ " => "Fu bar".
     *
     * @param string $str
     *
     * @return string
     */
    private static function str2WikiTitle(string $str): string
    {
        $spaced = str_replace('_', ' ', $str);

        return TextUtil::mb_ucfirst(trim($spaced));
    }
121
122 1
    /**
     * Get page titles from wiki encoded links.
     * Links whose target contains ":" (other projects or namespaces,
     * like [[wikt:bla]]) are ignored.
     * For a piped link [[page|label]] only the page part is returned.
     *
     * @param string $text
     *
     * @return array|null list of page titles, or null when no link is found
     */
    public static function getWikilinkPages(string $text): ?array
    {
        $found = preg_match_all('#\[\[([^:|\]]+)(?:\|[^|\]]*)?]]#', $text, $matches);

        return ($found > 0) ? $matches[1] : null;
    }
138 2
139
    /**
     * Strip external links (http:// or https://) from wiki text, keeping
     * only their label when there is one. The result is trimmed.
     * "[http://google.fr Google]" => "Google"
     * "bla [http://google.fr]" => "bla"
     *
     * @param string $text
     *
     * @return string
     */
    public static function stripExternalLink(string $text): string
    {
        // Group 1 = URL, group 2 = label (only captured when a space follows the URL).
        $pattern = '#\[(https?://[^][<>\s"]+) *((?<= )[^\n\]]*|)\]#i';

        return trim(preg_replace($pattern, '${2}', $text));
    }
154
155 24
    /**
     * Tell whether a text contains an HTML comment.
     * The marker "<!-- Paramètre obligatoire -->" is ignored.
     *
     * Limitation: the lightweight pattern does not detect a comment whose
     * body contains a ">" character.
     *
     * @param string $text
     *
     * @return bool
     */
    public static function isCommented(string $text): bool
    {
        $withoutMarker = str_replace('<!-- Paramètre obligatoire -->', '', $text);

        // Alternative: '#<\!--(?!-->).*-->#s' // heavier but more precise
        return preg_match('#<!--[^>]*-->#', $withoutMarker) > 0;
    }
167
168
    /**
     * Remove '<!--', '-->', and everything between.
     * To avoid leaving blank lines, when a comment is both preceded
     * and followed by a newline (ignoring spaces), trim leading and
     * trailing spaces and one of the newlines.
     * An unterminated comment is left untouched.
     * (c) WikiMedia /includes/parser/Sanitizer.php.
     *
     * All offsets are BYTE offsets: strpos() is used rather than mb_strpos()
     * because substr() and substr_replace() are byte-based. Mixing character
     * and byte offsets corrupts the text when multibyte characters precede
     * the comment.
     *
     * @param string $text
     *
     * @return string
     */
    public static function removeHTMLcomments(string $text)
    {
        while (false !== ($start = strpos($text, '<!--'))) {
            $end = strpos($text, '-->', $start + 4);
            if (false === $end) {
                // Unterminated comment; bail out
                break;
            }
            $end += 3;
            // Trim space and newline if the comment is both
            // preceded and followed by a newline
            $spaceStart = max($start - 1, 0);
            $spaceLen = $end - $spaceStart;
            while (' ' === substr($text, $spaceStart, 1) && $spaceStart > 0) {
                --$spaceStart;
                ++$spaceLen;
            }
            while (' ' === substr($text, $spaceStart + $spaceLen, 1)) {
                ++$spaceLen;
            }
            if ("\n" === substr($text, $spaceStart, 1)
                && "\n" === substr($text, $spaceStart + $spaceLen, 1)
            ) {
                // Remove the comment, leading and trailing
                // spaces, and leave only one newline.
                $text = substr_replace($text, "\n", $spaceStart, $spaceLen + 1);
            } else {
                // Remove just the comment.
                $text = substr_replace($text, '', $start, $end - $start);
            }
        }

        return $text;
    }
213
214
    /**
     * Drop a single trailing "." from a string, as found at the end of a <ref>.
     * Only the last character is examined: "fu.." => "fu.".
     *
     * @param string $str
     *
     * @return string
     */
    public static function stripFinalPoint(string $str): string
    {
        $endsWithPoint = '.' === substr($str, -1, 1);

        return $endsWithPoint ? substr($str, 0, strlen($str) - 1) : $str;
    }
229
}
230