Passed
Push — master ( 5eccc7...1faa52 )
by Dispositif
02:45
created

WikiTextUtil   A

Complexity

Total Complexity 25

Size/Duplication

Total Lines 206
Duplicated Lines 0 %

Test Coverage

Coverage 79.69%

Importance

Changes 6
Bugs 0 Features 0
Metric Value
eloc 58
dl 0
loc 206
ccs 51
cts 64
cp 0.7969
rs 10
c 6
b 0
f 0
wmc 25

10 Methods

Rating   Name   Duplication   Size   Complexity  
B removeHTMLcomments() 0 33 8
A isCommented() 0 6 2
A extractAllRefs() 0 9 2
A getWikilinkPages() 0 7 2
A isWikify() 0 7 2
A stripExternalLink() 0 5 1
A str2WikiTitle() 0 3 1
A stripFinalPoint() 0 7 2
A unWikify() 0 25 2
A wikilink() 0 15 3
1
<?php
2
/**
3
 * This file is part of dispositif/wikibot application
4
 * 2019 : Philippe M. <[email protected]>
5
 * For the full copyright and MIT license information, please view the LICENSE file.
6
 */
7
8
declare(strict_types=1);
9
10
namespace App\Domain\Utils;
11
12
class WikiTextUtil extends TextUtil
13
{
14
    /**
     * Extract every <ref>...</ref> footnote found in a wikitext.
     *
     * Only tags with a content are returned : self-closing tags such as
     * <ref name="fu" /> and other tags merely starting with "ref"
     * (<references />) are skipped instead of being mistaken for an opening tag.
     * todo {{ref}}
     *
     * @param string $text
     *
     * @return array [0=>['<ref>fu</ref>', 'fu'], 1=> ...] (empty array when nothing matches)
     */
    public static function extractAllRefs(string $text): array
    {
        // i = case-insensitive / s = "\n" included in "."
        // Opening tag : "<ref" alone or followed by attributes, never ending with "/>".
        // Content : anything that does not contain "</ref>" (nested refs excluded).
        $pattern = '#<ref(?:\s[^>]*)?(?<!/)>((?:(?!</ref>).)*)</ref>#is';

        // preg_match_all() gives 0 (no match) or false (PCRE error) : both mean "no refs".
        if (!preg_match_all($pattern, $text, $refs, PREG_SET_ORDER)) {
            return [];
        }

        return $refs;
    }
31
32
    /**
     * Remove wiki encoding from a text :
     * - italic, bold, links [ ] and [[fu|bar]] => bar
     * - {{lang|en|fubar}} => fubar and {{Lien|Jeffrey Robinson}} => Jeffrey Robinson
     * - non-breaking spaces are replaced by plain spaces
     * - HTML entities (&amp; &nbsp; ...) are removed
     * - HTML tags are stripped, except <sup> and <sub>.
     *
     * @param string    $text
     * @param bool|null $stripcomment HTML comments are removed first, only when strictly true
     *
     * @return string
     */
    public static function unWikify(string $text, ?bool $stripcomment = true): string
    {
        if (true === $stripcomment) {
            $text = self::removeHTMLcomments($text);
        }

        $replaced = preg_replace(
            [
                // [[fu|bar]] => bar
                '#\[\[[^|\]]*\|([^]]*)]]#',
                // {{lang|en|fubar}} => fubar
                '#{{ ?(?:lang|langue) ?\|[^|]+\| ?(?:texte=)?([^{}=]+)(?:\|dir=rtl)?}}#i',
                // HTML entities are dropped
                '#&[\w\d]{2,7};#',
            ],
            ['$1', '$1', ''],
            $text
        );
        // preg_replace() returns null on PCRE failure : keep the text untouched in that case
        // rather than feeding null to str_replace() under strict_types.
        $text = $replaced ?? $text;

        // Remaining brackets, bold/italic quotes, and non-breaking space (U+00A0, written
        // as an escape sequence so the invisible character can not be lost by an editor).
        $text = str_replace(
            ['[', ']', "'''", "''", "\u{00A0}"],
            ['', '', '', '', ' '],
            $text
        );

        // {{Lien|Jeffrey Robinson}} => Jeffrey Robinson
        $replaced = preg_replace('#{{ ?lien ?\| ?([^|}]+) ?}}#i', '${1}', $text);
        $text = $replaced ?? $text;

        return strip_tags($text, '<sup><sub>');
    }
68
69 24
    /**
     * Tell whether a text carries wiki encoding, i.e. whether
     * unWikify() would return something different from the input.
     *
     * @param string $text
     *
     * @return bool
     */
    public static function isWikify(string $text): bool
    {
        $plain = self::unWikify($text);

        return $plain !== $text;
    }
77
78
    /**
     * Generate wikilink from string.
     *
     * When no page is given (null, empty or blank), or when the page title
     * equals the label once both are normalized as wiki titles, a simple
     * [[label]] link is produced. Otherwise the link is [[Page|label]].
     *
     * @param string      $label
     * @param string|null $page
     *
     * @return string
     */
    public static function wikilink(string $label, ?string $page = null): string
    {
        $label = trim($label);
        // trim(null) throws a TypeError under strict_types : normalize null to '' first.
        $page = (null === $page) ? '' : trim($page);

        // fu_bar => [[fu_bar]] / Fu, fu => [[fu]]
        // Strict comparison with '' : empty() would also discard the valid page title "0".
        if ('' === $page || self::str2WikiTitle($label) === self::str2WikiTitle($page)) {
            return '[['.$label.']]';
        }

        // fu, bar => [[Bar|fu]]
        return sprintf(
            '[[%s|%s]]',
            self::str2WikiTitle($page),
            $label
        );
    }
103
104
    /**
     * Normalize a string as a wiki page title : underscores become spaces,
     * surrounding whitespace is trimmed, then TextUtil::mb_ucfirst() is applied.
     * "fu_bar_ " => "Fu bar".
     *
     * @param string $str
     *
     * @return string
     */
    private static function str2WikiTitle(string $str): string
    {
        $spaced = str_replace('_', ' ', $str);
        $trimmed = trim($spaced);

        return TextUtil::mb_ucfirst($trimmed);
    }
111
112
    /**
     * Get page titles from wiki encoded links
     * (but not from links containing ":" such as other projects links [[wikt:bla]]).
     * [[fu]] => fu / [[fu|bar]] => fu.
     *
     * @param string $text
     *
     * @return array|null list of page titles, null when the text has no wikilink
     */
    public static function getWikilinkPages(string $text): ?array
    {
        $count = preg_match_all('#\[\[([^:|\]]+)(?:\|[^|\]]*)?]]#', $text, $found);

        // 0 = no link, false = PCRE error
        if (!$count) {
            return null;
        }

        return $found[1];
    }
128
129
    /**
     * Replace every bracketed external link (http:// or https://) by its label,
     * then trim the result.
     * "[http://google.fr Google]" => "Google"
     * "bla [http://google.fr]" => "bla".
     *
     * @param string $text
     *
     * @return string
     */
    public static function stripExternalLink(string $text): string
    {
        // Group 1 = URL, group 2 = label (only when separated from the URL by a space).
        $pattern = '#\[(https?://[^][<>\s"]+) *((?<= )[^\n\]]*|)\]#i';

        return trim(preg_replace($pattern, '${2}', $text));
    }
144
145
    /**
     * Tell whether a text contains an HTML comment (<!-- ... -->).
     *
     * The '<!-- Paramètre obligatoire -->' placeholder is ignored.
     * Detection follows the same rule as removeHTMLcomments() : a '<!--'
     * followed, at any distance, by a '-->'. Comments containing ">" or
     * newlines are therefore detected too.
     *
     * @param string $text
     *
     * @return bool
     */
    public static function isCommented(string $text): bool
    {
        // The placeholder comment does not count as a real comment.
        $text = str_replace('<!-- Paramètre obligatoire -->', '', $text);

        $start = strpos($text, '<!--');
        if (false === $start) {
            return false;
        }

        // The closing mark must come after the 4 bytes of the opening mark.
        return false !== strpos($text, '-->', $start + 4);
    }
157
158
    /**
     * Remove '<!--', '-->', and everything between.
     * To avoid leaving blank lines, when a comment is both preceded
     * and followed by a newline (ignoring spaces), trim leading and
     * trailing spaces and one of the newlines.
     * An unterminated comment is left as is.
     * (c) WikiMedia /includes/parser/Sanitizer.php.
     *
     * All offsets are byte offsets : strpos() is used on purpose (not mb_strpos())
     * because substr() and substr_replace() work on bytes. Mixing both corrupts
     * the text as soon as a multibyte character precedes the comment.
     *
     * @param string $text
     *
     * @return string
     */
    public static function removeHTMLcomments(string $text)
    {
        while (false !== ($start = strpos($text, '<!--'))) {
            $end = strpos($text, '-->', $start + 4);
            if (false === $end) {
                // Unterminated comment; bail out
                break;
            }
            // $end now points right after the closing '-->'
            $end += 3;

            // Trim space and newline if the comment is both
            // preceded and followed by a newline
            $spaceStart = max($start - 1, 0);
            $spaceLen = $end - $spaceStart;
            // Extend the span to the left over the spaces preceding the comment
            while (' ' === substr($text, $spaceStart, 1) && $spaceStart > 0) {
                --$spaceStart;
                ++$spaceLen;
            }
            // Extend the span to the right over the spaces following the comment
            while (' ' === substr($text, $spaceStart + $spaceLen, 1)) {
                ++$spaceLen;
            }

            if ("\n" === substr($text, $spaceStart, 1)
                && "\n" === substr($text, $spaceStart + $spaceLen, 1)
            ) {
                // Remove the comment, leading and trailing
                // spaces, and leave only one newline.
                $text = substr_replace($text, "\n", $spaceStart, $spaceLen + 1);
            } else {
                // Remove just the comment.
                $text = substr_replace($text, '', $start, $end - $start);
            }
        }

        return $text;
    }
203
204
    /**
     * Remove the trailing "." of a string, if any (as in <ref> ending).
     * Only one point is removed : "fu.." => "fu.".
     *
     * @param string $str
     *
     * @return string
     */
    public static function stripFinalPoint(string $str): string
    {
        $lastChar = substr($str, -1);
        if ('.' !== $lastChar) {
            return $str;
        }

        return substr($str, 0, strlen($str) - 1);
    }
219
}
220