Passed
Push — master ( 99b995...983f4d )
by Dispositif
02:27
created

WikiTextUtil::stripExternalLink()   A

Complexity

Conditions 1
Paths 1

Size

Total Lines 5
Code Lines 2

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 1
eloc 2
nc 1
nop 1
dl 0
loc 5
rs 10
c 0
b 0
f 0
1
<?php
2
/**
3
 * This file is part of dispositif/wikibot application
4
 * 2019 : Philippe M. <[email protected]>
5
 * For the full copyright and MIT license information, please view the LICENSE file.
6
 */
7
8
declare(strict_types=1);
9
10
namespace App\Domain\Utils;
11
12
/**
 * Static helpers for cleaning and inspecting wiki-encoded text:
 * wiki markup removal, wikilink extraction, external link stripping
 * and HTML comment handling.
 */
class WikiTextUtil extends TextUtil
{
    /**
     * Remove wiki encoding and return the readable text.
     *
     * - italic and bold quote marks are removed;
     * - brackets are removed and [[fu|bar]] becomes "bar";
     * - {{lang|en|fubar}} (or {{langue|...}}) becomes "fubar";
     * - entity-like tokens ("&", 2 to 7 word characters, ";") are dropped;
     * - non-breaking spaces (U+00A0) become regular spaces;
     * - <small> and </small> tags are removed.
     *
     * @param string    $text         wiki text to clean
     * @param bool|null $stripcomment HTML comments are removed only when this is exactly true
     *
     * @return string
     */
    public static function unWikify(string $text, ?bool $stripcomment = true): string
    {
        // todo remove HTML tags ?
        if (true === $stripcomment) {
            $text = self::removeHTMLcomments($text);
        }

        $plain = preg_replace(
            [
                // [[target|label]] => label
                '#\[\[[^|\]]*\|([^]]*)]]#',
                // {{lang|xx|text}} or {{langue|xx|texte=text|dir=rtl}} => text
                '#{{ ?(?:lang|langue) ?\|[^|]+\| ?(?:texte=)?([^{}=]+)(?:\|dir=rtl)?}}#i',
                // entity-like tokens such as &nbsp; are dropped
                '#&[\w\d]{2,7};#',
            ],
            ['$1', '$1', ''],
            $text
        );
        if (null === $plain) {
            // PCRE failure: keep going with the text as it was rather than
            // handing null to str_replace() under strict_types.
            $plain = $text;
        }

        // The non-breaking space is written as an escape sequence so that it
        // stays visible and cannot be silently turned into a plain space.
        $plain = str_replace(
            ['[', ']', "'''", "''", "\u{00A0}"],
            ['', '', '', '', ' '],
            $plain
        );

        return str_replace(['<small>', '</small>'], '', $plain);
    }

    /**
     * Tell whether the text contains anything that unWikify() would alter
     * (wiki markup, but also HTML comments, entities, non-breaking spaces...).
     *
     * @param string $text
     *
     * @return bool
     */
    public static function isWikify(string $text): bool
    {
        return self::unWikify($text) !== $text;
    }

    /**
     * Get page titles from wiki encoded links.
     * Links whose target contains a colon (other projects or namespaces,
     * like [[wikt:bla]]) are ignored.
     *
     * @param string $text
     *
     * @return array|null list of link targets, or null when no link is found
     */
    public static function getWikilinkPages(string $text): ?array
    {
        if (preg_match_all('#\[\[([^:|\]]+)(?:\|[^|\]]*)?]]#', $text, $matches) > 0) {
            return $matches[1];
        }

        return null;
    }

    /**
     * Strip external links (http:// or https://) from wiki text.
     * "[http://google.fr Google]" => "Google"
     * "bla [http://google.fr]" => "bla"
     *
     * @param string $text
     *
     * @return string the trimmed text, with each external link replaced by its label (if any)
     */
    public static function stripExternalLink(string $text): string
    {
        $stripped = preg_replace('#\[(https?://[^][<>\s"]+) *((?<= )[^\n\]]*|)\]#i', '${2}', $text);

        // preg_replace() returns null on PCRE failure: fall back to the input
        // instead of passing null to trim() under strict_types.
        return trim($stripped ?? $text);
    }

    /**
     * Tell whether the text contains a complete HTML comment.
     *
     * Uses the same detection rule as removeHTMLcomments(): an opening
     * marker followed, at least 4 bytes later, by a closing marker.
     * Unlike a pattern that forbids ">" inside the comment, this also
     * detects comments wrapping tags.
     *
     * @param string $text
     *
     * @return bool
     */
    public static function isCommented(string $text): bool
    {
        $start = strpos($text, '<!--');

        return false !== $start && false !== strpos($text, '-->', $start + 4);
    }

    /**
     * Remove '<!--', '-->', and everything between.
     * To avoid leaving blank lines, when a comment is both preceded
     * and followed by a newline (ignoring spaces), trim leading and
     * trailing spaces and one of the newlines.
     * An unterminated comment is left untouched.
     * (c) WikiMedia /includes/parser/Sanitizer.php.
     *
     * @param string $text
     *
     * @return string
     */
    public static function removeHTMLcomments(string $text): string
    {
        // All offsets below are BYTE offsets: strpos() must be used (not
        // mb_strpos()) because substr() and substr_replace() work on bytes.
        while (false !== ($start = strpos($text, '<!--'))) {
            $end = strpos($text, '-->', $start + 4);
            if (false === $end) {
                // Unterminated comment; bail out
                break;
            }
            $end += 3;
            // Trim space and newline if the comment is both
            // preceded and followed by a newline
            $spaceStart = max($start - 1, 0);
            $spaceLen = $end - $spaceStart;
            while (' ' === substr($text, $spaceStart, 1) && $spaceStart > 0) {
                --$spaceStart;
                ++$spaceLen;
            }
            while (' ' === substr($text, $spaceStart + $spaceLen, 1)) {
                ++$spaceLen;
            }
            if ("\n" === substr($text, $spaceStart, 1)
                && "\n" === substr($text, $spaceStart + $spaceLen, 1)
            ) {
                // Remove the comment, leading and trailing
                // spaces, and leave only one newline.
                $text = substr_replace($text, "\n", $spaceStart, $spaceLen + 1);
            } else {
                // Remove just the comment.
                $text = substr_replace($text, '', $start, $end - $start);
            }
        }

        return $text;
    }
}
148