WikiTextUtil::isCommented() - Code Metrics - Inspection of "Improve CompleteProcess : inject article title + s..." - Dispositif/Wikibot - Measure and Improve Code Quality continuously with Scrutinizer

Passed

Push — master ( 99b995...983f4d )

by Dispositif

created 2020-02-18 18:19 UTC

WikiTextUtil::isCommented() A

↳ Parent: WikiTextUtil

Complexity

Conditions	2
Paths	2

Size

Total Lines	4
Code Lines	1

Duplication

Lines	0
Ratio	0 %

Importance

Changes

Metric	Value
cc	2
eloc	1
nc	2
nop	1
dl	0
loc	4
rs	10
c	0
b	0
f	0

<?php
/**
 * This file is part of dispositif/wikibot application
 * 2019 : Philippe M. <[email protected]>
 * For the full copyright and MIT license information, please view the LICENSE file.
 */

declare(strict_types=1);

namespace App\Domain\Utils;

class WikiTextUtil extends TextUtil
{
    /**
     * Strip wiki markup from a text.
     *
     * - piped links "[[fu|bar]]" are reduced to their label "bar";
     * - language templates "{{lang|en|fubar}}" / "{{langue|en|texte=fubar}}"
     *   are reduced to their text "fubar";
     * - HTML entities such as "&nbsp;" are removed;
     * - remaining brackets, bold (''') and italic ('') markers are removed;
     * - non-breaking spaces (U+00A0) are replaced by regular spaces;
     * - <small> tags are removed.
     *
     * @param string    $text         wiki text to clean
     * @param bool|null $stripcomment when strictly true (default), HTML comments
     *                                are removed first; null/false keeps them
     *
     * @return string
     */
    public static function unWikify(string $text, ?bool $stripcomment = true): string
    {
        // todo remove HTML tags ?
        if (true === $stripcomment) {
            $text = self::removeHTMLcomments($text);
        }

        // Patterns that keep a captured part of the markup (or nothing).
        $unwrapped = preg_replace(
            [
                "#\[\[[^|\]]*\|([^]]*)]]#",
                '#{{ ?(?:lang|langue) ?\|[^|]+\| ?(?:texte=)?([^{}=]+)(?:\|dir=rtl)?}}#i',
                "#&[\w\d]{2,7};#",
            ],
            ['$1', '$1', ''],
            $text
        );

        // Literal markers. The non-breaking space is written as an explicit
        // escape so it cannot be silently turned into a regular space by an
        // editor (which would make the replacement a no-op).
        $text = str_replace(
            ['[', ']', "'''", "''", "\u{00A0}"],
            ['', '', '', '', ' '],
            $unwrapped
        );

        return str_replace(['<small>', '</small>'], '', $text); // ??
    }

    /**
     * Tell whether the text contains markup that unWikify() would alter.
     *
     * @param string $text
     *
     * @return bool
     */
    public static function isWikify(string $text): bool
    {
        return self::unWikify($text) !== $text;
    }

    /**
     * Get page titles from wiki encoded links.
     * (but not others projects links like [[wikt:bla]]).
     *
     * @param string $text
     *
     * @return array|null list of page titles, or null when no link is found
     */
    public static function getWikilinkPages(string $text): ?array
    {
        $found = preg_match_all('#\[\[([^:|\]]+)(?:\|[^|\]]*)?]]#', $text, $matches);
        if ($found > 0) {
            return $matches[1];
        }

        return null;
    }

    /**
     * Strip external links (http://) from wiki text.
     * "[http://google.fr Google]" => "Google"
     * "bla [http://google.fr]" => "bla"
     *
     * @param string $text
     *
     * @return string trimmed text where each external link is replaced by its label
     */
    public static function stripExternalLink(string $text): string
    {
        $text = preg_replace('#\[(https?://[^][<>\s"]+) *((?<= )[^\n\]]*|)\]#i', '${2}', $text);

        return trim($text);
    }

    /**
     * Tell whether the text contains at least one complete HTML comment.
     *
     * The content is matched lazily with the "s" (dot-all) flag so comments
     * containing ">" (e.g. "<!-- <ref>x</ref> -->") or newlines are detected,
     * in line with what removeHTMLcomments() removes.
     *
     * @param string $text
     *
     * @return bool
     */
    public static function isCommented(string $text): bool
    {
        return preg_match('#<!--.*?-->#s', $text) > 0;
    }

    /**
     * Remove '<!--', '-->', and everything between.
     * To avoid leaving blank lines, when a comment is both preceded
     * and followed by a newline (ignoring spaces), trim leading and
     * trailing spaces and one of the newlines.
     * (c) WikiMedia /includes/parser/Sanitizer.php.
     *
     * All offsets are byte offsets: strpos() is used together with substr()
     * and substr_replace(). Mixing character offsets (mb_strpos) with those
     * byte-based functions corrupts the text as soon as a multibyte character
     * precedes the comment. Byte search is safe on UTF-8 because both
     * delimiters are pure ASCII.
     *
     * @param string $text
     *
     * @return string
     */
    public static function removeHTMLcomments(string $text)
    {
        while (false !== ($start = strpos($text, '<!--'))) {
            $end = strpos($text, '-->', $start + 4);
            if (false === $end) {
                // Unterminated comment; bail out
                break;
            }
            $end += 3;
            // Trim space and newline if the comment is both
            // preceded and followed by a newline
            $spaceStart = max($start - 1, 0);
            $spaceLen = $end - $spaceStart;
            while (' ' === substr($text, $spaceStart, 1) && $spaceStart > 0) {
                --$spaceStart;
                ++$spaceLen;
            }
            while (' ' === substr($text, $spaceStart + $spaceLen, 1)) {
                ++$spaceLen;
            }
            if ("\n" === substr($text, $spaceStart, 1)
                && "\n" === substr($text, $spaceStart + $spaceLen, 1)
            ) {
                // Remove the comment, leading and trailing
                // spaces, and leave only one newline.
                $text = substr_replace($text, "\n", $spaceStart, $spaceLen + 1);
            } else {
                // Remove just the comment.
                $text = substr_replace($text, '', $start, $end - $start);
            }
        }

        return $text;
    }
}


1			<?php
2			/**
3			* This file is part of dispositif/wikibot application
4			* 2019 : Philippe M. <[email protected]>
5			* For the full copyright and MIT license information, please view the LICENSE file.
6			*/
7
8			declare(strict_types=1);
9
10			namespace App\Domain\Utils;
11
12			class WikiTextUtil extends TextUtil
13			{
14			/**
15			* remove wiki encoding : italic, bold, links [ ] and [[fu|bar]] => bar
16			* replace non-breaking spaces
17			* replace {{lang|en|fubar}} => fubar.
18			*
19			* @param $text
20			* @param bool $stripcomment
21			*
22			* @return string
23			*/
24			public static function unWikify(string $text, ?bool $stripcomment = true): string
25			{
26			// todo remove HTML tags ?
27			if (true === $stripcomment) {
28			$text = self::removeHTMLcomments($text);
29			}
30
31			$text = str_replace(
32			['[', ']', "'''", "''", ' '],
33			['', '', '', '', ' '],
34			preg_replace(
35			[
36			"#\[\[[^|\]]*\|([^]]*)]]#",
37			'#{{ ?(?:lang|langue) ?\|[^|]+\| ?(?:texte=)?([^{}=]+)(?:\|dir=rtl)?}}#i',
38			"#&[\w\d]{2,7};#",
39			],
40			['$1', '$1', ''],
41			$text
42			)
43			);
44			$text = str_replace(['<small>', '</small>'], '', $text); // ??
45
46			return $text;
47			}
48
49			public static function isWikify(string $text): bool
50			{
51			if (self::unWikify($text) !== $text) {
52			return true;
53			}
54
55			return false;
56			}
57
58			/**
59			* Get page titles from wiki encoded links.
60			* (but not others projects links like [[wikt:bla]].
61			*
62			* @param string $text
63			*
64			* @return array|null
65			*/
66			public static function getWikilinkPages(string $text): ?array
67			{
68			if (preg_match_all('#\[\[([^:|\]]+)(?:\|[^|\]]*)?]]#', $text, $matches) > 0) {
69			return $matches[1];
70			}
71
72			return null;
73			}
74
75			/**
76			* Strip external links (http://) from wiki text.
77			* "[http://google.fr Google]" => "Google"
78			* "bla [http://google.fr]" => "bla"
79			*
80			* @param string $text
81			*
82			* @return string
83			*/
84			public static function stripExternalLink(string $text): string
85			{
86			$text = preg_replace('#\[(https?://[^][<>\s"]+) *((?<= )[^\n\]]*|)\]#i', '${2}', $text);
87
88			return trim($text);
89			}
90
91			/**
92			* @param string $text
93			*
94			* @return bool
95			*/
96			public static function isCommented(string $text): bool
97			{
98			//ou preg_match('#<\!--(?!-->).*-->#s', '', $text); // plus lourd mais précis
99			return (preg_match('#<!--[^>]*-->#', $text) > 0) ? true : false;
100			}
101
102			/**
103			* Remove '<!--', '-->', and everything between.
104			* To avoid leaving blank lines, when a comment is both preceded
105			* and followed by a newline (ignoring spaces), trim leading and
106			* trailing spaces and one of the newlines.
107			* (c) WikiMedia /includes/parser/Sanitizer.php.
108			*
109			* @param string $text
110			*
111			* @return string
112			*/
113			public static function removeHTMLcomments(string $text)
114			{
115			while (false !== ($start = mb_strpos($text, '<!--'))) {
116			$end = mb_strpos($text, '-->', $start + 4);
117			if (false === $end) {
118			// Unterminated comment; bail out
119			break;
120			}
121			$end += 3;
122			// Trim space and newline if the comment is both
123			// preceded and followed by a newline
124			$spaceStart = max($start - 1, 0);
125			$spaceLen = $end - $spaceStart;
126			while (' ' === substr($text, $spaceStart, 1) && $spaceStart > 0) {
127			--$spaceStart;
128			++$spaceLen;
129			}
130			while (' ' === substr($text, $spaceStart + $spaceLen, 1)) {
131			++$spaceLen;
132			}
133			if ("\n" === substr($text, $spaceStart, 1)
134			&& "\n" === substr($text, $spaceStart + $spaceLen, 1)
135			) {
136			// Remove the comment, leading and trailing
137			// spaces, and leave only one newline.
138			$text = substr_replace($text, "\n", $spaceStart, $spaceLen + 1);
139			} else {
140			// Remove just the comment.
141			$text = substr_replace($text, '', $start, $end - $start);
142			}
143			}
144
145			return $text;
146			}
147			}
148

Dispositif / Wikibot

Push — master ( 99b995...983f4d )

WikiTextUtil::isCommented() A

Complexity

Size

Duplication

Importance

Duplication Side-by-Side

Filter issues like