WikiTextUtil::fixConcatenatedRefsSyntax() - Code Metrics - Inspection of "add DiffAdapter" - Dispositif/Wikibot - Measure and Improve Code Quality continuously with Scrutinizer

Passed

Push — master ( aea8b9...d57411 )

by Dispositif

created 2024-01-14 15:41 UTC

WikiTextUtil::fixConcatenatedRefsSyntax() A

↳ Parent: Project

Complexity

Conditions	1
Paths	1

Size

Total Lines	5
Code Lines	2

Duplication

Lines	0
Ratio	0 %

Code Coverage

Tests	0
CRAP Score	2

Importance

Changes

Metric	Value
eloc	2
c	0
b	0
f	0
dl	0
loc	5
ccs	0
cts	0
cp	0
rs	10
cc	1
nc	1
nop	1
crap	2

<?php
/*
 * This file is part of dispositif/wikibot application (@github)
 * 2019/2020 © Philippe/Irønie  <[email protected]>
 * For the full copyright and MIT license information, view the license file.
 */

declare(strict_types=1);

namespace App\Domain\Utils;

class WikiTextUtil extends TextUtil
{
    /**
     * Collect every <ref>…</ref> of a wiki text, plus the bare URLs written
     * as bullet-list items ("* http://…").
     *
     * Each entry is a PREG_SET_ORDER match: index 0 holds the full match and
     * index 1 the captured content (ref body, or URL without a final dot).
     * Self-closing refs (<ref name="x" />) are not matched.
     *
     * NOTE(review): when the text holds no <ref> at all, an empty array is
     * returned and bullet-list URLs are not looked up — confirm this is wanted.
     *
     * todo {{ref}}
     *
     * @return array [0=>['<ref>fu</ref>', 'fu'], 1=> ...]
     */
    public static function extractRefsAndListOfLinks(string $text): array
    {
        // "s": the dot also matches "\n" ; "m": ^ and $ work line by line.
        // The negative lookahead prevents a match from spanning a nested/closing </ref>.
        $refPattern = '#<ref[^>/]*>((?:(?!</ref>).)*)</ref>#ism';
        // A bullet followed by a bare URL, an optional final dot, then end of line.
        $bulletUrlPattern = '#^\* *(https?://[^ \n]+[^ \n.])\.? *\n#im';

        $refMatches = [];
        $refCount = preg_match_all($refPattern, $text, $refMatches, PREG_SET_ORDER);
        if (!$refCount) {
            return [];
        }

        $urlMatches = [];
        $urlCount = preg_match_all($bulletUrlPattern, $text, $urlMatches, PREG_SET_ORDER);

        // Refs first, bullet-list URLs appended after them.
        return $urlCount ? [...$refMatches, ...$urlMatches] : $refMatches;
    }

    /**
     * Remove wiki markup from a text and return its plain form.
     *
     * Transformations, in order:
     *  - optionally remove HTML comments (only when $stripcomment is strictly true);
     *  - [[fu|bar]] => bar ;
     *  - {{lang|en|fubar}} / {{langue|en|texte=fubar}} => fubar ;
     *  - named HTML entities such as "&nbsp;" are deleted ;
     *  - remaining "[" and "]", bold (''') and italic ('') quotes are deleted ;
     *  - non-breaking spaces (U+00A0) become regular spaces ;
     *  - {{Lien|Jeffrey Robinson}} => Jeffrey Robinson ;
     *  - HTML tags are stripped, except <sup> and <sub>.
     *
     * @param string    $text         wiki text to clean
     * @param bool|null $stripcomment true to remove <!-- comments --> first;
     *                                false or null leaves them to strip_tags()
     *
     * @return string
     */
    public static function unWikify(string $text, ?bool $stripcomment = true): string
    {
        if (true === $stripcomment) {
            $text = self::removeHTMLcomments($text);
        }

        $text = str_replace(
            // The non-breaking space is written as an explicit escape: an invisible
            // literal is easily turned into a regular space by editors or copy/paste,
            // which would silently make this replacement a no-op.
            ['[', ']', "'''", "''", "\u{00A0}"],
            ['', '', '', '', ' '],
            preg_replace(
                [
                    "#\[\[[^|\]]*\|([^]]*)]]#",
                    '#{{ ?(?:lang|langue) ?\|[^|]+\| ?(?:texte=)?([^{}=]+)(?:\|dir=rtl)?}}#i',
                    "#&[\w\d]{2,7};#",
                ],
                ['$1', '$1', ''],
                $text
            )
        );
        // {{Lien|Jeffrey Robinson}} => Jeffrey Robinson
        $text = preg_replace('#{{ ?lien ?\| ?([^|}]+) ?}}#i', '${1}', $text);

        return strip_tags($text, '<sup><sub>');
    }

    /**
     * Tell whether unWikify() would alter the text, i.e. whether it holds
     * wiki markup, HTML comments/tags/entities or non-breaking spaces.
     */
    public static function isWikify(string $text): bool
    {
        $plain = self::unWikify($text);

        return $plain !== $text;
    }

    /**
     * Generate a wikilink from a label and an optional target page.
     *
     * Both values are un-wikified and trimmed first; underscores of the label
     * become spaces. The piped form is only produced when the page title
     * differs from the label once both are normalized as wiki titles.
     *
     *   ("fu_bar")    => [[fu bar]]
     *   ("fu", "Fu")  => [[fu]]
     *   ("fu", "bar") => [[Bar|fu]]
     *
     * @return string
     */
    public static function wikilink(string $label, ?string $page = null): string
    {
        $cleanLabel = trim(str_replace('_', ' ', self::unWikify($label)));

        $target = null;
        if ($page) {
            $target = trim(self::unWikify($page));
        }

        // No usable target page: link straight to the label.
        if (empty($target)) {
            return sprintf('[[%s]]', $cleanLabel);
        }

        $targetTitle = self::str2WikiTitle($target);

        // Label and page designate the same title: the pipe would be redundant.
        if (self::str2WikiTitle($cleanLabel) === $targetTitle) {
            return sprintf('[[%s]]', $cleanLabel);
        }

        return sprintf('[[%s|%s]]', $targetTitle, $cleanLabel);
    }

    /**
     * Normalize a string as a wiki page title: underscores become spaces,
     * surrounding whitespace is trimmed, then TextUtil::mb_ucfirst() is applied.
     * "fu_bar_ " => "Fu bar".
     *
     * @return string
     */
    private static function str2WikiTitle(string $str): string
    {
        $spaced = str_replace('_', ' ', $str);

        return TextUtil::mb_ucfirst(trim($spaced));
    }

    /**
     * Get the page titles targeted by the wikilinks of a text.
     * Links whose target contains a colon (other projects or namespaces,
     * like [[wikt:bla]]) are ignored.
     *
     * @return array|null list of page titles, or null when no link is found
     */
    public static function getWikilinkPages(string $text): ?array
    {
        // Group 1 = target page ; the optional "|label" part is not captured.
        $found = preg_match_all('#\[\[([^:|\]]+)(?:\|[^|\]]*)?]]#', $text, $matches);

        return $found > 0 ? $matches[1] : null;
    }

    /**
     * Replace bracketed external links (http:// or https://) by their label,
     * then trim the result.
     * "[http://google.fr Google]" => "Google"
     * "bla [http://google.fr]" => "bla"
     *
     * @return string
     */
    public static function stripExternalLink(string $text): string
    {
        // Group 1 = URL ; group 2 = label (empty when the link has none).
        $linkPattern = '#\[(https?://[^][<>\s"]+) *((?<= )[^\n\]]*|)\]#i';
        $withoutLinks = preg_replace($linkPattern, '${2}', $text);

        return trim($withoutLinks);
    }

    /**
     * Tell whether the text contains an HTML comment, the boilerplate
     * "<!-- Paramètre obligatoire -->" comment being ignored.
     *
     * The detection is deliberately cheap: a comment whose body contains
     * a ">" character is not detected.
     *
     * @return bool
     */
    public static function isCommented(string $text): bool
    {
        $withoutBoilerplate = str_replace('<!-- Paramètre obligatoire -->', '', $text);

        // A lookahead-based pattern matched with the "s" flag would be more
        // precise, but heavier.
        return 1 === preg_match('#<!--[^>]*-->#', $withoutBoilerplate);
    }

    /**
     * Remove '<!--', '-->', and everything between.
     * To avoid leaving blank lines, when a comment is both preceded
     * and followed by a newline (ignoring spaces), trim leading and
     * trailing spaces and one of the newlines.
     * An unterminated comment is left untouched.
     * (c) WikiMedia /includes/parser/Sanitizer.php.
     *
     * All offsets are byte offsets: the search uses strpos() so that the
     * positions agree with substr()/substr_replace(). Searching with
     * mb_strpos() yields character offsets, which removes the wrong span
     * (and can split a UTF-8 sequence) as soon as a multibyte character
     * precedes the comment. Byte-wise search is safe here because the
     * delimiters are pure ASCII.
     *
     * @param string $text text possibly holding HTML comments
     *
     * @return string the text without its terminated HTML comments
     */
    public static function removeHTMLcomments(string $text)
    {
        while (false !== ($start = strpos($text, '<!--'))) {
            $end = strpos($text, '-->', $start + 4);
            if (false === $end) {
                // Unterminated comment; bail out
                break;
            }
            // Move past the closing "-->".
            $end += 3;
            // Trim space and newline if the comment is both
            // preceded and followed by a newline
            $spaceStart = max($start - 1, 0);
            $spaceLen = $end - $spaceStart;
            // Extend the span leftwards over the spaces preceding the comment.
            while (' ' === substr($text, $spaceStart, 1) && $spaceStart > 0) {
                --$spaceStart;
                ++$spaceLen;
            }
            // Extend the span rightwards over the spaces following the comment.
            while (' ' === substr($text, $spaceStart + $spaceLen, 1)) {
                ++$spaceLen;
            }
            if ("\n" === substr($text, $spaceStart, 1)
                && "\n" === substr($text, $spaceStart + $spaceLen, 1)
            ) {
                // Remove the comment, leading and trailing
                // spaces, and leave only one newline.
                $text = substr_replace($text, "\n", $spaceStart, $spaceLen + 1);
            } else {
                // Remove just the comment.
                $text = substr_replace($text, '', $start, $end - $start);
            }
        }

        return (string) $text;
    }

    /**
     * Drop one trailing point (".") from a string, as found at the end
     * of a <ref>. Only the last point is removed; other strings are
     * returned unchanged.
     *
     * @return string
     */
    public static function stripFinalPoint(string $str): string
    {
        return str_ends_with($str, '.') ? substr($str, 0, -1) : $str;
    }

    /**
     * Percent-encode the character sequences of a URL that would break
     * a wiki-template value (spaces, quotes used as wiki emphasis, angle
     * and square brackets, double braces, pipes).
     * https://en.wikipedia.org/wiki/Template:Citation_Style_documentation/url
     *
     * A lone apostrophe and single braces are left as they are.
     *
     * @return string
     */
    public static function normalizeUrlForTemplate(string $url): string
    {
        // Parallel lists, applied in order: the triple quote must be handled
        // before the double quote.
        $unsafe = [' ', '"', "'''", "''", '<', '>', '[', ']', '{{', '|', '}}'];
        $encoded = [
            '%20',
            '%22',
            '%27%27%27',
            '%27%27',
            '%3c',
            '%3e',
            '%5b',
            '%5d',
            '%7b%7b',
            '%7c',
            '%7d%7d',
        ];

        return str_replace($unsafe, $encoded, $url);
    }
}


1		<?php
2		/*
3		* This file is part of dispositif/wikibot application (@github)
4		* 2019/2020 © Philippe/Irønie <[email protected]>
5		* For the full copyright and MIT license information, view the license file.
6		*/
7
8		declare(strict_types=1);
9
10		namespace App\Domain\Utils;
11
12		class WikiTextUtil extends TextUtil
13		{
14		/**
15		* todo {{ref}}
16		*
17		*
18		* @return array [0=>['<ref>fu</ref>', 'fu'], 1=> ...]
19		*/
20		public static function extractRefsAndListOfLinks(string $text): array
21		{
22		// s = "\n" include in "." // m = ^multiline$
23		// Exclusion des imbrications
24		if (!preg_match_all('#<ref[^>/]>((?:(?!</ref>).))</ref>#ism', $text, $refs, PREG_SET_ORDER)) {
25		return [];
26		}
27		$result = $refs;
28
29		// extraction des liens externes
30		// ^\* *(https?:\/\/[^ ]+[^ .])$
31		if (preg_match_all('#^\* (https?://[^ \n]+[^ \n.])\.? \n#im', $text, $liensExternes, PREG_SET_ORDER)) {
32		$result = [...$result, ...$liensExternes];
33		}
34
35		return $result;
36		}
37
38		/**
39		* remove wiki encoding : italic, bold, links [ ] and [[fu\|bar]] => bar
40		* replace non-breaking spaces
41		* replace {{lang\|en\|fubar}} => fubar.
42	60	*
43		* @param $text
44	60	* @param bool $stripcomment
45	60	*
46		* @return string
47		*/
48	60	public static function unWikify(string $text, ?bool $stripcomment = true): string
49	60	{
50	60	if (true === $stripcomment) {
51	60	$text = self::removeHTMLcomments($text);
52		}
53	60
54		$text = str_replace(
55		['[', ']', "'''", "''", ' '],
56		['', '', '', '', ' '],
57	60	preg_replace(
58	60	[
59		"#\[\[[^\|\]]\\|([^]])]]#",
60		'#{{ ?(?:lang\|langue) ?\\|[^\|]+\\| ?(?:texte=)?([^{}=]+)(?:\\|dir=rtl)?}}#i',
61		"#&[\w\d]{2,7};#",
62	60	],
63		['$1', '$1', ''],
64	60	$text
65		)
66	60	);
67		// {{Lien\|Jeffrey Robinson}} => Jeffrey Robinson
68		$text = preg_replace('#{{ ?lien ?\\| ?([^\|}]+) ?}}#i', '${1}', $text);
69	24
70		return strip_tags($text, '<sup><sub>');
71	24	}
72	3
73		public static function isWikify(string $text): bool
74		{
75	21	return self::unWikify($text) !== $text;
76		}
77
78		/**
79		* Generate wikilink from string.
80		*
81		*
82		* @return string
83		*/
84		public static function wikilink(string $label, ?string $page = null): string
85		{
86	4	$label = trim(str_replace('_', ' ', self::unWikify($label)));
87		$page = ($page) ? trim(self::unWikify($page)) : null;
88	4
89	4	// fu_bar => [[fu_bar]] / Fu, fu => [[fu]]
90		if (empty($page) \|\| self::str2WikiTitle($label) === self::str2WikiTitle($page)) {
91		return '[['.$label.']]';
92	4	}
93	3
94		// fu, bar => [[Bar\|fu]]
95		return sprintf(
96		'[[%s\|%s]]',
97	2	self::str2WikiTitle($page),
98	2	$label
99	2	);
100	2	}
101
102		/**
103		* "fu_bar_ " => "Fu bar".
104		*
105		* @return string
106		*/
107	4	private static function str2WikiTitle(string $str): string
108		{
109	4	return TextUtil::mb_ucfirst(trim(str_replace('_', ' ', $str)));
110		}
111
112		/**
113		* Get page titles from wiki encoded links.
114		* (but not others projects links like [[wikt:bla]].
115		*
116		*
117		* @return array\|null
118		*/
119		public static function getWikilinkPages(string $text): ?array
120	1	{
121		if (preg_match_all('#\[\[([^:\|\]]+)(?:\\|[^\|\]]*)?]]#', $text, $matches) > 0) {
122	1	return $matches[1];
123	1	}
124
125		return null;
126		}
127
128		/**
129		* Strip external links (http://) from wiki text.
130		* "[http://google.fr Google]" => "Google"
131		* "bla [http://google.fr]" => "bla"
132		*
133		*
134		* @return string
135		*/
136		public static function stripExternalLink(string $text): string
137		{
138	2	$text = preg_replace('#\[(https?://[^][<>\s"]+) ((?<= )[^\n\]]\|)\]#i', '${2}', $text);
139
140	2	return trim($text);
141		}
142	2
143		/**
144		* @return bool
145		*/
146		public static function isCommented(string $text): bool
147		{
148		$text = str_replace('<!-- Paramètre obligatoire -->', '', $text);
149
150	24	//ou preg_match('#<\!--(?!-->).*-->#s', '', $text); // plus lourd mais précis
151		return preg_match('#<!--[^>]*-->#', $text) > 0;
152	24	}
153
154		/**
155	24	* Remove '<!--', '-->', and everything between.
156		* To avoid leaving blank lines, when a comment is both preceded
157		* and followed by a newline (ignoring spaces), trim leading and
158		* trailing spaces and one of the newlines.
159		* (c) WikiMedia /includes/parser/Sanitizer.php.
160		*
161		*
162		* @return string
163		*/
164		public static function removeHTMLcomments(string $text)
165		{
166		while (false !== ($start = mb_strpos($text, '<!--'))) {
167		$end = mb_strpos($text, '-->', $start + 4);
168		if (false === $end) {
169	61	// Unterminated comment; bail out
170		break;
171	61	}
172	2	$end += 3;
173	2	// Trim space and newline if the comment is both
174		// preceded and followed by a newline
175		$spaceStart = max($start - 1, 0);
176		$spaceLen = $end - $spaceStart;
177	2	while (' ' === substr($text, $spaceStart, 1) && $spaceStart > 0) {
178		--$spaceStart;
179		++$spaceLen;
180	2	}
181	2	while (' ' === substr($text, $spaceStart + $spaceLen, 1)) {
182	2	++$spaceLen;
183		}
184		if ("\n" === substr($text, $spaceStart, 1)
185		&& "\n" === substr($text, $spaceStart + $spaceLen, 1)
186	2	) {
187	1	// Remove the comment, leading and trailing
188		// spaces, and leave only one newline.
189	2	$text = substr_replace($text, "\n", $spaceStart, $spaceLen + 1);
190	2	} else {
191		// Remove just the comment.
192		$text = substr_replace($text, '', $start, $end - $start);
193		}
194		}
195
196		return (string) $text;
197	2	}
198
199		/**
200		* Strip the final point (".") as in <ref> ending.
201	61	*
202		*
203		* @return string
204		*/
205		public static function stripFinalPoint(string $str): string
206		{
207		if (str_ends_with($str, '.')) {
208		return substr($str, 0, strlen($str) - 1);
209		}
210
211		return $str;
212		}
213
214		/**
215		* Normalize URL for inclusion as a wiki-template value.
216		* https://en.wikipedia.org/wiki/Template:Citation_Style_documentation/url
217		*
218		*
219		* @return string
220		*/
221		public static function normalizeUrlForTemplate(string $url): string
222		{
223		$searchReplace = [
224		' ' => '%20',
225		'"' => '%22',
226		"'''" => '%27%27%27',
227		"''" => '%27%27',
228		'<' => '%3c',
229		'>' => '%3e',
230		'[' => '%5b',
231		']' => '%5d',
232		'{{' => '%7b%7b',
233		'\|' => '%7c',
234		'}}' => '%7d%7d',
235		];
236
237		return str_replace(array_keys($searchReplace), array_values($searchReplace), $url);
238		}
239		}
240

Dispositif / Wikibot

Push — master ( aea8b9...d57411 )

WikiTextUtil::fixConcatenatedRefsSyntax() A

Complexity

Size

Duplication

Code Coverage

Importance

Duplication Side-by-Side

Filter issues like