Passed
Push — master ( aea8b9...d57411 )
by Dispositif
03:07
created

WikiRefsFixer   A

Complexity

Total Complexity 12

Size/Duplication

Total Lines 107
Duplicated Lines 0 %

Importance

Changes 1
Bugs 0 Features 1
Metric Value
wmc 12
eloc 36
c 1
b 0
f 1
dl 0
loc 107
rs 10

5 Methods

Rating   Name   Duplication   Size   Complexity  
A fixRefSpacingSyntax() 0 16 2
A hasSpecialRefsList() 0 25 4
A fixConcatenatedRefsSyntax() 0 21 2
A beforeSpecialRefsList() 0 12 3
A fixRefWikiSyntax() 0 9 1
1
<?php
2
/*
3
 * This file is part of dispositif/wikibot application (@github)
4
 * 2019/2020 © Philippe/Irønie  <[email protected]>
5
 * For the full copyright and MIT license information, view the license file.
6
 */
7
8
declare(strict_types=1);
9
10
namespace App\Domain\Utils;
11
12
/**
 * Static helpers that normalise the wiki syntax around footnote references
 * (<ref>…</ref>, <ref name=… /> and {{Sfn|…}}).
 */
class WikiRefsFixer
{
    /**
     * Apply every reference fix to the article body while leaving a trailing
     * special references list ({{Références|références=…}} or
     * <references>…<ref name=…) untouched.
     *
     * @param string $text Full wikitext.
     *
     * @return string Wikitext with the fixes applied to the part located before the special refs list.
     */
    public static function fixRefWikiSyntax(string $text): string
    {
        // Only the part located before a special refs list is subject to the fixes.
        $initialWorkText = self::beforeSpecialRefsList($text);

        $workText = self::fixConcatenatedRefsSyntax($initialWorkText);
        $workText = self::fixRefSpacingSyntax($workText);

        if ('' === $initialWorkText || $workText === $initialWorkText) {
            return $text;
        }

        // Splice the fixed part back at its first occurrence only. A global
        // str_replace() would also rewrite an identical fragment located
        // inside the refs list that has to be skipped.
        $position = strpos($text, $initialWorkText);
        if (false === $position) {
            return $text;
        }

        return substr_replace($text, $workText, $position, strlen($initialWorkText));
    }

    /**
     * Return the trimmed part of the text located before a special references
     * list, or the whole text when no such list is detected.
     *
     * @param string $text Full wikitext.
     */
    protected static function beforeSpecialRefsList(string $text): string
    {
        // Regex option /s: the dot also matches line breaks.
        $patterns = [
            '#(.*)\{\{ ?(?:Références|Références nombreuses|Références discussion)[\s\r\n\t]*\|[^\{\}]*(références|refs)[\s\r\n\t]*=#si',
            '#(.*)<references>.*<ref name=#si',
        ];

        foreach ($patterns as $pattern) {
            if (preg_match($pattern, $text, $matches)) {
                return trim($matches[1]);
            }
        }

        return $text;
    }

    /**
     * Add reference separator {{,}} between reference tags. Not-cosmetic changes.
     * Example :
     * "<ref>A</ref><ref>B</ref>" => "<ref>A</ref>{{,}}<ref>B</ref>".
     * "<ref name="A" /> <ref>…" => "<ref name="A" />{{,}}<ref>…".
     * "{{Sfn|...}}<ref name=B>..." => "{{Sfn|...}}{{,}}<ref name=B>...".
     * Skip replacement if {Références | références=...} or {Références nombreuses} or {Références discussion}
     * TODO : {{note}}
     *
     * @param string $wikiText Wikitext to fix.
     *
     * @return string Fixed wikitext; the input is returned unchanged when it holds a special
     *                refs list or when the regex engine reports an error.
     */
    public static function fixConcatenatedRefsSyntax(string $wikiText): string
    {
        if (self::hasSpecialRefsList($wikiText)) {
            return $wikiText;
        }

        // "<ref\b" (word boundary) so that a following "<references />" tag is
        // not mistaken for a "<ref" tag.
        $patterns = [
            // A line break is only expected between refs inside {{Références | références= ... }}.
            // Outside that template, "</ref>\n<ref…" shows the ref-link on a new line => whitespace deleted.
            '#</ref>[\n\r\s]*<ref\b#',
            '#(<ref name=[^\/\>\r\n]+/>)[\n\r\s]*<ref\b#',
            // {{Sfn|...}}{{Sfn|...}}
            '#(\{\{sfn[\s\|\n\r][^\{\}]+}})\s*(\{\{sfn[\s\|\n\r])#i',
            // </ref>{{Sfn|...}} => </ref>{{,}}{{Sfn|...}}
            '#</ref>\s*(\{\{sfn[\s\|\n\r])#i',
            // <ref name="A" />{{Sfn|...}} => <ref name="A" />{{,}}{{Sfn|...}}
            '#(<ref name=[^\/\>]+/>)\s*(\{\{sfn[\s\|\n\r])#i',
            // {{Sfn|...}}<ref… => {{Sfn|...}}{{,}}<ref…
            '#(\{\{sfn[\s\|\n\r][^\{\}]+}})\s*<ref\b#i',
        ];
        $replacements = [
            '</ref>{{,}}<ref',
            '$1{{,}}<ref',
            '$1{{,}}$2',
            '</ref>{{,}}$1',
            '$1{{,}}$2',
            '$1{{,}}<ref',
        ];

        // Patterns are applied one after the other, in order.
        // preg_replace() returns null on a PCRE error: keep the text untouched in that case.
        return preg_replace($patterns, $replacements, $wikiText) ?? $wikiText;
    }

    /**
     * Tell whether the text contains a references list that embeds the
     * reference definitions themselves.
     *
     * @param string $wikiText Wikitext to inspect.
     */
    private static function hasSpecialRefsList(string $wikiText): bool
    {
        $patterns = [
            // The rare {Références nombreuses} and {Références discussion} with param "références=..."
            '#\{\{ ?(Références nombreuses|Références discussion)[\s\r\n\t]*\|[^}]*(références|refs)[\s\r\n\t]*=#i',
            // old style <references><ref name=…>... </references>
            '#<references>[\s\r\n\t]*<ref name=#i',
            // {{Références | références= ... }}
            '#\{\{ ?Références[\s\r\n\t]*\|[^\}]*(références|refs)[\s\r\n\t]*=#i',
        ];

        foreach ($patterns as $pattern) {
            if (preg_match($pattern, $wikiText) > 0) {
                return true;
            }
        }

        return false;
    }

    /**
     * Fix some generic wiki syntax. Not-cosmetic changes.
     * todo : final . in ref
     * todo : punctuation before ref ".<ref…" (but exclude abbreviations such as "etc.<ref")
     *
     * @param string $text Wikitext to fix.
     *
     * @return string Fixed wikitext; the input is returned unchanged when it holds a special
     *                refs list or when the regex engine reports an error.
     */
    public static function fixRefSpacingSyntax(string $text): string
    {
        if (self::hasSpecialRefsList($text)) {
            return $text;
        }

        $patterns = [
            // Spaces before ref, not preceded by "|", "=" (cosmetic in wiki-tables)
            // or a 0-9 digit (reading confusion).
            // Regex : negative-lookbehind (?<!fubar) for not preceded by fubar
            '#(?<![\|\d=])\s+<ref>#',
            '#(?<![\|\d=])\s+(<ref name=[^>]+>)#',
            // space + punctuation after ref
            '#</ref>\s+\.#',
            '#</ref>\s+\,#',
        ];
        $replacements = [
            '<ref>',
            '$1',
            '</ref>.',
            '</ref>,',
        ];

        // preg_replace() returns null on a PCRE error: keep the text untouched in that case.
        return preg_replace($patterns, $replacements, $text) ?? $text;
    }
}
121