1
|
|
|
<?php |
2
|
|
|
/* |
3
|
|
|
* This file is part of dispositif/wikibot application (@github) |
4
|
|
|
* 2019/2020 © Philippe/Irønie <[email protected]> |
5
|
|
|
* For the full copyright and MIT license information, view the license file. |
6
|
|
|
*/ |
7
|
|
|
|
8
|
|
|
declare(strict_types=1); |
9
|
|
|
|
10
|
|
|
namespace App\Domain\Utils; |
11
|
|
|
|
12
|
|
|
/**
 * Static helpers fixing the wiki syntax around footnote references
 * (<ref>…</ref>, <ref name="…" /> and {{Sfn|…}}) in wikitext.
 */
class WikiRefsFixer
{
    /**
     * Apply the reference syntax fixes to the part of the text located before
     * a "special" references list (references defined at the bottom of the
     * page, see beforeSpecialRefsList()).
     *
     * Only the first occurrence of that leading part is rewritten: the rest of
     * the text, including the references list itself, is returned untouched
     * even when it happens to contain the same character sequence.
     *
     * @param string $text Full wikitext.
     *
     * @return string Wikitext with the fixes applied to its leading part.
     */
    public static function fixRefWikiSyntax(string $text): string
    {
        $head = self::beforeSpecialRefsList($text);
        if ($head === '') {
            return $text;
        }

        $fixedHead = self::fixRefSpacingSyntax(self::fixConcatenatedRefsSyntax($head));
        if ($fixedHead === $head) {
            return $text;
        }

        // $head is either $text itself or a trimmed prefix of it, so its first
        // occurrence is the segment that was actually extracted.
        $offset = strpos($text, $head);
        if ($offset === false) {
            return $text;
        }

        return substr($text, 0, $offset) . $fixedHead . substr($text, $offset + strlen($head));
    }

    /**
     * Return the trimmed part of the text preceding a references list that
     * declares its references inline, i.e. either a
     * {{Références|…références=…}} style template (also "Références nombreuses"
     * and "Références discussion", with the "refs" parameter alias) or an
     * old-style <references>…<ref name=… block.
     *
     * @param string $text Full wikitext.
     *
     * @return string The trimmed leading part, or $text unchanged when no such
     *                list is found.
     */
    protected static function beforeSpecialRefsList(string $text): string
    {
        // Modifier "s": the dot also matches new lines, so the greedy (.*)
        // captures everything up to the last matching list opening.
        $patterns = [
            '#(.*)\{\{ ?(?:Références|Références nombreuses|Références discussion)[\s\r\n\t]*\|[^\{\}]*(références|refs)[\s\r\n\t]*=#si',
            '#(.*)<references>.*<ref name=#si',
        ];

        foreach ($patterns as $pattern) {
            if (preg_match($pattern, $text, $matches) === 1) {
                return trim($matches[1]);
            }
        }

        return $text;
    }

    /**
     * Add the reference separator {{,}} between adjacent reference tags.
     * Not-cosmetic changes.
     * Examples :
     * "<ref>A</ref><ref>B</ref>" => "<ref>A</ref>{{,}}<ref>B</ref>".
     * "<ref name="A" /> <ref>…" => "<ref name="A" />{{,}}<ref>…".
     * "{{Sfn|...}}<ref name=B>..." => "{{Sfn|...}}{{,}}<ref name=B>...".
     *
     * The text is returned unchanged when it contains a special references
     * list (see hasSpecialRefsList()).
     * A "<references …>" tag following a reference is never treated as a
     * "<ref" tag.
     * TODO : {{note}}
     *
     * @param string $wikiText Wikitext to fix.
     *
     * @return string Fixed wikitext.
     */
    public static function fixConcatenatedRefsSyntax(string $wikiText): string
    {
        if (self::hasSpecialRefsList($wikiText)) {
            return $wikiText;
        }

        // Whitespace (new lines included) between two references is removed:
        // outside of a references list, a new line between refs would push
        // the second ref-link onto its own line.
        // "\b" after "<ref" keeps "<references" from being matched as a ref tag.
        $rules = [
            // </ref><ref… => </ref>{{,}}<ref…
            '#</ref>[\n\r\s]*<ref\b#' => '</ref>{{,}}<ref',
            // <ref name="A" /><ref… => <ref name="A" />{{,}}<ref…
            '#(<ref name=[^\/\>\r\n]+/>)[\n\r\s]*<ref\b#' => '$1{{,}}<ref',
            // {{Sfn|...}}{{Sfn|...}} => {{Sfn|...}}{{,}}{{Sfn|...}}
            '#(\{\{sfn[\s\|\n\r][^\{\}]+}})\s*(\{\{sfn[\s\|\n\r])#i' => '$1{{,}}$2',
            // </ref>{{Sfn|...}} => </ref>{{,}}{{Sfn|...}}
            '#</ref>\s*(\{\{sfn[\s\|\n\r])#i' => '</ref>{{,}}$1',
            // <ref name="A" />{{Sfn|...}} => <ref name="A" />{{,}}{{Sfn|...}}
            '#(<ref name=[^\/\>]+/>)\s*(\{\{sfn[\s\|\n\r])#i' => '$1{{,}}$2',
            // {{Sfn|...}}<ref… => {{Sfn|...}}{{,}}<ref…
            '#(\{\{sfn[\s\|\n\r][^\{\}]+}})\s*<ref\b#i' => '$1{{,}}<ref',
        ];

        foreach ($rules as $pattern => $replacement) {
            $wikiText = self::replaceOrKeep($pattern, $replacement, $wikiText);
        }

        return $wikiText;
    }

    /**
     * Tell whether the text contains a references list declaring its
     * references inline: {{Références nombreuses}} / {{Références discussion}} /
     * {{Références}} with a "références=" (or "refs=") parameter, or an
     * old-style <references><ref name=…>…</references> block.
     *
     * @param string $wikiText Wikitext to inspect.
     *
     * @return bool True when one of those constructs is found.
     */
    private static function hasSpecialRefsList(string $wikiText): bool
    {
        $patterns = [
            // the rare {{Références nombreuses}} and {{Références discussion}} with param "références=..."
            '#\{\{ ?(Références nombreuses|Références discussion)[\s\r\n\t]*\|[^}]*(références|refs)[\s\r\n\t]*=#i',
            // old style <references><ref name=…>... </references>
            '#<references>[\s\r\n\t]*<ref name=#i',
            // {{Références | références= ... }}
            '#\{\{ ?Références[\s\r\n\t]*\|[^\}]*(références|refs)[\s\r\n\t]*=#i',
        ];

        foreach ($patterns as $pattern) {
            if (preg_match($pattern, $wikiText) > 0) {
                return true;
            }
        }

        return false;
    }

    /**
     * Fix some generic wiki syntax around references: whitespace before an
     * opening <ref> tag and whitespace between </ref> and a following "." or ",".
     * Not-cosmetic changes.
     *
     * The text is returned unchanged when it contains a special references
     * list (see hasSpecialRefsList()).
     * todo : final "." inside the ref
     * todo : punctuation before ref ".<ref…" (but exclude abbreviations such as "etc.<ref")
     *
     * @param string $text Wikitext to fix.
     *
     * @return string Fixed wikitext.
     */
    public static function fixRefSpacingSyntax(string $text): string
    {
        if (self::hasSpecialRefsList($text)) {
            return $text;
        }

        // Whitespace before a ref is removed unless it directly follows "|" or
        // "=" (cosmetic in wiki-tables) or a 0-9 digit (reading confusion).
        // Regex : negative-lookbehind (?<!foo) means "not preceded by foo".
        // Note: the lookbehind only inspects one character, so in a run of
        // several whitespace characters after an excluded character only the
        // first one is kept.
        $rules = [
            '#(?<![\|\d=])\s+<ref>#' => '<ref>',
            '#(?<![\|\d=])\s+(<ref name=[^>]+>)#' => '$1',
            // whitespace + punctuation after a ref
            '#</ref>\s+\.#' => '</ref>.',
            '#</ref>\s+\,#' => '</ref>,',
        ];

        foreach ($rules as $pattern => $replacement) {
            $text = self::replaceOrKeep($pattern, $replacement, $text);
        }

        return $text;
    }

    /**
     * Run preg_replace() and fall back to the untouched subject when PCRE
     * reports an error (preg_replace() then returns null), so that callers
     * always receive a string.
     *
     * @param string $pattern     PCRE pattern.
     * @param string $replacement Replacement string.
     * @param string $subject     Text to process.
     *
     * @return string Replaced text, or $subject unchanged on PCRE error.
     */
    private static function replaceOrKeep(string $pattern, string $replacement, string $subject): string
    {
        return preg_replace($pattern, $replacement, $subject) ?? $subject;
    }
}
121
|
|
|
|