<?php

/**
 * This file is part of Peachy MediaWiki Bot API
 *
 * Peachy is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */

/**
 * PeachyAWBFunctions class.
 *
 * It consists of various static functions used for the PeachyAWB script.
 * Much of the code is derived from Pywikipedia and AWB, both under the GPL.
 *
 */
class PeachyAWBFunctions {

    /**
     * Flat list of HTML tag names, grouped by the comments inside the array.
     * Several names appear in more than one group.
     */
    public static $html_tags = array(
        # Tags that must be closed
        'b', 'del', 'i', 'ins', 'u', 'font', 'big', 'small', 'sub', 'sup', 'h1',
        'h2', 'h3', 'h4', 'h5', 'h6', 'cite', 'code', 'em', 's',
        'strike', 'strong', 'tt', 'var', 'div', 'center',
        'blockquote', 'ol', 'ul', 'dl', 'table', 'caption', 'pre',
        'ruby', 'rt', 'rb', 'rp', 'p', 'span', 'u', 'abbr',
        # Single
        'br', 'hr', 'li', 'dt', 'dd',
        # Elements that cannot have close tags
        'br', 'hr',
        # Tags that can be nested--??
        'table', 'tr', 'td', 'th', 'div', 'blockquote', 'ol', 'ul',
        'dl', 'font', 'big', 'small', 'sub', 'sup', 'span',
        # Can only appear inside table, we will close them
        'td', 'th', 'tr',
        # Tags used by list
        'ul', 'ol',
        # Tags that can appear in a list
        'li',
        ## pairs
        # "b", "i", "u", "font", "big", "small", "sub", "sup", "h1",
        # "h2", "h3", "h4", "h5", "h6", "cite", "code", "em", "s", "span",
        # "strike", "strong", "tt", "var", "div", "center",
        # "blockquote", "ol", "ul", "dl", "table", "caption", "pre",
        # "ruby", "rt" , "rb" , "rp",
        ## single
        # "br", "p", "hr", "li", "dt", "dd",
        ## nest
        # "table", "tr", "td", "th", "div", "blockquote", "ol", "ul",
        # "dl", "font", "big", "small", "sub", "sup",
        ## table tags
        # "td", "th", "tr",

    );

    /**
     * Flat list of HTML attribute names.
     */
    public static $html_attrs = array(
        "title", "align", "lang", "dir", "width", "height",
        "bgcolor", "clear", "noshade",
        "cite", "size", "face", "color",
        "type", "start", "value", "compact",
        #/* For various lists, mostly deprecated but safe */
        "summary", "width", "border", "frame", "rules",
        "cellspacing", "cellpadding", "valign", "char",
        "charoff", "colgroup", "col", "span", "abbr", "axis",
        "headers", "scope", "rowspan", "colspan",
        "id", "class", "name", "style"
    );

    /**
     * Map of upper-case hex colour codes to colour names.
     */
    public static $html_colors = array(
        '#F0FFFF' => 'azure', '#F5F5DC' => 'beige', '#FFE4C4' => 'bisque', '#000000' => 'black', '#0000FF' => 'blue',
        '#A52A2A' => 'brown', '#FF7F50' => 'coral', '#FFF8DC' => 'cornsilk', '#DC143C' => 'crimson',
        '#00FFFF' => 'cyan', '#00008B' => 'darkBlue', '#008B8B' => 'darkCyan', '#A9A9A9' => 'darkGray',
        '#8B0000' => 'darkRed', '#FF1493' => 'deepPink', '#696969' => 'dimGray', '#FF00FF' => 'fuchsia',
        '#FFD700' => 'gold', '#808080' => 'gray', '#008000' => 'green', '#F0FFF0' => 'honeyDew', '#FF69B4' => 'hotPink',
        '#4B0082' => 'indigo', '#FFFFF0' => 'ivory', '#F0E68C' => 'khaki', '#E6E6FA' => 'lavender', '#00FF00' => 'lime',
        '#FAF0E6' => 'linen', '#800000' => 'maroon', '#FFE4B5' => 'moccasin', '#000080' => 'navy',
        '#FDF5E6' => 'oldLace', '#808000' => 'olive', '#FFA500' => 'orange', '#DA70D6' => 'orchid', '#CD853F' => 'peru',
        '#FFC0CB' => 'pink', '#DDA0DD' => 'plum', '#800080' => 'purple', '#FF0000' => 'red', '#FA8072' => 'salmon',
        '#2E8B57' => 'seaGreen', '#FFF5EE' => 'seaShell', '#A0522D' => 'sienna', '#C0C0C0' => 'silver',
        '#87CEEB' => 'skyBlue', '#FFFAFA' => 'snow', '#D2B48C' => 'tan', '#008080' => 'teal', '#D8BFD8' => 'thistle',
        '#FF6347' => 'tomato', '#EE82EE' => 'violet', '#F5DEB3' => 'wheat', '#FFFFFF' => 'white', '#FFFF00' => 'yellow',
    );

    /**
     * String matching "Stub" or "stub" when used as a regex fragment.
     * NOTE(review): not referenced inside this class - confirm how callers use it.
     */
    public static $stub_search = '[Ss]tub';

    /**
     * Interwiki map; filled by fixVars().
     */
    public static $interwiki_map = array();

    /**
     * Cache of typo rules, each array( 'word' => ..., 'find' => ..., 'replace' => ... );
     * filled lazily by fixTypos().
     */
    public static $typo_list = array();

    /**
     * Runs preg_replace() but never lets a failed regex destroy the text.
     *
     * preg_replace() returns NULL when a pattern fails to compile or PCRE
     * hits a runtime error; in that case the unmodified text is returned,
     * which mirrors the NULL check fixTypos() performs.
     *
     * @param string|array $pattern
     * @param string|array $replacement
     * @param string $text
     * @return string The replaced text, or $text unchanged if PCRE failed
     */
    private static function replaceOrKeep( $pattern, $replacement, $text ) {
        $result = preg_replace( $pattern, $replacement, $text );

        return is_null( $result ) ? $text : $result;
    }

    /**
     * Loads the interwiki map from the wiki's siteinfo into self::$interwiki_map.
     *
     * @param Wiki $wiki
     */
    public static function fixVars( Wiki $wiki ) {
        $interwiki = $wiki->siteinfo( array( 'interwikimap' ) );
        self::$interwiki_map = $interwiki['query']['interwikimap'];
    }

    /**
     * Normalises citation templates: merges {{cite web}} variants, strips
     * formatting from some parameters, tidies format= / accessdate= / date=
     * parameters, and repairs url/title parameters.
     *
     * All patterns use "/" as delimiter, so every literal slash inside a
     * pattern is escaped; an unescaped one makes preg_replace() return NULL.
     *
     * @param string $text Wikitext
     * @return string
     */
    public static function fixCitations( $text ) {

        //merge all variant of cite web
        $text = self::replaceOrKeep( '/\{\{\s*(cite[_ \-]*(url|web|website)|Web[_ \-]*(citation|cite|reference|reference[_ ]4))(?=\s*\|)/i', '{{cite web', $text );

        //Remove formatting on certain parameters
        $text = self::replaceOrKeep( "/(\|\s*(?:agency|author|first|format|language|last|location|month|publisher|work|year)\s*=\s*)(''|'''|''''')((?:\[\[[^][|]+|\[\[|)[][\w\s,.~!`\"]+)(''+)(?=\s*\|[\w\s]+=|\s*\}\})/", '$1$3', $text );

        //Unlink PDF in format parameters
        $text = self::replaceOrKeep( '/(\|\s*format\s*=\s*)\[\[(adobe|portable|document|file|format|pdf|\.|\s|\(|\)|\|)+\]\]/i', '$1PDF', $text );
        $text = self::replaceOrKeep( '/(\|\s*format\s*=\s*)(\s*\.?(adobe|portable|document|file|format|pdf|\(|\)))+?(\s*[|}])/i', '$1PDF$4', $text );

        //No |format=HTML says {{cite web/doc}}
        $text = self::replaceOrKeep( '/(\{\{cite[^{}]+)\|\s*format\s*=\s*(\[\[[^][|]+\||\[\[|)(\]\]| |html?|world|wide|web)+\s*(?=\||\}\})/i', '$1', $text );

        //Fix accessdate tags [[WP:AWB/FR#Fix accessdate tags]]
        $text = self::replaceOrKeep(
            array(
                '/(\|\s*)a[ces]{3,8}date(\s*=\s*)(?=[^{|}]*20\d\d|\}\})/',
                '/accessdate(\s*=\s*)\[*(200\d)[\/_\-](\d{2})[\/_\-](\d{2})\]*/',
                '/(\|\s*)a[cs]*es*mou*nthday(\s*=\s*)/',
                '/(\|\s*)a[cs]*es*daymou*nth(\s*=\s*)/',
                '/(\|\s*)accessdate(\s*=\s*[0-3]?[0-9] +(?:Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)\w*)([^][<>}{]*accessyear[\s=]+20\d\d)/',
                '/(\|\s*)accessdate(\s*=\s*(?:Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)\w* +[0-3]?[0-9])([^][<>}{]*accessyear[\s=]+20\d\d)/',
                '/(\|\s*)accessdaymonth(\s*=\s*)\s*([^{|}<>]+?)\s*(\|[^][<>}{]*accessyear[\s=]+)(20\d\d)/',
                '/(\|\s*)accessmonthday(\s*=\s*)\s*([^{|}<>]+?)\s*(\|[^][<>}{]*accessyear[\s=]+)(20\d\d)/',
            ),
            array(
                '$1accessdate$2',
                'accessdate$1$2-$3-$4',
                '$1accessmonthday$2',
                '$1accessdaymonth$2',
                '$1accessdaymonth$2$3',
                '$1accessmonthday$2$3',
                '$1accessdate$2$3 $5',
                '$1accessdate$2$3, $5',
            ),
            $text
        );

        //Fix improper dates
        $text = self::replaceOrKeep(
            array(
                '/(\{\{cit[ea][^{}]+\|\s*date\s*=\s*\d{2}[\/\-.]\d{2}[\/\-.])([5-9]\d)(?=\s*[|}])/i',
                '/(\{\{cit[ea][^{}]+\|\s*date\s*=\s*)(0[1-9]|1[012])[\/\-.](1[3-9]|2\d|3[01])[\/\-.](19\d\d|20\d\d)(?=\s*[|}])/i',
                '/(\{\{cit[ea][^{}]+\|\s*date\s*=\s*)(1[3-9]|2\d|3[01])[\/\-.](0[1-9]|1[012])[\/\-.](19\d\d|20\d\d)(?=\s*[|}])/i',
            ),
            array(
                '${1}19$2',
                '$1$4-$2-$3',
                '$1$4-$3-$2',
            ),
            $text
        );

        //Fix URLS lacking http://
        $text = self::replaceOrKeep( '/(\|\s*url\s*=\s*)([0-9a-z.\-]+\.[a-z]{2,4}\/[^][{|}:\s"]\s*[|}])/', '$1http://$2', $text );

        //Fix {{citation|title=[url title]}}
        $text = self::replaceOrKeep( '/(\{\{cit[ea][^{}]*?)(\s*\|\s*)(?:url|title)(\s*=\s*)\[([^][<>\s"]*) +([^]\n]+)\](?=[|}])/i', '$1$2url$3$4$2title$3$5', $text );

        return $text;
    }

    /**
     * Adds a subst'ed month/year date parameter to undated maintenance
     * templates (normalising their names) and applies other template fixes.
     *
     * @param string $text Wikitext
     * @return string
     */
    public static function fixDateTags( $text ) {

        // Shared tail of every dated maintenance template.
        $stamp = '|date={{subst:CURRENTMONTHNAME}} {{subst:CURRENTYEAR}}}}';

        // pattern => replacement, applied in this order.
        $dated = array(
            '/\{\{\s*(?:template:)?\s*(?:wikify(?:-date)?|wfy|wiki)(\s*\|\s*section)?\s*\}\}/iS' => '{{Wikify$1' . $stamp,
            '/\{\{(template:)?(Clean( ?up)?|CU|Tidy)\}\}/iS' => '{{Cleanup' . $stamp,
            '/\{\{(template:)?(Linkless|Orphan)\}\}/iS' => '{{Orphan' . $stamp,
            '/\{\{(template:)?(Unreferenced(sect)?|add references|cite[ -]sources?|cleanup-sources?|needs? references|no sources|no references?|not referenced|references|unref|unsourced)\}\}/iS' => '{{Unreferenced' . $stamp,
            '/\{\{(template:)?(Uncategori[sz]ed|Uncat|Classify|Category needed|Catneeded|categori[zs]e|nocats?)\}\}/iS' => '{{Uncategorized' . $stamp,
            '/\{\{(template:)?(Trivia2?|Too ?much ?trivia|Trivia section|Cleanup-trivia)\}\}/iS' => '{{Trivia' . $stamp,
            '/\{\{(template:)?(deadend|DEP)\}\}/iS' => '{{Deadend' . $stamp,
            '/\{\{(template:)?(copyedit|g(rammar )?check|copy-edit|cleanup-copyedit|cleanup-english)\}\}/iS' => '{{Copyedit' . $stamp,
            '/\{\{(template:)?(sources|refimprove|not verified)\}\}/iS' => '{{Refimprove' . $stamp,
            '/\{\{(template:)?(Expand)\}\}/iS' => '{{Expand' . $stamp,
            //$text = preg_replace( '/\{\{(?:\s*[Tt]emplate:)?(\s*(?:[Cc]n|[Ff]act|[Pp]roveit|[Cc]iteneeded|[Uu]ncited|[Cc]itation needed)\s*(?:\|[^{}]+(?\<!\|\s*date\s*=[^{}]+))?)\}\}/iS', "{{$1|date={{subst:CURRENTMONTHNAME}} {{subst:CURRENTYEAR}}}}", $text );
            '/\{\{(template:)?(COI|Conflict of interest|Selfpromotion)\}\}/iS' => '{{COI' . $stamp,
            '/\{\{(template:)?(Intro( |-)?missing|Nointro(duction)?|Lead missing|No ?lead|Missingintro|Opening|No-intro|Leadsection|No lead section)\}\}/iS' => '{{Intro missing' . $stamp,
            '/\{\{(template:)?([Pp]rimary ?[Ss]ources?|[Rr]eliable ?sources)\}\}/iS' => '{{Primary sources' . $stamp,
        );

        foreach( $dated as $pattern => $replacement ){
            $text = self::replaceOrKeep( $pattern, $replacement, $text );
        }

        //Other template fixes
        $text = self::replaceOrKeep( '/\{\{(?:Template:)?(Dab|Disamb|Disambiguation)\}\}/iS', '{{Disambig}}', $text );
        $text = self::replaceOrKeep( '/\{\{(?:Template:)?(Bio-dab|Hndisambig)/iS', '{{Hndis', $text );
        $text = self::replaceOrKeep( '/\{\{(?:Template:)?(Prettytable|Prettytable100)\}\}/iS', '{{subst:Prettytable}}', $text );
        $text = self::replaceOrKeep( '/\{\{(?:[Tt]emplate:)?((?:BASE)?PAGENAMEE?\}\}|[Ll]ived\||[Bb]io-cats\|)/iS', '{{subst:$1', $text );
        $text = self::replaceOrKeep( '/({{\s*[Aa]rticle ?issues\s*(?:\|[^{}]*|\|)\s*[Dd]o-attempt\s*=\s*)[^{}\|]+\|\s*att\s*=\s*([^{}\|]+)(?=\||}})/iS', '$1$2', $text );
        $text = self::replaceOrKeep( '/({{\s*[Aa]rticle ?issues\s*(?:\|[^{}]*|\|)\s*[Cc]opyedit\s*)for\s*=\s*[^{}\|]+\|\s*date(\s*=[^{}\|]+)(?=\||}})/iS', '$1$2', $text );
        $text = self::replaceOrKeep( '/\{\{[Aa]rticle ?issues(?:\s*\|\s*(?:section|article)\s*=\s*[Yy])?\s*\}\}/iS', '', $text );
        $text = self::replaceOrKeep( '/\{\{[Cc]ommons\|\s*[Cc]ategory:\s*([^{}]+?)\s*\}\}/iS', '{{Commons category|$1}}', $text );
        $text = self::replaceOrKeep( '/(?!{{[Cc]ite wikisource)(\{\{\s*(?:[Cc]it[ae]|[Aa]rticle ?issues)[^{}]*)\|\s*(\}\}|\|)/iS', '$1$2', $text );
        $text = self::replaceOrKeep( '/({{\s*[Aa]rticle ?issues[^{}]*\|\s*)(\w+)\s*=\s*([^\|}{]+?)\s*\|((?:[^{}]*?\|)?\s*)\2(\s*=\s*)\3(\s*(\||\}\}))/iS', '$1$4$2$5$3$6', $text );
        $text = self::replaceOrKeep( '/(\{\{\s*[Aa]rticle ?issues[^{}]*\|\s*)(\w+)(\s*=\s*[^\|}{]+(?:\|[^{}]+?)?)\|\s*\2\s*=\s*(\||\}\})/iS', '$1$2$3$4', $text );
        $text = self::replaceOrKeep( '/(\{\{\s*[Aa]rticle ?issues[^{}]*\|\s*)(\w+)\s*=\s*\|\s*((?:[^{}]+?\|)?\s*\2\s*=\s*[^\|}{\s])/iS', '$1$3', $text );
        $text = self::replaceOrKeep( '/{{\s*(?:[Cc]n|[Ff]act|[Pp]roveit|[Cc]iteneeded|[Uu]ncited)(?=\s*[\|}])/S', '{{Citation needed', $text );

        return $text;
    }

    /**
     * Cleans up HTML in wikitext: removes some header-row colour attributes
     * and redundant line breaks, normalises <br> variants (clearing breaks
     * become templates), and simplifies <font> markup.
     *
     * All patterns use "/" as delimiter, so every literal slash inside a
     * pattern is escaped; an unescaped one makes preg_replace() return NULL.
     *
     * @param string $text Wikitext
     * @return string
     */
    public static function fixHTML( $text ) {

        $text = self::replaceOrKeep( '/(\n\{\| class="wikitable[^\n]+\n\|-[^\n]*)(bgcolor\W+CCC+|background\W+ccc+)(?=\W+\n!)/mi', '$1', $text );

        $text = self::replaceOrKeep( '/(\n([^<\n]|<(?!br[^>]*>))+\w+[^\w\s<>]*)<br[ \/]*>(?=\n[*#:;]|\n?<div|\n?<blockquote)/mi', '$1', $text );

        $text = self::replaceOrKeep(
            array(
                '/(<br[^<\/>]*>)\n?<\/br>/mi',
                '/<[\/]?br([^{\/}<>]*?\/?)>/mi',
                '/<br\s\S*clear\S*(all|both)\S*[\s\/]*>/i',
                '/<br\s\S*clear\S*(left|right)\S*[\s\/]*>/',
            ),
            array(
                '$1',
                '<br$1>',
                '{{-}}',
                '{{clear$1}}'
            ),
            $text
        );

        $text = self::replaceOrKeep( '/(<font\b[^<>]*)> *\n?<font\b([^<>]*>)((?:[^<]|<(?!\/?font))*?<\/font> *\n?)<\/font>/mi', '$1$2$3', $text );

        $text = self::replaceOrKeep( '/<font ([^<>]*)>\[\[([^[\]{|}]+)\|([^[\]\n]*?)\]\]<\/font>/mi', '[[$2|<font $1>$3</font>]]', $text );

        $text = self::replaceOrKeep( '/<font(( +style="[^"]+")+)>(?!\[\[)((?:[^<]|<(?!\/?font))*?)(?<!\]\])<\/font>/mi', '<span$1>$3</span>', $text );

        return $text;
    }

    /**
     * Repairs external and file links: strips some tracking parameters,
     * fixes doubled brackets and doubled "http://", rewrites Image: to File:
     * for non-image media, and turns toolserver URLs into [[tools:]] links.
     *
     * @param string $text Wikitext
     * @return string
     */
    public static function fixHyperlinking( $text ) {

        $text = self::replaceOrKeep( '/(http:\/\/[^][<>\s"|])(&client=firefox-a|<=)(?=[][<>\s"|&])/', '$1', $text );

        $text = str_replace( '[{{SERVER}}{{localurl:', '[{{fullurl:', $text );

        $text = self::replaceOrKeep( '/[(](?:see|) *(http:\/\/[^][<>"\s(|)]+[\w=\/&])\s?[)]/i', '<$1>', $text );

        $text = self::replaceOrKeep( '/\[\[(https?:\/\/[^\]\n]+?)\]\]/', '[$1]', $text );
        $text = self::replaceOrKeep( '/\[\[(https?:\/\/.+?)\]/', '[$1]', $text );

        $text = self::replaceOrKeep( '/\[\[(:?)Image:([^][{|}]+\.(pdf|midi?|ogg|ogv|xcf))(?=\||\]\])/i', '[[$1File:$2', $text );

        $text = self::replaceOrKeep(
            array(
                '/(http:\/* *){2,}(?=[a-z0-9:.\-]+\/)/i',
                "/(\[\w+:\/\/[^][<>\"\s]+?)''/i",
                '/\[\n*(\w+:\/\/[^][<>"\s]+ *(?:(?<= )[^\n\]<>]*?|))\n([^[\]<>{}\n=@\/]*?) *\n*\]/i',
                '/\[(\w+:\/\/[^][<>"\s]+) +([Cc]lick here|[Hh]ere|\W|→|[ -\/;-@]) *\]/i',
            ),
            array(
                'http://',
                "$1 ''",
                '[$1 $2]',
                '$2 [$1]',
            ),
            $text
        );

        $text = self::replaceOrKeep( '/(\[\[(?:File|Image):[^][<>{|}]+)#(|filehistory|filelinks|file)(?=[\]|])/i', '$1', $text );

        // Slashes in "http://" and after the host must be escaped for the "/" delimiter.
        $text = self::replaceOrKeep( '/\[http:\/\/(www\.toolserver\.org|toolserver\.org|tools\.wikimedia\.org|tools\.wikimedia\.de)\/([^][<>"\s;?]*)\?? ([^]\n]+)\]/', '[[tools:$2|$3]]', $text );

        return $text;
    }

    /**
     * Applies the typo rules from Wikipedia:AutoWikiBrowser/Typos to $text.
     *
     * The rules are fetched once through the global $script and cached in
     * self::$typo_list. Text containing a "sic"-style marker (or any other
     * token in the skip pattern) is returned untouched. A rule is skipped
     * when it matches the title or matches inside a link or template.
     *
     * @Fixme Method getWiki() not found.
     *
     * @see getWiki()
     *
     * @param string $text
     * @param string $title
     * @return mixed
     */
    public static function fixTypos( $text, $title ) {

        if( !count( self::$typo_list ) ) {
            global $script;

            $str = $script->getWiki()->initPage( 'Wikipedia:AutoWikiBrowser/Typos' )->get_text();

            foreach( explode( "\n", $str ) as $line ){
                if( substr( $line, 0, 5 ) === "<Typo" ) {

                    //<Typo word="the first time" find="\b(T|t)he\s+(very\s+)?fr?ist\s+time\b" replace="$1he $2first time" />
                    preg_match( '/\<Typo word=\"(.*)\" find=\"(.*)\" replace=\"(.*)\" \/\>/', $line, $m );

                    if( !empty( $m[2] ) && !empty( $m[3] ) ) {
                        self::$typo_list[] = array( 'word' => $m[1], 'find' => $m[2], 'replace' => $m[3] );
                    }
                }
            }

        }

        shuffle( self::$typo_list ); //So that if it quits randomly, it will give equal prejudice to each typo.

        if( !count( self::$typo_list ) || preg_match( '/133t|-ology|\\(sic\\)|\\[sic\\]|\\[\'\'sic\'\'\\]|\\{\\{sic\\}\\}|spellfixno/', $text ) ) return $text;

        foreach( self::$typo_list as $typo ){

            // The rules come from wiki content and may not be valid PCRE, so
            // failures are suppressed and the rule is simply not applied.
            if( @preg_match( '/' . $typo['find'] . '/S', $title ) ) continue; //Skip if matches title

            //Skip typos in links
            if( @preg_match( "/(\{\{|\[\[)[^\[\]\r\n\|\{\}]*?" . $typo['find'] . "[^\[\]\r\n\|\{\}]*?(\]\]|\}\})/S", $text ) ) continue;

            $text2 = @preg_replace( '/' . $typo['find'] . '/S', $typo['replace'], $text );
            if( !is_null( $text2 ) ) $text = $text2;
        }

        return $text;
    }
}