<?php

/**
 * This file is part of Peachy MediaWiki Bot API
 *
 * Peachy is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */

/**
 * PeachyAWBFunctions class.
 *
 * It consists of various static functions used for the PeachyAWB script.
 * Much of the code is derived from Pywikipedia and AWB, both under the GPL.
 *
 * Every fix* method takes wikitext and returns the transformed wikitext.
 * All regular expressions in this class use "/" as the delimiter, so any
 * literal slash inside a pattern MUST be written as "\/"; otherwise PCRE
 * treats the rest of the pattern as modifiers and preg_replace() returns null.
 */
class PeachyAWBFunctions {

	/**
	 * HTML tag names, grouped by the role noted in the inline comments.
	 * The groups overlap, so the flat list contains duplicate entries.
	 *
	 * @var array
	 */
	public static $html_tags = array(
		# Tags that must be closed
		'b', 'del', 'i', 'ins', 'u', 'font', 'big', 'small', 'sub', 'sup', 'h1',
		'h2', 'h3', 'h4', 'h5', 'h6', 'cite', 'code', 'em', 's',
		'strike', 'strong', 'tt', 'var', 'div', 'center',
		'blockquote', 'ol', 'ul', 'dl', 'table', 'caption', 'pre',
		'ruby', 'rt', 'rb', 'rp', 'p', 'span', 'u', 'abbr',
		# Single
		'br', 'hr', 'li', 'dt', 'dd',
		# Elements that cannot have close tags
		'br', 'hr',
		# Tags that can be nested--??
		'table', 'tr', 'td', 'th', 'div', 'blockquote', 'ol', 'ul',
		'dl', 'font', 'big', 'small', 'sub', 'sup', 'span',
		# Can only appear inside table, we will close them
		'td', 'th', 'tr',
		# Tags used by list
		'ul', 'ol',
		# Tags that can appear in a list
		'li',
		## pairs
		# "b", "i", "u", "font", "big", "small", "sub", "sup", "h1",
		# "h2", "h3", "h4", "h5", "h6", "cite", "code", "em", "s", "span",
		# "strike", "strong", "tt", "var", "div", "center",
		# "blockquote", "ol", "ul", "dl", "table", "caption", "pre",
		# "ruby", "rt" , "rb" , "rp",
		## single
		# "br", "p", "hr", "li", "dt", "dd",
		## nest
		# "table", "tr", "td", "th", "div", "blockquote", "ol", "ul",
		# "dl", "font", "big", "small", "sub", "sup",
		## table tags
		# "td", "th", "tr",

	);

	/**
	 * HTML attribute names.
	 *
	 * @var array
	 */
	public static $html_attrs = array(
		"title", "align", "lang", "dir", "width", "height",
		"bgcolor", "clear", "noshade",
		"cite", "size", "face", "color",
		"type", "start", "value", "compact",
		#/* For various lists, mostly deprecated but safe */
		"summary", "width", "border", "frame", "rules",
		"cellspacing", "cellpadding", "valign", "char",
		"charoff", "colgroup", "col", "span", "abbr", "axis",
		"headers", "scope", "rowspan", "colspan",
		"id", "class", "name", "style"
	);

	/**
	 * Map of hexadecimal colour codes to their named-colour equivalents.
	 *
	 * @var array
	 */
	public static $html_colors = array(
		'#F0FFFF' => 'azure', '#F5F5DC' => 'beige', '#FFE4C4' => 'bisque', '#000000' => 'black', '#0000FF' => 'blue',
		'#A52A2A' => 'brown', '#FF7F50' => 'coral', '#FFF8DC' => 'cornsilk', '#DC143C' => 'crimson',
		'#00FFFF' => 'cyan', '#00008B' => 'darkBlue', '#008B8B' => 'darkCyan', '#A9A9A9' => 'darkGray',
		'#8B0000' => 'darkRed', '#FF1493' => 'deepPink', '#696969' => 'dimGray', '#FF00FF' => 'fuchsia',
		'#FFD700' => 'gold', '#808080' => 'gray', '#008000' => 'green', '#F0FFF0' => 'honeyDew', '#FF69B4' => 'hotPink',
		'#4B0082' => 'indigo', '#FFFFF0' => 'ivory', '#F0E68C' => 'khaki', '#E6E6FA' => 'lavender', '#00FF00' => 'lime',
		'#FAF0E6' => 'linen', '#800000' => 'maroon', '#FFE4B5' => 'moccasin', '#000080' => 'navy',
		'#FDF5E6' => 'oldLace', '#808000' => 'olive', '#FFA500' => 'orange', '#DA70D6' => 'orchid', '#CD853F' => 'peru',
		'#FFC0CB' => 'pink', '#DDA0DD' => 'plum', '#800080' => 'purple', '#FF0000' => 'red', '#FA8072' => 'salmon',
		'#2E8B57' => 'seaGreen', '#FFF5EE' => 'seaShell', '#A0522D' => 'sienna', '#C0C0C0' => 'silver',
		'#87CEEB' => 'skyBlue', '#FFFAFA' => 'snow', '#D2B48C' => 'tan', '#008080' => 'teal', '#D8BFD8' => 'thistle',
		'#FF6347' => 'tomato', '#EE82EE' => 'violet', '#F5DEB3' => 'wheat', '#FFFFFF' => 'white', '#FFFF00' => 'yellow',
	);

	/**
	 * Regex fragment matching the word "stub" with either initial case.
	 *
	 * @var string
	 */
	public static $stub_search = '[Ss]tub';

	/**
	 * Interwiki map; empty until fixVars() fills it.
	 *
	 * @var array
	 */
	public static $interwiki_map = array();

	/**
	 * Typo rules, each an array with 'word', 'find' and 'replace' keys.
	 * Loaded on first use by fixTypos().
	 *
	 * @var array
	 */
	public static $typo_list = array();

	/**
	 * Populates self::$interwiki_map from the wiki's siteinfo.
	 *
	 * @param Wiki $wiki Wiki object queried for the 'interwikimap' siteinfo property
	 * @return void
	 */
	public static function fixVars( Wiki $wiki ) {
		$interwiki = $wiki->siteinfo( array( 'interwikimap' ) );
		self::$interwiki_map = $interwiki['query']['interwikimap'];
	}

	/**
	 * Normalises citation templates: merges {{cite web}} variants, strips
	 * quote formatting from selected parameters, tidies format= values,
	 * repairs accessdate parameters and numeric dates, and splits
	 * "[url title]" values into separate url/title parameters.
	 *
	 * @param string $text Wikitext to process
	 * @return string Processed wikitext
	 */
	public static function fixCitations( $text ) {

		//merge all variant of cite web
		$text = preg_replace( '/\{\{\s*(cite[_ \-]*(url|web|website)|Web[_ \-]*(citation|cite|reference|reference[_ ]4))(?=\s*\|)/i', '{{cite web', $text );

		//Remove formatting on certain parameters
		$text = preg_replace( "/(\|\s*(?:agency|author|first|format|language|last|location|month|publisher|work|year)\s*=\s*)(''|'''|''''')((?:\[\[[^][|]+|\[\[|)[][\w\s,.~!`\"]+)(''+)(?=\s*\|[\w\s]+=|\s*\}\})/", '$1$3', $text );

		//Unlink PDF in format parameters
		$text = preg_replace( '/(\|\s*format\s*=\s*)\[\[(adobe|portable|document|file|format|pdf|\.|\s|\(|\)|\|)+\]\]/i', '$1PDF', $text );
		$text = preg_replace( '/(\|\s*format\s*=\s*)(\s*\.?(adobe|portable|document|file|format|pdf|\(|\)))+?(\s*[|}])/i', '$1PDF$4', $text );

		//No |format=HTML says {{cite web/doc}}
		$text = preg_replace( '/(\{\{cite[^{}]+)\|\s*format\s*=\s*(\[\[[^][|]+\||\[\[|)(\]\]| |html?|world|wide|web)+\s*(?=\||\}\})/i', '$1', $text );

		//Fix accessdate tags [[WP:AWB/FR#Fix accessdate tags]]
		$text = preg_replace(
			array(
				'/(\|\s*)a[ces]{3,8}date(\s*=\s*)(?=[^{|}]*20\d\d|\}\})/',
				// Slashes inside the character classes are escaped: "/" is the delimiter.
				'/accessdate(\s*=\s*)\[*(200\d)[\/_\-](\d{2})[\/_\-](\d{2})\]*/',
				'/(\|\s*)a[cs]*es*mou*nthday(\s*=\s*)/',
				'/(\|\s*)a[cs]*es*daymou*nth(\s*=\s*)/',
				'/(\|\s*)accessdate(\s*=\s*[0-3]?[0-9] +(?:Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)\w*)([^][<>}{]*accessyear[\s=]+20\d\d)/',
				'/(\|\s*)accessdate(\s*=\s*(?:Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)\w* +[0-3]?[0-9])([^][<>}{]*accessyear[\s=]+20\d\d)/',
				'/(\|\s*)accessdaymonth(\s*=\s*)\s*([^{|}<>]+?)\s*(\|[^][<>}{]*accessyear[\s=]+)(20\d\d)/',
				'/(\|\s*)accessmonthday(\s*=\s*)\s*([^{|}<>]+?)\s*(\|[^][<>}{]*accessyear[\s=]+)(20\d\d)/',
			),
			array(
				'$1accessdate$2',
				'accessdate$1$2-$3-$4',
				'$1accessmonthday$2',
				'$1accessdaymonth$2',
				'$1accessdaymonth$2$3',
				'$1accessmonthday$2$3',
				'$1accessdate$2$3 $5',
				'$1accessdate$2$3, $5',
			),
			$text
		);

		//Fix improper dates
		$text = preg_replace(
			array(
				// Two-digit year 50-99: prefix with "19"
				'/(\{\{cit[ea][^{}]+\|\s*date\s*=\s*\d{2}[\/\-.]\d{2}[\/\-.])([5-9]\d)(?=\s*[|}])/i',
				// Unambiguous MM/DD/YYYY (day > 12)
				'/(\{\{cit[ea][^{}]+\|\s*date\s*=\s*)(0[1-9]|1[012])[\/\-.](1[3-9]|2\d|3[01])[\/\-.](19\d\d|20\d\d)(?=\s*[|}])/i',
				// Unambiguous DD/MM/YYYY (day > 12)
				'/(\{\{cit[ea][^{}]+\|\s*date\s*=\s*)(1[3-9]|2\d|3[01])[\/\-.](0[1-9]|1[012])[\/\-.](19\d\d|20\d\d)(?=\s*[|}])/i',
			),
			array(
				'${1}19$2',
				'$1$4-$2-$3',
				'$1$4-$3-$2',
			),

			$text
		);

		//Fix URLS lacking http://
		// NOTE(review): the path part "[^][{|}:\s"]" has no quantifier, so only a
		// one-character path is matched; possibly a "+" is missing. Left as-is.
		$text = preg_replace( '/(\|\s*url\s*=\s*)([0-9a-z.\-]+\.[a-z]{2,4}\/[^][{|}:\s"]\s*[|}])/', '$1http://$2', $text );

		//Fix {{citation|title=[url title]}}
		$text = preg_replace( '/(\{\{cit[ea][^{}]*?)(\s*\|\s*)(?:url|title)(\s*=\s*)\[([^][<>\s"]*) +([^]\n]+)\](?=[|}])/i', '$1$2url$3$4$2title$3$5', $text );

		return $text;

	}

	/**
	 * Rewrites undated maintenance templates (and their aliases) to the
	 * canonical template name with a substituted |date= parameter, then
	 * applies a set of other template clean-ups ({{Article issues}},
	 * {{Commons}}, disambiguation aliases, {{Citation needed}} aliases).
	 *
	 * The rules run in order; later rules see the output of earlier ones.
	 *
	 * @param string $text Wikitext to process
	 * @return string Processed wikitext
	 */
	public static function fixDateTags( $text ) {

		$text = preg_replace( '/\{\{\s*(?:template:)?\s*(?:wikify(?:-date)?|wfy|wiki)(\s*\|\s*section)?\s*\}\}/iS', "{{Wikify$1|date={{subst:CURRENTMONTHNAME}} {{subst:CURRENTYEAR}}}}", $text );
		$text = preg_replace( '/\{\{(template:)?(Clean( ?up)?|CU|Tidy)\}\}/iS', "{{Cleanup|date={{subst:CURRENTMONTHNAME}} {{subst:CURRENTYEAR}}}}", $text );
		$text = preg_replace( '/\{\{(template:)?(Linkless|Orphan)\}\}/iS', "{{Orphan|date={{subst:CURRENTMONTHNAME}} {{subst:CURRENTYEAR}}}}", $text );
		$text = preg_replace( '/\{\{(template:)?(Unreferenced(sect)?|add references|cite[ -]sources?|cleanup-sources?|needs? references|no sources|no references?|not referenced|references|unref|unsourced)\}\}/iS', "{{Unreferenced|date={{subst:CURRENTMONTHNAME}} {{subst:CURRENTYEAR}}}}", $text );
		$text = preg_replace( '/\{\{(template:)?(Uncategori[sz]ed|Uncat|Classify|Category needed|Catneeded|categori[zs]e|nocats?)\}\}/iS', "{{Uncategorized|date={{subst:CURRENTMONTHNAME}} {{subst:CURRENTYEAR}}}}", $text );
		$text = preg_replace( '/\{\{(template:)?(Trivia2?|Too ?much ?trivia|Trivia section|Cleanup-trivia)\}\}/iS', "{{Trivia|date={{subst:CURRENTMONTHNAME}} {{subst:CURRENTYEAR}}}}", $text );
		$text = preg_replace( '/\{\{(template:)?(deadend|DEP)\}\}/iS', "{{Deadend|date={{subst:CURRENTMONTHNAME}} {{subst:CURRENTYEAR}}}}", $text );
		$text = preg_replace( '/\{\{(template:)?(copyedit|g(rammar )?check|copy-edit|cleanup-copyedit|cleanup-english)\}\}/iS', "{{Copyedit|date={{subst:CURRENTMONTHNAME}} {{subst:CURRENTYEAR}}}}", $text );
		$text = preg_replace( '/\{\{(template:)?(sources|refimprove|not verified)\}\}/iS', "{{Refimprove|date={{subst:CURRENTMONTHNAME}} {{subst:CURRENTYEAR}}}}", $text );
		$text = preg_replace( '/\{\{(template:)?(Expand)\}\}/iS', "{{Expand|date={{subst:CURRENTMONTHNAME}} {{subst:CURRENTYEAR}}}}", $text );
		//$text = preg_replace( '/\{\{(?:\s*[Tt]emplate:)?(\s*(?:[Cc]n|[Ff]act|[Pp]roveit|[Cc]iteneeded|[Uu]ncited|[Cc]itation needed)\s*(?:\|[^{}]+(?\<!\|\s*date\s*=[^{}]+))?)\}\}/iS', "{{$1|date={{subst:CURRENTMONTHNAME}} {{subst:CURRENTYEAR}}}}", $text );
		$text = preg_replace( '/\{\{(template:)?(COI|Conflict of interest|Selfpromotion)\}\}/iS', "{{COI|date={{subst:CURRENTMONTHNAME}} {{subst:CURRENTYEAR}}}}", $text );
		$text = preg_replace( '/\{\{(template:)?(Intro( |-)?missing|Nointro(duction)?|Lead missing|No ?lead|Missingintro|Opening|No-intro|Leadsection|No lead section)\}\}/iS', "{{Intro missing|date={{subst:CURRENTMONTHNAME}} {{subst:CURRENTYEAR}}}}", $text );
		$text = preg_replace( '/\{\{(template:)?([Pp]rimary ?[Ss]ources?|[Rr]eliable ?sources)\}\}/iS', "{{Primary sources|date={{subst:CURRENTMONTHNAME}} {{subst:CURRENTYEAR}}}}", $text );

		//Other template fixes
		$text = preg_replace( '/\{\{(?:Template:)?(Dab|Disamb|Disambiguation)\}\}/iS', "{{Disambig}}", $text );
		$text = preg_replace( '/\{\{(?:Template:)?(Bio-dab|Hndisambig)/iS', "{{Hndis", $text );
		$text = preg_replace( '/\{\{(?:Template:)?(Prettytable|Prettytable100)\}\}/iS', "{{subst:Prettytable}}", $text );
		$text = preg_replace( '/\{\{(?:[Tt]emplate:)?((?:BASE)?PAGENAMEE?\}\}|[Ll]ived\||[Bb]io-cats\|)/iS', "{{subst:$1", $text );
		$text = preg_replace( '/({{\s*[Aa]rticle ?issues\s*(?:\|[^{}]*|\|)\s*[Dd]o-attempt\s*=\s*)[^{}\|]+\|\s*att\s*=\s*([^{}\|]+)(?=\||}})/iS', "$1$2", $text );
		$text = preg_replace( '/({{\s*[Aa]rticle ?issues\s*(?:\|[^{}]*|\|)\s*[Cc]opyedit\s*)for\s*=\s*[^{}\|]+\|\s*date(\s*=[^{}\|]+)(?=\||}})/iS', "$1$2", $text );
		$text = preg_replace( '/\{\{[Aa]rticle ?issues(?:\s*\|\s*(?:section|article)\s*=\s*[Yy])?\s*\}\}/iS', "", $text );
		$text = preg_replace( '/\{\{[Cc]ommons\|\s*[Cc]ategory:\s*([^{}]+?)\s*\}\}/iS', "{{Commons category|$1}}", $text );
		$text = preg_replace( '/(?!{{[Cc]ite wikisource)(\{\{\s*(?:[Cc]it[ae]|[Aa]rticle ?issues)[^{}]*)\|\s*(\}\}|\|)/iS', "$1$2", $text );
		$text = preg_replace( '/({{\s*[Aa]rticle ?issues[^{}]*\|\s*)(\w+)\s*=\s*([^\|}{]+?)\s*\|((?:[^{}]*?\|)?\s*)\2(\s*=\s*)\3(\s*(\||\}\}))/iS', "$1$4$2$5$3$6", $text );
		$text = preg_replace( '/(\{\{\s*[Aa]rticle ?issues[^{}]*\|\s*)(\w+)(\s*=\s*[^\|}{]+(?:\|[^{}]+?)?)\|\s*\2\s*=\s*(\||\}\})/iS', "$1$2$3$4", $text );
		$text = preg_replace( '/(\{\{\s*[Aa]rticle ?issues[^{}]*\|\s*)(\w+)\s*=\s*\|\s*((?:[^{}]+?\|)?\s*\2\s*=\s*[^\|}{\s])/iS', "$1$3", $text );
		$text = preg_replace( '/{{\s*(?:[Cc]n|[Ff]act|[Pp]roveit|[Cc]iteneeded|[Uu]ncited)(?=\s*[\|}])/S', "{{Citation needed", $text );

		return $text;
	}

	/**
	 * Cleans up HTML embedded in wikitext: removes a grey background from
	 * wikitable header rows, drops trailing <br> before block elements,
	 * normalises <br> variants (including clear= forms, which become
	 * {{-}} / {{clear...}}), collapses nested <font> tags, moves <font>
	 * inside piped links, and converts style-only <font> to <span>.
	 *
	 * @param string $text Wikitext to process
	 * @return string Processed wikitext
	 */
	public static function fixHTML( $text ) {

		$text = preg_replace( '/(\n\{\| class="wikitable[^\n]+\n\|-[^\n]*)(bgcolor\W+CCC+|background\W+ccc+)(?=\W+\n!)/mi', '$1', $text );

		// Every literal "/" below is escaped because "/" is the regex delimiter.
		$text = preg_replace( '/(\n([^<\n]|<(?!br[^>]*>))+\w+[^\w\s<>]*)<br[ \/]*>(?=\n[*#:;]|\n?<div|\n?<blockquote)/mi', '$1', $text );

		$text = preg_replace(
			array(
				'/(<br[^<\/>]*>)\n?<\/br>/mi',
				'/<[\/]?br([^{\/}<>]*?\/?)>/mi',
				'/<br\s\S*clear\S*(all|both)\S*[\s\/]*>/i',
				'/<br\s\S*clear\S*(left|right)\S*[\s\/]*>/',
			),
			array(
				'$1',
				'<br$1>',
				'{{-}}',
				'{{clear$1}}'
			),
			$text
		);

		$text = preg_replace( '/(<font\b[^<>]*)> *\n?<font\b([^<>]*>)((?:[^<]|<(?!\/?font))*?<\/font> *\n?)<\/font>/mi', '$1$2$3', $text );

		$text = preg_replace( '/<font ([^<>]*)>\[\[([^[\]{|}]+)\|([^[\]\n]*?)\]\]<\/font>/mi', '[[$2|<font $1>$3</font>]]', $text );

		$text = preg_replace( '/<font(( +style="[^"]+")+)>(?!\[\[)((?:[^<]|<(?!\/?font))*?)(?<!\]\])<\/font>/mi', '<span$1>$3</span>', $text );

		return $text;

	}

	/**
	 * Repairs external-link syntax: strips tracking parameters, fixes
	 * double-bracketed external links, renames Image: to File: for
	 * non-image media, repairs malformed "http://" prefixes and link
	 * labels, removes file-page anchors, and converts toolserver URLs
	 * to [[tools:]] interwiki links.
	 *
	 * @param string $text Wikitext to process
	 * @return string Processed wikitext
	 */
	public static function fixHyperlinking( $text ) {

		$text = preg_replace( '/(http:\/\/[^][<>\s"|])(&client=firefox-a|<=)(?=[][<>\s"|&])/', '$1', $text );

		$text = str_replace( '[{{SERVER}}{{localurl:', '[{{fullurl:', $text );

		$text = preg_replace( '/[(](?:see|) *(http:\/\/[^][<>"\s(|)]+[\w=\/&])\s?[)]/i', '<$1>', $text );

		$text = preg_replace( '/\[\[(https?:\/\/[^\]\n]+?)\]\]/', '[$1]', $text );
		$text = preg_replace( '/\[\[(https?:\/\/.+?)\]/', '[$1]', $text );

		$text = preg_replace( '/\[\[(:?)Image:([^][{|}]+\.(pdf|midi?|ogg|ogv|xcf))(?=\||\]\])/i', '[[$1File:$2', $text );

		$text = preg_replace(
			array(
				'/(http:\/* *){2,}(?=[a-z0-9:.\-]+\/)/i',
				"/(\[\w+:\/\/[^][<>\"\s]+?)''/i",
				'/\[\n*(\w+:\/\/[^][<>"\s]+ *(?:(?<= )[^\n\]<>]*?|))\n([^[\]<>{}\n=@\/]*?) *\n*\]/i',
				'/\[(\w+:\/\/[^][<>"\s]+) +([Cc]lick here|[Hh]ere|\W|→|[ -\/;-@]) *\]/i',
			),
			array(
				'http://',
				"$1 ''",
				'[$1 $2]',
				'$2 [$1]',
			),
			$text
		);

		$text = preg_replace( '/(\[\[(?:File|Image):[^][<>{|}]+)#(|filehistory|filelinks|file)(?=[\]|])/i', '$1', $text );

		// "http://" and the path separator are escaped because "/" is the delimiter.
		$text = preg_replace( '/\[http:\/\/(www\.toolserver\.org|toolserver\.org|tools\.wikimedia\.org|tools\.wikimedia\.de)\/([^][<>"\s;?]*)\?? ([^]\n]+)\]/', '[[tools:$2|$3]]', $text );

		return $text;

	}

	/**
	 * Applies the AutoWikiBrowser typo rules to the text.
	 *
	 * On first use the rules are loaded from the wiki page
	 * 'Wikipedia:AutoWikiBrowser/Typos' through the global $script object
	 * and cached in self::$typo_list. A rule is skipped when it matches
	 * the page title or matches inside a link/template; the whole text is
	 * returned untouched when it contains a "sic"-style marker.
	 *
	 * @Fixme Method getWiki() not found.
	 *
	 * @see getWiki()
	 *
	 * @param string $text Wikitext to process
	 * @param string $title Title of the page the text belongs to
	 * @return mixed Processed wikitext
	 */
	public static function fixTypos( $text, $title ) {

		if( !count( self::$typo_list ) ) {
			global $script;

			$str = $script->getWiki()->initPage( 'Wikipedia:AutoWikiBrowser/Typos' )->get_text();

			foreach( explode( "\n", $str ) as $line ){
				if( substr( $line, 0, 5 ) == "<Typo" ) {

					//<Typo word="the first time" find="\b(T|t)he\s+(very\s+)?fr?ist\s+time\b" replace="$1he $2first time" />
					preg_match( '/\<Typo word=\"(.*)\" find=\"(.*)\" replace=\"(.*)\" \/\>/', $line, $m );

					if( !empty( $m[2] ) && !empty( $m[3] ) ) {
						self::$typo_list[] = array( 'word' => $m[1], 'find' => $m[2], 'replace' => $m[3] );
					}
				}
			}

		}

		shuffle( self::$typo_list ); //So that if it quits randomly, it will give equal prejudice to each typo.

		if( !count( self::$typo_list ) || preg_match( '/133t|-ology|\\(sic\\)|\\[sic\\]|\\[\'\'sic\'\'\\]|\\{\\{sic\\}\\}|spellfixno/', $text ) ) return $text;

		foreach( self::$typo_list as $typo ){

			// The rule patterns come from a wiki page and may not be valid PCRE;
			// warnings are suppressed on purpose and a failed rule is skipped.
			if( @preg_match( '/' . $typo['find'] . '/S', $title ) ) continue; //Skip if matches title

			//Skip typos in links
			if( @preg_match( "/(\{\{|\[\[)[^\[\]\r\n\|\{\}]*?" . $typo['find'] . "[^\[\]\r\n\|\{\}]*?(\]\]|\}\})/S", $text ) ) continue;

			$text2 = @preg_replace( '/' . $typo['find'] . '/S', $typo['replace'], $text );
			// preg_replace() returns null on a pattern error; keep the previous text then.
			if( !is_null( $text2 ) ) $text = $text2;
		}
		return $text;
	}
}