1
|
|
|
<?php |
2
|
|
|
|
3
|
|
|
namespace SMW\Parser; |
4
|
|
|
|
5
|
|
|
use SMW\InTextAnnotationParser; |
6
|
|
|
|
7
|
|
|
/**
 * Static helpers that encode ("obfuscate") and decode the square brackets
 * and pipes of wiki links so that links nested inside an in-text annotation
 * such as [[Foo::[[Bar]]]] do not disturb the annotation pattern matching.
 *
 * @license GNU GPL v2+
 * @since 2.5
 *
 * @author mwjames
 */
class Obfuscator {

	/**
	 * Encodes single square brackets, keeps doubled ones and then lets
	 * doObfuscate() process annotations that enclose further [[ ... ]] links.
	 *
	 * @since 2.5
	 *
	 * @param string $text
	 * @param InTextAnnotationParser $parser
	 *
	 * @return string
	 */
	public static function obfuscateLinks( $text, InTextAnnotationParser $parser ) {

		// Use &#x005B; instead of &#91; to distinguish it from the MW's Sanitizer
		// who uses the same decode sequence and avoid issues when removing links
		// after obfuscation

		// Filter simple [ ... ] from [[ ... ]] links and ensure to find the correct
		// start and end in case of [[Foo::[[Bar]]]] or [[Foo::[http://example.org/foo]]]
		//
		// str_replace applies the pairs in order: first every bracket is
		// encoded, afterwards the doubled sequences are restored so that only
		// single brackets remain encoded.
		$text = str_replace(
			array( '[', ']', '&#x005B;&#x005B;', '&#x005D;&#x005D;&#x005D;&#x005D;', '&#x005D;&#x005D;&#x005D;', '&#x005D;&#x005D;' ),
			array( '&#x005B;', '&#x005D;', '[[', ']]]]', '&#x005D;]]', ']]' ),
			$text
		);

		// Deep nesting is NOT supported as in [[Foo::[[abc]] [[Bar::123[[abc]] ]] ]]
		return self::doObfuscate( $text, $parser );
	}

	/**
	 * Reverts the encoding applied by encodeLinks().
	 *
	 * @since 2.5
	 *
	 * @param string $text
	 *
	 * @return string
	 */
	public static function removeLinkObfuscation( $text ) {
		return str_replace(
			array( '&#x005B;', '&#x005D;', '&#124;' ),
			array( '[', ']', '|' ),
			$text
		);
	}

	/**
	 * Replaces every square bracket and pipe with its encoded entity.
	 *
	 * @since 2.5
	 *
	 * @param string $text
	 *
	 * @return string
	 */
	public static function encodeLinks( $text ) {
		return str_replace(
			array( '[', ']', '|' ),
			array( '&#x005B;', '&#x005D;', '&#124;' ),
			$text
		);
	}

	/**
	 * Turns the percent-encoded sequences %5B and %5D back into [ and ].
	 *
	 * @since 2.5
	 *
	 * @param string $text
	 *
	 * @return string
	 */
	public static function decodeSquareBracket( $text ) {
		return str_replace( array( '%5B', '%5D' ), array( '[', ']' ), $text );
	}

	/**
	 * Encodes the opening brackets of everything matched by the
	 * LinksProcessor pattern, after decoding %5B/%5D in the text.
	 *
	 * @since 2.5
	 *
	 * @param string $text
	 *
	 * @return string
	 */
	public static function obfuscateAnnotation( $text ) {
		return preg_replace_callback(
			LinksProcessor::getRegexpPattern( false ),
			function( array $matches ) {
				return str_replace( '[', '&#x005B;', $matches[0] );
			},
			self::decodeSquareBracket( $text )
		);
	}

	/**
	 * Replaces annotations matched by the LinksProcessor pattern with their
	 * caption or, when no caption is given, with their value.
	 *
	 * @since 2.5
	 *
	 * @param string $text
	 *
	 * @return string
	 */
	public static function removeAnnotation( $text ) {

		// Without a `::` or `:=` separator there is nothing to remove; the
		// text is returned as-is (and without decoding %5B/%5D)
		if ( strpos( $text, '::' ) === false && strpos( $text, ':=' ) === false ) {
			return $text;
		}

		// An array callable is used because the 'self::method' string form
		// is deprecated as of PHP 8.2
		return preg_replace_callback(
			LinksProcessor::getRegexpPattern( false ),
			array( __CLASS__, 'doRemoveAnnotation' ),
			self::decodeSquareBracket( $text )
		);
	}

	/**
	 * preg_replace_callback handler for removeAnnotation().
	 *
	 * @param array $matches
	 *
	 * @return string
	 */
	private static function doRemoveAnnotation( array $matches ) {

		$caption = false;
		$value = '';

		// #1453
		// OFF/ON markers are dropped from the output
		if ( $matches[0] === InTextAnnotationParser::OFF || $matches[0] === InTextAnnotationParser::ON ) {
			return '';
		}

		// Strict mode matching
		if ( array_key_exists( 1, $matches ) ) {
			if ( strpos( $matches[1], ':' ) !== false && isset( $matches[2] ) ) {
				// Re-split at the first `::` so that anything following it
				// ends up in the value part
				list( $matches[1], $matches[2] ) = explode( '::', $matches[1] . '::' . $matches[2], 2 );
			}
		}

		if ( array_key_exists( 2, $matches ) ) {

			// #1747
			// A pipe in the property part: leave the match untouched
			if ( strpos( $matches[1], '|' ) !== false ) {
				return $matches[0];
			}

			$parts = explode( '|', $matches[2] );
			$value = array_key_exists( 0, $parts ) ? $parts[0] : '';
			$caption = array_key_exists( 1, $parts ) ? $parts[1] : false;
		}

		// #1855
		if ( $value === '@@@' ) {
			$value = '';
		}

		return $caption !== false ? $caption : $value;
	}

	/**
	 * Finds balanced [[ ... ]] constructs and rewrites those that enclose
	 * more than one `[[`.
	 *
	 * @param string $text
	 * @param InTextAnnotationParser $parser
	 *
	 * @return string
	 */
	private static function doObfuscate( $text, $parser ) {

		/**
		 * @see http://blog.angeloff.name/post/2012/08/05/php-recursive-patterns/
		 *
		 * \[{2}         # find the first opening '[['.
		 *   (?:         # start a new group, this is so '|' below does not apply/affect the opening '['.
		 *     [^\[\]]+  # skip ahead happily if no '[' or ']'.
		 *     |         # ...otherwise...
		 *     (?R)      # we may be at the start of a new group, repeat whole pattern.
		 *   )
		 *   *           # nesting can be many levels deep.
		 * \]{2}         # finally, expect a balanced closing ']]'
		 */
		$found = preg_match_all( "/\[{2}(?:[^\[\]]+|(?R))*\]{2}/is", $text, $matches );

		// Nothing matched (0) or the PCRE engine reported an error (false):
		// there is nothing to process, hand the text back unchanged
		if ( !$found ) {
			return $text;
		}

		$isOffAnnotation = false;

		// At this point we distinguish between a normal [[Foo::bar]] annotation
		// and a compound construct such as [[Foo::[[Foobar::Bar]] ]] and
		// [[Foo::[http://example.org/foo foo] [[Foo::123|Bar]] ]].
		//
		// Only the compound is being processed and matched as we require to
		// identify the boundaries of the enclosing annotation
		foreach ( $matches[0] as $match ) {

			// Normal link
			if ( strpos( $match, '[[:' ) !== false ) {
				continue;
			}

			// Remember whether the text contains OFF/ON marker (added by
			// recursive parser, template, embedded result printer)
			if ( $isOffAnnotation === false ) {
				$isOffAnnotation = $match === InTextAnnotationParser::OFF;
			}

			$annotationOpenNum = substr_count( $match, '[[' );

			// Only engage if the match contains more than one [[ :: ]] pair
			if ( $annotationOpenNum > 1 ) {
				$replace = self::doMatchAndReplace( $match, $parser, $isOffAnnotation );
				$text = str_replace( $match, $replace, $text );
			}
		}

		return $text;
	}

	/**
	 * Runs the parser's preprocess callback over the inner part of a compound
	 * match and encodes the brackets and pipes of the result.
	 *
	 * @param string $match
	 * @param InTextAnnotationParser $parser
	 * @param boolean $isOffAnnotation
	 *
	 * @return string
	 */
	private static function doMatchAndReplace( $match, $parser, $isOffAnnotation = false ) {

		// Remove the leading and last square bracket to avoid distortion
		// during the annotation parsing
		$match = substr( substr( $match, 2 ), 0, -2 );

		// Restore OFF/ON for the recursive processing
		if ( $isOffAnnotation === true ) {
			$match = InTextAnnotationParser::OFF . $match . InTextAnnotationParser::ON;
		}

		// Only match annotations of style [[...::...]] during a recursive
		// obfuscation process, any other processing is being done by the
		// InTextAnnotation parser hereafter
		//
		// [[Foo::Bar]] annotation therefore run a pattern match and
		// obfuscate the returning [[, |, ]] result
		$replace = self::encodeLinks( preg_replace_callback(
			LinksProcessor::getRegexpPattern( false ),
			array( $parser, 'preprocess' ),
			$match
		) );

		// Restore the square brackets
		return '[[' . $replace . ']]';
	}

}
232
|
|
|
|
Sometimes obsolete code just ends up commented out instead of removed. In this case it is better to remove the code once you have checked you do not need it.
The code might also have been commented out for debugging purposes. In this case it is vital that someone uncomments it again or your project may behave in very unexpected ways in production.
This check looks for comments that seem to be mostly valid code and reports them.