Completed
Push — master ( 14d2bd...06e609 )
by mw
81:37 queued 59:24
created

includes/datavalues/SMW_DV_URI.php (3 issues)

Upgrade to new PHP Analysis Engine

These results are based on our legacy PHP analysis; consider migrating to our new PHP analysis engine instead. Learn more

1
<?php
2
3
use SMW\UrlEncoder;
4
5
/**
6
 * @ingroup SMWDataValues
7
 */
8
9
// Mode for e-mail values; the SMWURIValue constructor selects it for type id '_ema'.
define( 'SMW_URI_MODE_EMAIL', 1 );
10
// Mode for general URIs/URLs; the SMWURIValue constructor selects it for type ids
// '__spu', '_uri', '_url' and for any type id it does not otherwise recognise.
define( 'SMW_URI_MODE_URI', 3 );
11
// Mode for annotation URIs; the SMWURIValue constructor selects it for type id '_anu'.
// SMWURIValue::parseUserValue() handles it exactly like SMW_URI_MODE_URI.
define( 'SMW_URI_MODE_ANNOURI', 4 );
12
// Mode for telephone numbers (tel: URIs); the SMWURIValue constructor selects it for type id '_tel'.
define( 'SMW_URI_MODE_TEL', 5 );
13
14
/**
 * This datavalue implements URL/URI/ANNURI/PHONE/EMAIL datavalues suitable for
 * defining the respective types of properties.
 *
 * The type id handed to the constructor selects one of the SMW_URI_MODE_*
 * modes; that mode decides how user input is parsed into the scheme,
 * hier-part, query and fragment components of an SMWDIUri data item.
 *
 * @author Nikolas Iwan
 * @author Markus Krötzsch
 * @ingroup SMWDataValues
 * @bug Correctly create safe HTML and Wiki text.
 */
class SMWURIValue extends SMWDataValue {

	/**
	 * The value as returned by getWikitext() and getLongText().
	 * @var string
	 */
	protected $m_wikitext;

	/**
	 * One of the basic modes of operation for this class (emails, URL,
	 * telephone number URI, ...). Holds one of the SMW_URI_MODE_* constants
	 * and is only assigned in the constructor.
	 * @var integer
	 */
	private $m_mode;

	/**
	 * Whether captions/wikitext are shown exactly as stored (true) or are
	 * passed through UrlEncoder::decode() first (false), see
	 * decodeUriContext(). It is switched to false when URI-mode user input
	 * contains no "%" character and whenever a data item is loaded.
	 * @var boolean
	 */
	private $showUrlContextInRawFormat = true;

	/**
	 * @param string $typeid type id of the property this value belongs to;
	 *  it determines the parsing mode
	 */
	public function __construct( $typeid ) {
		parent::__construct( $typeid );

		switch ( $typeid ) {
			case '_ema':
				$this->m_mode = SMW_URI_MODE_EMAIL;
			break;
			case '_anu':
				$this->m_mode = SMW_URI_MODE_ANNOURI;
			break;
			case '_tel':
				$this->m_mode = SMW_URI_MODE_TEL;
			break;
			case '__spu':
			case '_uri':
			case '_url':
			default:
				$this->m_mode = SMW_URI_MODE_URI;
			break;
		}
	}

	/**
	 * Parses the user input according to the current mode and, on success,
	 * stores the resulting SMWDIUri in $this->m_dataitem. On failure an
	 * error is registered via addError() and no data item is created.
	 *
	 * @param string $value raw user input; surrounding whitespace is ignored
	 */
	protected function parseUserValue( $value ) {
		$value = trim( $value );
		$this->m_wikitext = $value;

		// Only fall back to the input as caption when none was set explicitly.
		if ( $this->m_caption === false ) {
			$this->m_caption = $this->m_wikitext;
		}

		if ( $value === '' ) { // do not accept empty strings
			$this->addError( wfMessage( 'smw_emptystring' )->inContentLanguage()->text() );
			return;
		}

		$scheme = $hierpart = $query = $fragment = '';

		switch ( $this->m_mode ) {
			case SMW_URI_MODE_URI:
			case SMW_URI_MODE_ANNOURI:

				// Whether the url value was externally encoded or not
				if ( strpos( $value, "%" ) === false ) {
					$this->showUrlContextInRawFormat = false;
				}

				// If somehow the slash was encoded bring into one format
				$value = str_replace( "%2F", "/", $value );

				$parts = explode( ':', $value, 2 ); // try to split "schema:rest"
				if ( count( $parts ) == 1 ) { // possibly add "http" as default
					$value = 'http://' . $value;
					$parts[1] = $parts[0];
					$parts[0] = 'http';
				}

				// check against blacklist (one forbidden URI prefix per message line)
				$uri_blacklist = explode( "\n", wfMessage( 'smw_uri_blacklist' )->inContentLanguage()->text() );
				foreach ( $uri_blacklist as $uri ) {
					$uri = trim( $uri );
					// Strict comparison on purpose: with "==" two numeric-looking
					// strings are compared by numeric value (e.g. '100' == '1e2'),
					// which would turn the prefix test into a false positive.
					if ( $uri !== '' && $uri === mb_substr( $value, 0, mb_strlen( $uri ) ) ) { // disallowed URI!
						$this->addError( wfMessage( 'smw_baduri', $value )->inContentLanguage()->text() );
						return;
					}
				}

				// decompose general URI components
				$scheme = $parts[0];
				$parts = explode( '?', $parts[1], 2 ); // try to split "hier-part?queryfrag"
				if ( count( $parts ) == 2 ) {
					$hierpart = $parts[0];
					$parts = explode( '#', $parts[1], 2 ); // try to split "query#frag"
					$query = $parts[0];
					$fragment = ( count( $parts ) == 2 ) ? $parts[1] : '';
				} else {
					$query = '';
					$parts = explode( '#', $parts[0], 2 ); // try to split "hier-part#frag"
					$hierpart = $parts[0];
					$fragment = ( count( $parts ) == 2 ) ? $parts[1] : '';
				}

				// We do not validate the URI characters (the data item will do this) but we do some escaping:
				// encode most characters, but leave special symbols as given by user:
				$hierpart = self::escapeUriComponent( $hierpart );
				$query = self::escapeUriComponent( $query );
				$fragment = self::escapeUriComponent( $fragment );
				/// NOTE: we do not support raw [ (%5B) and ] (%5D), although they are needed for ldap:// (but rarely in a wiki)
				/// NOTE: "+" gets encoded, as it is interpreted as space by most browsers when part of a URL;
				///       this prevents tel: from working directly, but we have a datatype for this anyway.

				// The leading "//" is not kept as part of the hier-part.
				if ( substr( $hierpart, 0, 2 ) === '//' ) {
					$hierpart = substr( $hierpart, 2 );
				}

			break;
			case SMW_URI_MODE_TEL:
				$scheme = 'tel';

				if ( substr( $value, 0, 4 ) === 'tel:' ) { // accept optional "tel"
					$value = substr( $value, 4 );
					$this->m_wikitext = $value;
				}

				// A single space between two digits becomes "-"; the pattern only
				// uses look-arounds, so there are no capture groups to refer to.
				$hierpart = preg_replace( '/(?<=[0-9]) (?=[0-9])/', '-', $value );
				$hierpart = str_replace( ' ', '', $hierpart );

				// International "00" prefix becomes "+". Strict comparison on
				// purpose: with "==" numeric-looking prefixes such as '0.' or
				// '+0' would compare equal to '00'.
				if ( substr( $hierpart, 0, 2 ) === '00' ) {
					$hierpart = '+' . substr( $hierpart, 2 );
				}

				// NOTE(review): static analysis reports getOptionValueFor() as
				// returning string|false, so this loose check treats an empty
				// string like false. Left unchanged; confirm before switching to
				// "=== false".
				// Validation is skipped when the 'description.processor' option
				// is set; otherwise require at least 6 digits, no two adjacent
				// separators, and the RFC 3966 shape.
				if ( !$this->getOptionValueFor( 'description.processor' ) && (
					( strlen( preg_replace( '/[^0-9]/', '', $hierpart ) ) < 6 ) ||
					( preg_match( '<[-+./][-./]>', $hierpart ) ) ||
					( !self::isValidTelURI( 'tel:' . $hierpart ) ) ) ) { /// TODO: introduce error-message for "bad" phone number
					$this->addError( wfMessage( 'smw_baduri', $this->m_wikitext )->inContentLanguage()->text() );
					return;
				}
			break;
			case SMW_URI_MODE_EMAIL:
				$scheme = 'mailto';

				if ( strpos( $value, 'mailto:' ) === 0 ) { // accept optional "mailto"
					$value = substr( $value, 7 );
					$this->m_wikitext = $value;
				}

				// NOTE(review): same loose check on getOptionValueFor() as in the
				// telephone branch (reported as string|false); left unchanged.
				if ( !$this->getOptionValueFor( 'description.processor' ) && !Sanitizer::validateEmail( $value ) ) {
					/// TODO: introduce error-message for "bad" email
					$this->addError( wfMessage( 'smw_baduri', $value )->inContentLanguage()->text() );
					return;
				}

				$hierpart = self::escapeUriComponent( $value );
			break;
		}

		// Now create the URI data item:
		try {
			$this->m_dataitem = new SMWDIUri( $scheme, $hierpart, $query, $fragment, $this->m_typeid );
		} catch ( SMWDataItemException $e ) {
			$this->addError( wfMessage( 'smw_baduri', $this->m_wikitext )->inContentLanguage()->text() );
		}
	}

	/**
	 * Percent-encodes a URI component with rawurlencode() and afterwards
	 * restores the characters : / # @ ? = & and % to their literal form, so
	 * that these symbols (and percent-escapes already present in the input)
	 * stay as given by the user.
	 *
	 * @param string $component
	 * @return string
	 */
	private static function escapeUriComponent( $component ) {
		return str_replace(
			array( '%3A', '%2F', '%23', '%40', '%3F', '%3D', '%26', '%25' ),
			array( ':', '/', '#', '@', '?', '=', '&', '%' ),
			rawurlencode( $component )
		);
	}

	/**
	 * Returns true if the argument is a valid RFC 3966 phone number.
	 * Only global phone numbers are supported, and no full validation
	 * of parameters (appended via ;param=value) is performed.
	 *
	 * @param string $s complete URI including the "tel:" prefix
	 * @return boolean
	 */
	protected static function isValidTelURI( $s ) {
		$tel_uri_regex = '<^tel:\+[0-9./-]*[0-9][0-9./-]*(;[0-9a-zA-Z-]+=(%[0-9a-zA-Z][0-9a-zA-Z]|[0-9a-zA-Z._~:/?#[\]@!$&\'()*+,;=-])*)*$>';
		return (bool) preg_match( $tel_uri_regex, $s );
	}

	/**
	 * @see SMWDataValue::loadDataItem()
	 * @param $dataItem SMWDataItem
	 * @return boolean false if the data item is not of type URI
	 */
	protected function loadDataItem( SMWDataItem $dataItem ) {

		if ( $dataItem->getDIType() !== SMWDataItem::TYPE_URI ) {
			return false;
		}

		$this->m_dataitem = $dataItem;

		// E-mail and telephone values are shown without their scheme prefix
		// ("mailto:" is 7 characters, "tel:" is 4).
		if ( $this->m_mode == SMW_URI_MODE_EMAIL ) {
			$this->m_wikitext = substr( $dataItem->getURI(), 7 );
		} elseif ( $this->m_mode == SMW_URI_MODE_TEL ) {
			$this->m_wikitext = substr( $dataItem->getURI(), 4 );
		} else {
			$this->m_wikitext = $dataItem->getURI();
		}

		$this->m_caption = $this->m_wikitext;
		$this->showUrlContextInRawFormat = false;

		return true;
	}

	/**
	 * Returns the caption as wikitext: plain when linking is disabled (or no
	 * URL is available), colon-escaped for the "nowiki" output format, and
	 * as an external link "[url caption]" otherwise.
	 *
	 * @param mixed $linked null or false disables linking
	 * @return string
	 */
	public function getShortWikiText( $linked = null ) {

		list( $url, $caption ) = $this->decodeUriContext( $this->m_caption );

		if ( is_null( $linked ) || ( $linked === false ) || ( $url === '' ) ||
			( $this->m_outformat == '-' ) || ( $this->m_caption === '' ) ) {
			return $caption;
		} elseif ( $this->m_outformat == 'nowiki' ) {
			return $this->makeNonlinkedWikiText( $caption );
		} else {
			return '[' . $url . ' ' . $caption . ']';
		}
	}

	/**
	 * Returns the caption as HTML; an external link is only built when a
	 * linker is given, the value is valid, a URL is available and the output
	 * format is neither "-" nor "nowiki".
	 *
	 * @param mixed $linker object providing makeExternalLink(), or null
	 * @return string
	 */
	public function getShortHTMLText( $linker = null ) {

		list( $url, $caption ) = $this->decodeUriContext( $this->m_caption );

		if ( is_null( $linker ) || ( !$this->isValid() ) || ( $url === '' ) ||
			( $this->m_outformat == '-' ) || ( $this->m_outformat == 'nowiki' ) ||
			( $this->m_caption === '' ) ) {
			return $caption;
		} else {
			return $linker->makeExternalLink( $url, $caption );
		}
	}

	/**
	 * Like getShortWikiText() but based on the full wikitext value instead
	 * of the caption; returns the error text for invalid values.
	 *
	 * @param mixed $linked null or false disables linking
	 * @return string
	 */
	public function getLongWikiText( $linked = null ) {

		if ( !$this->isValid() ) {
			return $this->getErrorText();
		}

		list( $url, $wikitext ) = $this->decodeUriContext( $this->m_wikitext );

		if ( is_null( $linked ) || ( $linked === false ) || ( $url === '' ) ||
			( $this->m_outformat == '-' ) ) {
			return $wikitext;
		} elseif ( $this->m_outformat == 'nowiki' ) {
			return $this->makeNonlinkedWikiText( $wikitext );
		} else {
			return '[' . $url . ' ' . $wikitext . ']';
		}
	}

	/**
	 * Like getShortHTMLText() but based on the full wikitext value instead
	 * of the caption; returns the error text for invalid values.
	 *
	 * @param mixed $linker object providing makeExternalLink(), or null
	 * @return string
	 */
	public function getLongHTMLText( $linker = null ) {

		if ( !$this->isValid() ) {
			return $this->getErrorText();
		}

		list( $url, $wikitext ) = $this->decodeUriContext( $this->m_wikitext );

		// Validity was already established by the guard above, so it is not
		// checked a second time here.
		if ( is_null( $linker ) || ( $url === '' ) ||
			( $this->m_outformat == '-' ) || ( $this->m_outformat == 'nowiki' ) ) {
			return $wikitext;
		} else {
			return $linker->makeExternalLink( $url, $wikitext );
		}
	}

	/**
	 * @return string the stored wikitext value
	 */
	public function getWikiValue() {
		return $this->m_wikitext;
	}

	/**
	 * @return string the full URI of the current (or dummy) data item
	 */
	public function getURI() {
		return $this->getUriDataitem()->getURI();
	}

	/**
	 * Create links to mapping services based on a wiki-editable message. The
	 * parameters available to the message are:
	 * $1: urlencoded version of URI/URL value (includes mailto: for emails)
	 *
	 * NOTE(review): static analysis reports the parent method
	 * SMWDataValue::getServiceLinkParams() as typed boolean, whereas this
	 * override returns an array of strings; confirm that callers accept
	 * both.
	 *
	 * @return string[]
	 */
	protected function getServiceLinkParams() {
		return array( rawurlencode( $this->getUriDataitem()->getURI() ) );
	}

	/**
	 * Get a URL for hyperlinking this URI, or the empty string if this URI
	 * is not hyperlinked in MediaWiki.
	 * @return string
	 */
	public function getURL() {
		global $wgUrlProtocols;

		// Fetch the data item once: getUriDataitem() builds a fresh dummy
		// object on every call while no data item is set.
		$dataItem = $this->getUriDataitem();
		$scheme = $dataItem->getScheme();

		foreach ( $wgUrlProtocols as $prot ) {
			if ( ( $prot == $scheme . ':' ) || ( $prot == $scheme . '://' ) ) {
				return $dataItem->getURI();
			}
		}

		return '';
	}

	/**
	 * Helper function to get the current dataitem, or some dummy URI
	 * dataitem if the dataitem was not set. This makes it easier to
	 * write code that avoids errors even if the data was not
	 * initialized properly.
	 * @return SMWDIUri
	 */
	protected function getUriDataitem() {
		if ( isset( $this->m_dataitem ) ) {
			return $this->m_dataitem;
		} else { // note: use "noprotocol" to avoid accidental use in an MW link, see getURL()
			return new SMWDIUri( 'noprotocol', 'x', '', '', $this->m_typeid );
		}
	}

	/**
	 * Helper function that changes a URL string in such a way that it
	 * can be used in wikitext without being turned into a hyperlink,
	 * while still displaying the same characters. The use of
	 * &lt;nowiki&gt; is avoided, since the resulting strings may be
	 * inserted during parsing, after this has been stripped.
	 *
	 * @since 1.8
	 *
	 * @param string $url
	 * @return string $url with every ":" replaced by the entity "&#58;"
	 */
	protected function makeNonlinkedWikiText( $url ) {
		return str_replace( ':', '&#58;', $url );
	}

	/**
	 * Pairs the link target with the text to display for it.
	 *
	 * @param string $context caption or wikitext to display
	 * @return array two elements: the result of getURL() and the display
	 *  text (decoded unless the raw format is to be shown)
	 */
	private function decodeUriContext( $context ) {

		// Prior to decoding turn any `-` into an internal representation to avoid
		// potential breakage
		if ( !$this->showUrlContextInRawFormat ) {
			$context = UrlEncoder::decode( str_replace( '-', '-2D', $context ) );
		}

		return array( $this->getURL(), $context );
	}

}
344