Completed
Push — master ( d2d28e...1c2760 )
by mw
35:37
created

includes/datavalues/SMW_DV_URI.php (2 issues)

Upgrade to new PHP Analysis Engine

These results are based on our legacy PHP analysis; consider migrating to our new PHP analysis engine instead. Learn more

1
<?php
2
3
use SMW\UrlEncoder;
4
use SMW\Message;
5
6
/**
7
 * @ingroup SMWDataValues
8
 */
9
10
// Operating modes of SMWURIValue; the constructor picks one from the type id.
// E-mail values (type id '_ema'), handled with the "mailto" scheme.
define( 'SMW_URI_MODE_EMAIL', 1 );
11
// Generic URIs/URLs; also the fallback for any type id not listed here.
define( 'SMW_URI_MODE_URI', 3 );
12
// Annotation URIs (type id '_anu'); parsed the same way as generic URIs.
define( 'SMW_URI_MODE_ANNOURI', 4 );
13
// Telephone numbers (type id '_tel'), handled with the "tel" scheme.
define( 'SMW_URI_MODE_TEL', 5 );
14
15
/**
16
 * This datavalue implements URL/URI/ANNURI/PHONE/EMAIL datavalues suitable for
17
 * defining the respective types of properties.
18
 *
19
 * @author Nikolas Iwan
20
 * @author Markus Krötzsch
21
 * @ingroup SMWDataValues
22
 * @bug Correctly create safe HTML and Wiki text.
23
 */
24
class SMWURIValue extends SMWDataValue {
25
26
	/**
27
	 * The value as returned by getWikitext() and getLongText().
28
	 * @var string
29
	 */
30
	protected $m_wikitext;
31
	/**
32
	 * One of the basic modes of operation for this class (emails, URL,
33
	 * telephone number URI, ...).
34
	 * @var integer
35
	 */
36
	private $m_mode;
37
38
	/**
39
	 * @var boolean
40
	 */
41
	private $showUrlContextInRawFormat = true;
42
43 56
	/**
	 * Selects the operating mode of this value from the given type id.
	 *
	 * '_ema' selects the e-mail mode, '_anu' the annotation URI mode and
	 * '_tel' the telephone mode. Every other id (including '__spu', '_uri'
	 * and '_url') results in the generic URI mode.
	 *
	 * @param string $typeid
	 */
	public function __construct( $typeid ) {
		parent::__construct( $typeid );

		// Loose comparison on purpose: it matches the way a switch compares.
		if ( $typeid == '_ema' ) {
			$this->m_mode = SMW_URI_MODE_EMAIL;
		} elseif ( $typeid == '_anu' ) {
			$this->m_mode = SMW_URI_MODE_ANNOURI;
		} elseif ( $typeid == '_tel' ) {
			$this->m_mode = SMW_URI_MODE_TEL;
		} else {
			$this->m_mode = SMW_URI_MODE_URI;
		}
	}
63
64 55
	/**
	 * Parses a user-supplied string and, when it is acceptable, stores the
	 * resulting SMWDIUri in $this->m_dataitem.
	 *
	 * The trimmed input is kept as the wikitext (and as the caption when no
	 * caption was set). Depending on the mode chosen in the constructor the
	 * input is treated as a generic URI, a telephone number or an e-mail
	 * address. Problems are reported through addErrorMsg(); nothing is
	 * returned.
	 *
	 * @param string $value
	 */
	protected function parseUserValue( $value ) {
		$value = trim( $value );
		$this->m_wikitext = $value;
		if ( $this->m_caption === false ) {
			$this->m_caption = $this->m_wikitext;
		}

		$scheme = $hierpart = $query = $fragment = '';
		if ( $value === '' ) { // do not accept empty strings
			$this->addErrorMsg( array( 'smw_emptystring' ) );
			return;
		}

		switch ( $this->m_mode ) {
			case SMW_URI_MODE_URI:
			case SMW_URI_MODE_ANNOURI:

				// Whether the url value was externally encoded or not
				if ( strpos( $value, "%" ) === false ) {
					$this->showUrlContextInRawFormat = false;
				}

				// If somehow the slash was encoded bring into one format
				$value = str_replace( "%2F", "/", $value );

				$parts = explode( ':', $value, 2 ); // try to split "schema:rest"
				if ( count( $parts ) == 1 ) { // possibly add "http" as default
					$value = 'http://' . $value;
					$parts[1] = $parts[0];
					$parts[0] = 'http';
				}

				// check against blacklist
				$uri_blacklist = explode( "\n", Message::get( 'smw_uri_blacklist', Message::TEXT, Message::CONTENT_LANGUAGE ) );
				foreach ( $uri_blacklist as $uri ) {
					$uri = trim( $uri );
					// Strict comparison: this is a prefix test on strings, and a loose
					// comparison would treat two numeric-looking strings as numbers.
					if ( $uri !== '' && $uri === mb_substr( $value, 0, mb_strlen( $uri ) ) ) { // disallowed URI!
						$this->addErrorMsg( array( 'smw_baduri', $value ) );
						return;
					}
				}

				// decompose general URI components
				$scheme = $parts[0];
				$parts = explode( '?', $parts[1], 2 ); // try to split "hier-part?queryfrag"
				if ( count( $parts ) == 2 ) {
					$hierpart = $parts[0];
					$parts = explode( '#', $parts[1], 2 ); // try to split "query#frag"
					$query = $parts[0];
					$fragment = ( count( $parts ) == 2 ) ? $parts[1] : '';
				} else {
					$query = '';
					$parts = explode( '#', $parts[0], 2 ); // try to split "hier-part#frag"
					$hierpart = $parts[0];
					$fragment = ( count( $parts ) == 2 ) ? $parts[1] : '';
				}

				// We do not validate the URI characters (the data item will do this) but we do some escaping:
				// encode most characters, but leave special symbols as given by user:
				$hierpart = self::encodeUriComponent( $hierpart );
				$query = self::encodeUriComponent( $query );
				$fragment = self::encodeUriComponent( $fragment );
				/// NOTE: we do not support raw [ (%5B) and ] (%5D), although they are needed for ldap:// (but rarely in a wiki)
				/// NOTE: "+" gets encoded, as it is interpreted as space by most browsers when part of a URL;
				///       this prevents tel: from working directly, but we have a datatype for this anyway.

				if ( substr( $hierpart, 0, 2 ) === '//' ) {
					$hierpart = substr( $hierpart, 2 );
				}

				break;
			case SMW_URI_MODE_TEL:
				$scheme = 'tel';

				if ( substr( $value, 0, 4 ) === 'tel:' ) { // accept optional "tel"
					$value = substr( $value, 4 );
					$this->m_wikitext = $value;
				}

				// A single space between two digits becomes a hyphen; the pattern uses
				// look-arounds only, so there are no capture groups to refer back to.
				$hierpart = preg_replace( '/(?<=[0-9]) (?=[0-9])/', '-', $value );
				$hierpart = str_replace( ' ', '', $hierpart );
				// Strict comparison: with "==" numeric-looking prefixes such as "+0"
				// would also be considered equal to "00" and get rewritten.
				if ( substr( $hierpart, 0, 2 ) === '00' ) {
					$hierpart = '+' . substr( $hierpart, 2 );
				}

				// Outside of a query context, require at least six digits, no two
				// adjacent separator characters, and a well-formed tel: URI.
				if ( !$this->getOptionBy( self::OPT_QUERY_CONTEXT ) && ( ( strlen( preg_replace( '/[^0-9]/', '', $hierpart ) ) < 6 ) ||
					( preg_match( '<[-+./][-./]>', $hierpart ) ) ||
					( !self::isValidTelURI( 'tel:' . $hierpart ) ) ) ) { /// TODO: introduce error-message for "bad" phone number
					$this->addErrorMsg( array( 'smw_baduri', $this->m_wikitext ) );
					return;
				}
				break;
			case SMW_URI_MODE_EMAIL:
				$scheme = 'mailto';
				if ( strpos( $value, 'mailto:' ) === 0 ) { // accept optional "mailto"
					$value = substr( $value, 7 );
					$this->m_wikitext = $value;
				}

				if ( !$this->getOptionBy( self::OPT_QUERY_CONTEXT ) && !Sanitizer::validateEmail( $value ) ) {
					/// TODO: introduce error-message for "bad" email
					$this->addErrorMsg( array( 'smw_baduri', $value ) );
					return;
				}
				$hierpart = self::encodeUriComponent( $value );
				break;
		}

		// Now create the URI data item:
		try {
			$this->m_dataitem = new SMWDIUri( $scheme, $hierpart, $query, $fragment, $this->m_typeid );
		} catch ( SMWDataItemException $e ) {
			$this->addErrorMsg( array( 'smw_baduri', $this->m_wikitext ) );
		}
	}

	/**
	 * Percent-encodes a URI component with rawurlencode() and then restores
	 * the characters : / # @ ? = & % so that they stay as typed by the user.
	 *
	 * @param string $component
	 * @return string
	 */
	private static function encodeUriComponent( $component ) {
		// '%25' has to stay last so that a literal "%" is restored only after
		// the other sequences have been handled.
		return str_replace(
			array( '%3A', '%2F', '%23', '%40', '%3F', '%3D', '%26', '%25' ),
			array( ':', '/', '#', '@', '?', '=', '&', '%' ),
			rawurlencode( $component )
		);
	}
175
176
	/**
177
	 * Returns true if the argument is a valid RFC 3966 phone number.
178
	 * Only global phone numbers are supported, and no full validation
179
	 * of parameters (appended via ;param=value) is performed.
180
	 */
181 3
	protected static function isValidTelURI( $s ) {
		// "<" and ">" act as the pattern delimiters. The trailing D modifier
		// makes "$" match only at the very end of the subject; without it a
		// string ending in a newline would still be accepted.
		$tel_uri_regex = '<^tel:\+[0-9./-]*[0-9][0-9./-]*(;[0-9a-zA-Z-]+=(%[0-9a-zA-Z][0-9a-zA-Z]|[0-9a-zA-Z._~:/?#[\]@!$&\'()*+,;=-])*)*$>D';
		return (bool) preg_match( $tel_uri_regex, $s );
	}
185
186
	/**
187
	 * @see SMWDataValue::loadDataItem()
188
	 * @param $dataitem SMWDataItem
189
	 * @return boolean
190
	 */
191 9
	protected function loadDataItem( SMWDataItem $dataItem ) {

		// Anything that is not a URI data item is rejected.
		if ( $dataItem->getDIType() !== SMWDataItem::TYPE_URI ) {
			return false;
		}

		$this->m_dataitem = $dataItem;
		$uri = $dataItem->getURI();

		switch ( $this->m_mode ) {
			case SMW_URI_MODE_EMAIL:
				// skip the first 7 characters (the length of "mailto:")
				$text = substr( $uri, 7 );
				break;
			case SMW_URI_MODE_TEL:
				// skip the first 4 characters (the length of "tel:")
				$text = substr( $uri, 4 );
				break;
			default:
				$text = $uri;
		}

		// Wikitext and caption are the same for loaded data items.
		$this->m_wikitext = $text;
		$this->m_caption = $text;
		$this->showUrlContextInRawFormat = false;

		return true;
	}
211
212 54
	/**
	 * Returns the caption as wikitext, wrapped in an external link when
	 * linking is requested and possible.
	 *
	 * The bare caption is returned when $linked is null or false, when no
	 * URL is available, when the output format is '-' or when the caption
	 * is empty. With the 'nowiki' output format the caption is returned in
	 * a form that is not auto-linked.
	 *
	 * @param mixed $linked
	 * @return string
	 */
	public function getShortWikiText( $linked = null ) {

		list( $target, $label ) = $this->decodeUriContext( $this->m_caption );

		$plain = $linked === null || $linked === false || $target === '' ||
			$this->m_outformat == '-' || $this->m_caption === '';

		if ( $plain ) {
			return $label;
		}

		if ( $this->m_outformat == 'nowiki' ) {
			return $this->makeNonlinkedWikiText( $label );
		}

		return '[' . $target . ' ' . $label . ']';
	}
225
226 39
	/**
	 * Returns the caption as HTML, turned into an external link through the
	 * given linker when linking is requested and possible.
	 *
	 * The bare caption is returned when $linker is null or a boolean, when
	 * the value is not valid, when no URL is available, when the output
	 * format is '-' or 'nowiki', or when the caption is empty.
	 *
	 * @param mixed $linker
	 * @return string
	 */
	public function getShortHTMLText( $linker = null ) {

		list( $target, $label ) = $this->decodeUriContext( $this->m_caption );

		$linkable = $linker !== null && $this->isValid() && $target !== '' &&
			$this->m_outformat != '-' && $this->m_outformat != 'nowiki' &&
			$this->m_caption !== '' && !is_bool( $linker );

		if ( $linkable ) {
			return $linker->makeExternalLink( $target, $label );
		}

		return $label;
	}
238
239 36
	/**
	 * Returns the full value as wikitext, or the error text when the value
	 * is not valid.
	 *
	 * The value is wrapped in an external link unless $linked is null or a
	 * boolean, no URL is available or the output format is '-'. With the
	 * 'nowiki' output format the text is returned in a form that is not
	 * auto-linked.
	 *
	 * @param mixed $linked
	 * @return string
	 */
	public function getLongWikiText( $linked = null ) {

		if ( !$this->isValid() ) {
			return $this->getErrorText();
		}

		list( $target, $text ) = $this->decodeUriContext( $this->m_wikitext );

		$plain = $linked === null || $linked === false || $target === '' ||
			$this->m_outformat == '-' || is_bool( $linked );

		if ( $plain ) {
			return $text;
		}

		if ( $this->m_outformat == 'nowiki' ) {
			return $this->makeNonlinkedWikiText( $text );
		}

		return '[' . $target . ' ' . $text . ']';
	}
256
257 37
	/**
	 * Returns the full value as HTML, or the error text when the value is
	 * not valid.
	 *
	 * The text is turned into an external link through the given linker
	 * unless $linker is null or a boolean, no URL is available or the
	 * output format is '-' or 'nowiki'.
	 *
	 * @param mixed $linker
	 * @return string
	 */
	public function getLongHTMLText( $linker = null ) {

		if ( !$this->isValid() ) {
			return $this->getErrorText();
		}

		list( $target, $text ) = $this->decodeUriContext( $this->m_wikitext );

		// isValid() is consulted again here, as part of the linking decision.
		$linkable = $linker !== null && $this->isValid() && $target !== '' &&
			$this->m_outformat != '-' && $this->m_outformat != 'nowiki' &&
			!is_bool( $linker );

		if ( $linkable ) {
			return $linker->makeExternalLink( $target, $text );
		}

		return $text;
	}
272
273 45
	/**
	 * Returns the stored wikitext of this value.
	 *
	 * @return string
	 */
	public function getWikiValue() {
		$wikiValue = $this->m_wikitext;

		return $wikiValue;
	}
276
277
	/**
	 * Returns the URI of the current data item (or of the placeholder data
	 * item that getUriDataitem() supplies when none is set).
	 */
	public function getURI() {
		$dataItem = $this->getUriDataitem();

		return $dataItem->getURI();
	}
280
281 1
	protected function getServiceLinkParams() {
282
		// Create links to mapping services based on a wiki-editable message. The parameters
283
		// available to the message are:
284
		// $1: urlencoded version of URI/URL value (includes mailto: for emails)
285 1
		return array( rawurlencode( $this->getUriDataitem()->getURI() ) );
0 ignored issues
show
Bug Best Practice introduced by
The return type of return array(rawurlencod...Dataitem()->getURI())); (string[]) is incompatible with the return type of the parent method SMWDataValue::getServiceLinkParams of type boolean.

If you return a value from a function or method, it should be a sub-type of the type that is given by the parent type, e.g. an interface or abstract method. This is more formally defined by the Liskov substitution principle, and guarantees that classes that depend on the parent type can use any instance of a child type interchangeably. This principle also belongs to the SOLID principles for object-oriented design.

Let’s take a look at an example:

class Author {
    private $name;

    public function __construct($name) {
        $this->name = $name;
    }

    public function getName() {
        return $this->name;
    }
}

abstract class Post {
    public function getAuthor() {
        return 'Johannes';
    }
}

class BlogPost extends Post {
    public function getAuthor() {
        return new Author('Johannes');
    }
}

class ForumPost extends Post { /* ... */ }

function my_function(Post $post) {
    echo strtoupper($post->getAuthor());
}

Our function my_function expects a Post object, and outputs the author of the post. The base class Post returns a simple string and outputting a simple string will work just fine. However, the child class BlogPost which is a sub-type of Post instead decided to return an object, and is therefore violating the SOLID principles. If a BlogPost were passed to my_function, PHP would not complain, but ultimately fail when executing the strtoupper call in its body.

Loading history...
286
	}
287
288
	/**
289
	 * Get a URL for hyperlinking this URI, or the empty string if this URI
290
	 * is not hyperlinked in MediaWiki.
291
	 * @return string
292
	 */
293 55
	public function getURL() {
		global $wgUrlProtocols;

		$dataItem = $this->getUriDataitem();
		$scheme = $dataItem->getScheme();

		// A configured protocol may be written as "scheme:" or "scheme://".
		$withColon = $scheme . ':';
		$withSlashes = $scheme . '://';

		foreach ( $wgUrlProtocols as $protocol ) {
			if ( $protocol == $withColon || $protocol == $withSlashes ) {
				return $dataItem->getURI();
			}
		}

		// The scheme is not among the configured protocols: no hyperlink.
		return '';
	}
304
305
	/**
306
	 * Helper function to get the current dataitem, or some dummy URI
307
	 * dataitem if the dataitem was not set. This makes it easier to
308
	 * write code that avoids errors even if the data was not
309
	 * initialized properly.
310
	 * @return SMWDIUri
311
	 */
312 55
	protected function getUriDataitem() {
313 55
		if ( isset( $this->m_dataitem ) ) {
314 55
			return $this->m_dataitem;
315
		} else { // note: use "noprotocol" to avoid accidental use in an MW link, see getURL()
316
			return new SMWDIUri( 'noprotocol', 'x', '', '', $this->m_typeid );
0 ignored issues
show
The call to SMWDIUri::__construct() has too many arguments starting with $this->m_typeid.

This check compares calls to functions or methods with their respective definitions. If the call has more arguments than are defined, it raises an issue.

If a function is defined several times with a different number of parameters, the check may pick up the wrong definition and report false positives. One codebase where this has been known to happen is WordPress.

In this case you can add the @ignore PhpDoc annotation to the duplicate definition and it will be ignored.

Loading history...
317
		}
318
	}
319
320
	/**
321
	 * Helper function that changes a URL string in such a way that it
322
	 * can be used in wikitext without being turned into a hyperlink,
323
	 * while still displaying the same characters. The use of
324
	 * &lt;nowiki&gt; is avoided, since the resulting strings may be
325
	 * inserted during parsing, after this has been stripped.
326
	 *
327
	 * @since 1.8
328
	 */
329
	protected function makeNonlinkedWikiText( $url ) {
		// Every colon becomes its numeric character reference.
		return strtr( $url, array( ':' => '&#58;' ) );
	}
332
333 55
	/**
	 * Pairs the hyperlink URL of this value with a display text.
	 *
	 * The text is passed through UrlEncoder::decode() unless the raw format
	 * flag is set, in which case it is returned untouched.
	 *
	 * @param string $context
	 * @return array the URL from getURL() followed by the display text
	 */
	private function decodeUriContext( $context ) {

		if ( !$this->showUrlContextInRawFormat ) {
			// Turn every `-` into an internal representation before decoding
			// to avoid potential breakage.
			$shielded = str_replace( '-', '-2D', $context );
			$context = UrlEncoder::decode( $shielded );
		}

		$url = $this->getURL();

		return array( $url, $context );
	}
343
344
}
345