Completed
Push — 3.5 ( 1a9180...1bec8a )
by Daniel
24s
created

SimpleUrl::SimpleUrl()   A

Complexity

Conditions 3
Paths 4

Size

Total Lines 16
Code Lines 14

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 3
eloc 14
nc 4
nop 1
dl 0
loc 16
rs 9.4285
c 0
b 0
f 0
1
<?php
2
/**
3
 *  base include file for SimpleTest
4
 *  @package    SimpleTest
5
 *  @subpackage WebTester
6
 *  @version    $Id: url.php 1723 2008-04-08 00:34:10Z lastcraft $
7
 */
8
9
/**#@+
10
 *  include other SimpleTest class files
11
 */
12
require_once(dirname(__FILE__) . '/encoding.php');
13
/**#@-*/
14
15
/**
16
 *    URL parser to replace parse_url() PHP function which
17
 *    got broken in PHP 4.3.0. Adds some browser specific
18
 *    functionality such as expandomatics.
19
 *    Guesses a bit trying to separate the host from
20
 *    the path and tries to keep a raw, possibly unparsable,
21
 *    request string as long as possible.
22
 *    @package SimpleTest
23
 *    @subpackage WebTester
24
 */
25
class SimpleUrl {
    var $_scheme;
    var $_username;
    var $_password;
    var $_host;
    var $_port;
    var $_path;
    var $_request;
    var $_fragment;
    var $_x;
    var $_y;
    var $_target;
    var $_raw = false;

    /**
     *    Constructor. Parses the URL into its sections by
     *    repeatedly "chomping" the leading or trailing part
     *    off a working copy of the string. The order of the
     *    steps matters as each one expects the previous
     *    parts to have been removed already.
     *    @param string $url        Incoming URL.
     *    @access public
     */
    function __construct($url = '') {
        $url = (string) $url;
        $coordinates = $this->_chompCoordinates($url);
        $this->setCoordinates($coordinates[0], $coordinates[1]);
        $this->_scheme = $this->_chompScheme($url);
        $login = $this->_chompLogin($url);
        $this->_username = $login[0];
        $this->_password = $login[1];
        $this->_host = $this->_chompHost($url);
        $this->_port = false;
        // The host chunk may still carry a ":port" suffix.
        if (preg_match('/(.*?):(.*)/', (string) $this->_host, $host_parts)) {
            $this->_host = $host_parts[1];
            $this->_port = (int) $host_parts[2];
        }
        $this->_path = $this->_chompPath($url);
        $this->_request = $this->_parseRequest($this->_chompRequest($url));
        // Whatever is left can only be the fragment.
        $this->_fragment = (strncmp($url, "#", 1) == 0 ? substr($url, 1) : false);
        $this->_target = false;
    }

    /**
     *    Old style (PHP 4) constructor name, kept so that
     *    existing code calling $this->SimpleUrl($url) or
     *    parent::SimpleUrl($url) carries on working. PHP 8
     *    no longer treats a method named after the class as
     *    the constructor, so the real work lives in
     *    __construct(). The call is made non-virtually so
     *    that a subclass with its own __construct() cannot
     *    recurse back into this method.
     *    @param string $url        Incoming URL.
     *    @access public
     */
    function SimpleUrl($url = '') {
        SimpleUrl::__construct($url);
    }

    /**
     *    Extracts the X, Y coordinate pair from an image map.
     *    Only a trailing "?digits,digits" is recognised.
     *    @param string $url   URL so far. The coordinates will be
     *                         removed.
     *    @return array        X, Y as a pair of integers, or a
     *                         pair of false values if absent.
     *    @access private
     */
    function _chompCoordinates(&$url) {
        if (! preg_match('/(.*)\?(\d+),(\d+)$/', $url, $matches)) {
            return array(false, false);
        }
        $url = $matches[1];
        return array((int) $matches[2], (int) $matches[3]);
    }

    /**
     *    Extracts the scheme part of an incoming URL. A scheme
     *    is only recognised when followed by "://".
     *    @param string $url   URL so far. The scheme and colon
     *                         will be removed, the "//" is kept.
     *    @return string       Scheme part or false.
     *    @access private
     */
    function _chompScheme(&$url) {
        if (! preg_match('/^([^\/:]*):(\/\/)(.*)/', $url, $matches)) {
            return false;
        }
        $url = $matches[2] . $matches[3];
        return $matches[1];
    }

    /**
     *    Extracts the username and password from the
     *    incoming URL. The // prefix will be reattached
     *    to the URL after the doublet is extracted.
     *    The login must sit before the first "/", "?" or "#"
     *    so that an "@" in the query or fragment is not
     *    mistaken for a login separator. The password is
     *    everything after the first colon.
     *    @param string $url    URL so far. The username and
     *                          password are removed.
     *    @return array         Two item list of username and
     *                          password. Will urldecode() them.
     *                          Missing items are false.
     *    @access private
     */
    function _chompLogin(&$url) {
        $prefix = '';
        if (preg_match('/^(\/\/)(.*)/', $url, $matches)) {
            $prefix = $matches[1];
            $url = $matches[2];
        }
        if (! preg_match('/^([^\/?#]*)@(.*)/', $url, $matches)) {
            $url = $prefix . $url;
            return array(false, false);
        }
        $url = $prefix . $matches[2];
        $parts = explode(':', $matches[1], 2);
        return array(
                urldecode($parts[0]),
                isset($parts[1]) ? urldecode($parts[1]) : false);
    }

    /**
     *    Extracts the host part of an incoming URL.
     *    Includes the port number part. Will extract
     *    the host if it starts with // or it has
     *    a top level domain or it has at least two
     *    dots.
     *    @param string $url    URL so far. The host will be
     *                          removed.
     *    @return string        Host part guess or false.
     *    @access private
     */
    function _chompHost(&$url) {
        // An explicit "//" means the host is whatever follows.
        if (preg_match('/^(\/\/)(.*?)(\/.*|\?.*|#.*|$)/', $url, $matches)) {
            $url = $matches[3];
            return $matches[2];
        }
        // Otherwise guess from the shape of the leading chunk.
        if (! preg_match('/(.*?)(\.\.\/|\.\/|\/|\?|#|$)(.*)/', $url, $matches)) {
            return false;
        }
        $candidate = $matches[1];
        $tlds = SimpleUrl::getAllTopLevelDomains();
        $has_known_tld = preg_match('/[a-z0-9\-]+\.(' . $tlds . ')/i', $candidate);
        if ($has_known_tld || preg_match('/[a-z0-9\-]+\.[a-z0-9\-]+\.[a-z0-9\-]+/i', $candidate)) {
            $url = $matches[2] . $matches[3];
            return $candidate;
        }
        return false;
    }

    /**
     *    Extracts the path information from the incoming
     *    URL. Strips this path from the URL. The path is
     *    everything up to the first "?" or "#". A path of
     *    "0" is returned as is rather than being treated
     *    as empty.
     *    @param string $url     URL so far. The path will be
     *                           removed.
     *    @return string         Path part or ''.
     *    @access private
     */
    function _chompPath(&$url) {
        if (! preg_match('/(.*?)(\?|#|$)(.*)/', $url, $matches)) {
            return '';
        }
        $url = $matches[2] . $matches[3];
        return $matches[1];
    }

    /**
     *    Strips off the request data, that is the part
     *    between the "?" and any "#".
     *    @param string $url  URL so far. The request will be
     *                        removed.
     *    @return string      Raw request part without the "?".
     *    @access private
     */
    function _chompRequest(&$url) {
        if (! preg_match('/\?(.*?)(#|$)(.*)/', $url, $matches)) {
            return '';
        }
        $url = $matches[2] . $matches[3];
        return $matches[1];
    }

    /**
     *    Breaks the request down into an object. The raw
     *    string is remembered so that it can be replayed
     *    unchanged until the request is modified. Both names
     *    and values are urldecode()d; this presumes the
     *    encoding object encodes them again on output
     *    (TODO confirm against SimpleGetEncoding).
     *    @param string $raw           Raw request.
     *    @return SimpleFormEncoding    Parsed data.
     *    @access private
     */
    function _parseRequest($raw) {
        $this->_raw = $raw;
        $request = new SimpleGetEncoding();
        foreach (explode('&', $raw) as $pair) {
            if (preg_match('/(.*?)=(.*)/', $pair, $matches)) {
                $request->add(urldecode($matches[1]), urldecode($matches[2]));
            } elseif ($pair !== '') {
                // A bare name with no "=" gets an empty value.
                $request->add(urldecode($pair), '');
            }
        }
        return $request;
    }

    /**
     *    Accessor for protocol part.
     *    @param string $default    Value to use if not present.
     *    @return string            Scheme name, e.g "http".
     *    @access public
     */
    function getScheme($default = false) {
        if ($this->_scheme) {
            return $this->_scheme;
        }
        return $default;
    }

    /**
     *    Accessor for user name.
     *    @return string    Username preceding host.
     *    @access public
     */
    function getUsername() {
        return $this->_username;
    }

    /**
     *    Accessor for password.
     *    @return string    Password preceding host.
     *    @access public
     */
    function getPassword() {
        return $this->_password;
    }

    /**
     *    Accessor for hostname. The port is held separately.
     *    @param string $default    Value to use if not present.
     *    @return string            Hostname only.
     *    @access public
     */
    function getHost($default = false) {
        if ($this->_host) {
            return $this->_host;
        }
        return $default;
    }

    /**
     *    Accessor for top level domain. Uses pathinfo() to
     *    pick out whatever follows the last dot of the host.
     *    @return string       Last part of host or false.
     *    @access public
     */
    function getTld() {
        $path_parts = pathinfo($this->getHost());
        if (isset($path_parts['extension'])) {
            return $path_parts['extension'];
        }
        return false;
    }

    /**
     *    Accessor for port number.
     *    @return integer    TCP/IP port number or false.
     *    @access public
     */
    function getPort() {
        return $this->_port;
    }

    /**
     *    Accessor for path.
     *    @return string    Full path including leading slash if implied.
     *    @access public
     */
    function getPath() {
        // A host with no path implies the root document.
        if ((string) $this->_path === '' && $this->_host) {
            return '/';
        }
        return $this->_path;
    }

    /**
     *    Accessor for page if any. This may be a
     *    directory name if ambiguous.
     *    @return            Page name.
     *    @access public
     */
    function getPage() {
        if (preg_match('/([^\/]*?)$/', $this->getPath(), $matches)) {
            return $matches[1];
        }
        return false;
    }

    /**
     *    Gets the path to the page.
     *    @return string       Path less the page, or false
     *                         if the path has no slash.
     *    @access public
     */
    function getBasePath() {
        if (preg_match('/(.*\/)[^\/]*?$/', $this->getPath(), $matches)) {
            return $matches[1];
        }
        return false;
    }

    /**
     *    Accessor for fragment at end of URL after the "#".
     *    @return string    Part after "#".
     *    @access public
     */
    function getFragment() {
        return $this->_fragment;
    }

    /**
     *    Sets image coordinates. Set to false to clear
     *    them. Both are cleared if either one is false.
     *    @param integer $x    Horizontal position.
     *    @param integer $y    Vertical position.
     *    @access public
     */
    function setCoordinates($x = false, $y = false) {
        if (($x !== false) && ($y !== false)) {
            $this->_x = (int) $x;
            $this->_y = (int) $y;
            return;
        }
        $this->_x = false;
        $this->_y = false;
    }

    /**
     *    Accessor for horizontal image coordinate.
     *    @return integer        X value.
     *    @access public
     */
    function getX() {
        return $this->_x;
    }

    /**
     *    Accessor for vertical image coordinate.
     *    @return integer        Y value.
     *    @access public
     */
    function getY() {
        return $this->_y;
    }

    /**
     *    Accessor for current request parameters
     *    in URL string form. Will return the original request
     *    if at all possible even if it doesn't make much
     *    sense.
     *    @return string   Form is string "?a=1&b=2", etc.
     *    @access public
     */
    function getEncodedRequest() {
        if (is_string($this->_raw) && $this->_raw !== '') {
            $encoded = $this->_raw;
        } else {
            $encoded = $this->_request->asUrlRequest();
        }
        if ((string) $encoded === '') {
            return '';
        }
        // Guarantee exactly one leading "?".
        return '?' . preg_replace('/^\?/', '', $encoded);
    }

    /**
     *    Adds an additional parameter to the request.
     *    The remembered raw request is discarded.
     *    @param string $key            Name of parameter.
     *    @param string $value          Value as string.
     *    @access public
     */
    function addRequestParameter($key, $value) {
        $this->_raw = false;
        $this->_request->add($key, $value);
    }

    /**
     *    Adds additional parameters to the request.
     *    The remembered raw request is discarded.
     *    @param hash/SimpleFormEncoding $parameters   Additional
     *                                                parameters.
     *    @access public
     */
    function addRequestParameters($parameters) {
        $this->_raw = false;
        $this->_request->merge($parameters);
    }

    /**
     *    Clears down all parameters.
     *    @access public
     */
    function clearRequest() {
        $this->_raw = false;
        $this->_request = new SimpleGetEncoding();
    }

    /**
     *    Gets the frame target if present. Although
     *    not strictly part of the URL specification it
     *    acts similarly to the browser.
     *    @return boolean/string    Frame name or false if none.
     *    @access public
     */
    function getTarget() {
        return $this->_target;
    }

    /**
     *    Attaches a frame target. Also discards the
     *    remembered raw request.
     *    @param string $frame        Name of frame.
     *    @access public
     */
    function setTarget($frame) {
        $this->_raw = false;
        $this->_target = $frame;
    }

    /**
     *    Renders the URL back into a string. The scheme
     *    defaults to "http" when a host is known, port 80
     *    is left out and only paths starting with "/" are
     *    normalised.
     *    @return string        URL in canonical form.
     *    @access public
     */
    function asString() {
        $scheme = $host = $port = '';
        $identity = $this->getIdentity() ? $this->getIdentity() . '@' : '';
        if ($this->getHost()) {
            $scheme = $this->getScheme('http') . '://';
            $host = $this->getHost();
        }
        if ($this->getPort() && $this->getPort() != 80) {
            $port = ':' . $this->getPort();
        }
        $path = $this->_path;
        if (substr($path, 0, 1) == '/') {
            $path = $this->normalisePath($path);
        }
        $encoded = $this->getEncodedRequest();
        $fragment = $this->getFragment() ? '#' . $this->getFragment() : '';
        $coords = '';
        if ($this->getX() !== false) {
            $coords = '?' . $this->getX() . ',' . $this->getY();
        }
        return $scheme . $identity . $host . $port . $path . $encoded . $fragment . $coords;
    }

    /**
     *    Replaces unknown sections to turn a relative
     *    URL into an absolute one. The base URL can
     *    be either a string or a SimpleUrl object.
     *    Scheme, host and port come from this URL if it
     *    has a host and from the base otherwise. The login
     *    falls back to the one on the base.
     *    @param string/SimpleUrl $base       Base URL.
     *    @return SimpleUrl                   New absolute URL.
     *    @access public
     */
    function makeAbsolute($base) {
        if (! is_object($base)) {
            $base = new SimpleUrl($base);
        }
        $source = $this->getHost() ? $this : $base;
        $scheme = $source->getScheme();
        $host = $source->getHost();
        $port = $source->getPort() ? ':' . $source->getPort() : '';
        $identity = $source->getIdentity();
        if (! $identity) {
            $identity = $base->getIdentity();
        }
        $identity = $identity ? $identity . '@' : '';
        $path = $this->normalisePath($this->_extractAbsolutePath($base));
        $encoded = $this->getEncodedRequest();
        $fragment = $this->getFragment() ? '#' . $this->getFragment() : '';
        $coords = '';
        if ($this->getX() !== false) {
            $coords = '?' . $this->getX() . ',' . $this->getY();
        }
        return new SimpleUrl(
                $scheme . '://' . $identity . $host . $port .
                $path . $encoded . $fragment . $coords);
    }

    /**
     *    Replaces unknown sections of the path with base parts
     *    to return a complete absolute one.
     *    @param string/SimpleUrl $base       Base URL.
     *    @return string                      Absolute path.
     *    @access private
     */
    function _extractAbsolutePath($base) {
        if ($this->getHost() || ! $this->_isRelativePath($this->_path)) {
            return $this->_path;
        }
        // Compare with '' so that a relative page called "0" survives.
        if ((string) $this->_path !== '') {
            return $base->getBasePath() . $this->_path;
        }
        return $base->getPath();
    }

    /**
     *    Simple test to see if a path part is relative.
     *    @param string $path        Path to test.
     *    @return boolean            True unless it starts with a "/".
     *    @access private
     */
    function _isRelativePath($path) {
        return (substr($path, 0, 1) != '/');
    }

    /**
     *    Extracts the username and password for use in rendering
     *    a URL. Both must be present for a result.
     *    @return string/boolean    Form of username:password or false.
     *    @access public
     */
    function getIdentity() {
        if (! $this->_username || ! $this->_password) {
            return false;
        }
        return $this->_username . ':' . $this->_password;
    }

    /**
     *    Replaces . and .. sections of the path. The
     *    replacement is repeated until the path stops
     *    changing, as one pass cannot collapse stacked
     *    sections such as "/a/b/../../c" or "/a/././b".
     *    Every change shortens the path, so the loop ends.
     *    @param string $path    Unoptimised path.
     *    @return string         Path with dots removed if possible.
     *    @access public
     */
    function normalisePath($path) {
        do {
            $previous = $path;
            $path = preg_replace('|/\./|', '/', $path);
            $path = preg_replace('|/[^/]+/\.\./|', '/', $path);
        } while ($path !== $previous);
        return $path;
    }

    /**
     *    A pipe separated list of all TLDs that result in two part
     *    domain names. The list is spliced straight into a
     *    regular expression by the host guessing code.
     *    @return string        Pipe separated list.
     *    @access public
     *    @static
     */
    function getAllTopLevelDomains() {
        return 'com|edu|net|org|gov|mil|int|biz|info|name|pro|aero|coop|museum';
    }
}
531
?>
532