Completed
Push — master ( eb6d5e...9d1a7a )
by Christian
10:30
created

HTMLPurifier_URI::validate()   F

Complexity

Conditions 20
Paths 1512

Size

Total Lines 106
Code Lines 47

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 20
eloc 47
nc 1512
nop 2
dl 0
loc 106
rs 2
c 0
b 0
f 0

How to fix   Long Method    Complexity   

Long Method

Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.

For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.

Commonly applied refactorings include:

1
<?php
2
3
/**
4
 * HTML Purifier's internal representation of a URI.
5
 * @note
6
 *      Internal data-structures are completely escaped. If the data needs
7
 *      to be used in a non-URI context (which is very unlikely), be sure
8
 *      to decode it first. The URI may not necessarily be well-formed until
9
 *      validate() is called.
10
 */
11
class HTMLPurifier_URI
{
    /**
     * Scheme component; lowercased by the constructor, null when absent.
     * @type string
     */
    public $scheme;

    /**
     * Userinfo component (the part before '@' in the authority), or null.
     * @type string
     */
    public $userinfo;

    /**
     * Host component. Null means "no authority"; an empty string means an
     * empty authority, and toString() renders the two differently.
     * @type string
     */
    public $host;

    /**
     * Port component, cast to int by the constructor; null when absent.
     * @type int
     */
    public $port;

    /**
     * Path component.
     * @type string
     */
    public $path;

    /**
     * Query component (without the leading '?'), or null.
     * @type string
     */
    public $query;

    /**
     * Fragment component (without the leading '#'), or null.
     * @type string
     */
    public $fragment;

    /**
     * @param string $scheme
     * @param string $userinfo
     * @param string $host
     * @param int $port
     * @param string $path
     * @param string $query
     * @param string $fragment
     * @note Automatically normalizes scheme and port
     */
    public function __construct($scheme, $userinfo, $host, $port, $path, $query, $fragment)
    {
        // A null or already-lowercase scheme is stored untouched; anything
        // else is lowercased.
        $this->scheme = (is_null($scheme) || ctype_lower($scheme)) ? $scheme : strtolower($scheme);
        $this->userinfo = $userinfo;
        $this->host = $host;
        // Null is preserved so that "no port" stays distinguishable from 0.
        $this->port = is_null($port) ? $port : (int)$port;
        $this->path = $path;
        $this->query = $query;
        $this->fragment = $fragment;
    }

    /**
     * Retrieves a scheme object corresponding to the URI's scheme/default
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return HTMLPurifier_URIScheme|bool Scheme object appropriate for
     *         validating this URI, or false when none could be obtained
     */
    public function getSchemeObj($config, $context)
    {
        $registry = HTMLPurifier_URISchemeRegistry::instance();

        if ($this->scheme !== null) {
            $scheme_obj = $registry->getScheme($this->scheme, $config, $context);
            if (!$scheme_obj) {
                // invalid scheme, clean it out
                return false;
            }
            return $scheme_obj;
        }

        // no scheme: retrieve the default one
        $def = $config->getDefinition('URI');
        $scheme_obj = $def->getDefaultScheme($config, $context);
        if (!$scheme_obj) {
            // something funky happened to the default scheme object
            trigger_error(
                'Default scheme object "' . $def->defaultScheme . '" was not readable',
                E_USER_WARNING
            );
            return false;
        }
        return $scheme_obj;
    }

    /**
     * Generic validation method applicable for all schemes. May modify
     * this URI in order to get it into a compliant form.
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return bool True if validation/filtering succeeds, false if failure
     *         (this generic implementation always returns true)
     */
    public function validate($config, $context)
    {
        // ABNF definitions from RFC 3986
        $chars_sub_delims = '!$&\'()*+,;=';
        $chars_pchar = $chars_sub_delims . ':@';

        // validate host
        if (!is_null($this->host)) {
            $host_def = new HTMLPurifier_AttrDef_URI_Host();
            $this->host = $host_def->validate($this->host, $config, $context);
            if ($this->host === false) {
                $this->host = null;
            }
        }

        // validate scheme
        // NOTE: It's not appropriate to check whether or not this
        // scheme is in our registry, since a URIFilter may convert a
        // URI that we don't allow into one we do.  So instead, we just
        // check if the scheme can be dropped because there is no host
        // and it is our default scheme.
        //
        // The host test is grouped explicitly: && binds tighter than ||,
        // so without the parentheses an empty host alone would enter this
        // branch even when there is no scheme to drop.
        if (!is_null($this->scheme) && (is_null($this->host) || $this->host === '')) {
            // support for relative paths is pretty abysmal when the
            // scheme is present, so axe it when possible
            $def = $config->getDefinition('URI');
            if ($def->defaultScheme === $this->scheme) {
                $this->scheme = null;
            }
        }

        // validate username
        if (!is_null($this->userinfo)) {
            $encoder = new HTMLPurifier_PercentEncoder($chars_sub_delims . ':');
            $this->userinfo = $encoder->encode($this->userinfo);
        }

        // validate port: anything outside 1..65535 is discarded
        if (!is_null($this->port) && ($this->port < 1 || $this->port > 65535)) {
            $this->port = null;
        }

        // validate path
        $this->path = $this->encodePath($chars_sub_delims, $chars_pchar);

        // qf = query and fragment
        $qf_encoder = new HTMLPurifier_PercentEncoder($chars_pchar . '/?');

        if (!is_null($this->query)) {
            $this->query = $qf_encoder->encode($this->query);
        }

        if (!is_null($this->fragment)) {
            $this->fragment = $qf_encoder->encode($this->fragment);
        }
        return true;
    }

    /**
     * Computes the validated form of the path, choosing the encoding rule
     * from the RFC 3986 path production that applies given the (already
     * validated) host and scheme. Does not modify the object.
     * @param string $chars_sub_delims RFC 3986 sub-delims characters
     * @param string $chars_pchar Extra characters permitted in a pchar
     * @return string Path to store back on the URI
     */
    private function encodePath($chars_sub_delims, $chars_pchar)
    {
        $segments_encoder = new HTMLPurifier_PercentEncoder($chars_pchar . '/');

        if (!is_null($this->host)) { // this catches $this->host === ''
            // path-abempty (hier and relative)
            // http://www.example.com/my/path
            // //www.example.com/my/path (looks odd, but works, and
            //                            recognized by most browsers)
            // (this set is valid or invalid on a scheme by scheme
            // basis, so we'll deal with it later)
            // file:///my/path
            // ///my/path
            return $segments_encoder->encode($this->path);
        }

        if ($this->path === '') {
            // path-empty (hier and relative)
            return '';
        }

        if ($this->path[0] === '/') {
            // path-absolute (hier and relative)
            // http:/my/path
            // /my/path
            if (strlen($this->path) >= 2 && $this->path[1] === '/') {
                // This could happen if the host gets stripped out:
                // http://my/path
                // //my/path
                // With no host left, a leading "//" would be read back as
                // an authority, so the path is dropped entirely.
                return '';
            }
            return $segments_encoder->encode($this->path);
        }

        if (!is_null($this->scheme)) {
            // path-rootless (hier)
            // http:my/path
            // (the empty-path case was handled above, so nz needs no check)
            return $segments_encoder->encode($this->path);
        }

        // path-noscheme (relative)
        // my/path
        // (once again, not checking nz)
        // The first segment gets its own encoder, built without ':' in its
        // character list; the remainder uses the ordinary segment encoder.
        $segment_nc_encoder = new HTMLPurifier_PercentEncoder($chars_sub_delims . '@');
        $slash_pos = strpos($this->path, '/');
        if ($slash_pos === false) {
            return $segment_nc_encoder->encode($this->path);
        }
        return $segment_nc_encoder->encode(substr($this->path, 0, $slash_pos)) .
            $segments_encoder->encode(substr($this->path, $slash_pos));
    }

    /**
     * Convert URI back to string
     * @return string URI appropriate for output
     */
    public function toString()
    {
        // reconstruct authority
        $authority = null;
        // there is a rendering difference between a null authority
        // (http:foo-bar) and an empty string authority
        // (http:///foo-bar).
        if (!is_null($this->host)) {
            $authority = '';
            if (!is_null($this->userinfo)) {
                $authority .= $this->userinfo . '@';
            }
            $authority .= $this->host;
            if (!is_null($this->port)) {
                $authority .= ':' . $this->port;
            }
        }

        // Reconstruct the result
        // One might wonder about parsing quirks from browsers after
        // this reconstruction.  Unfortunately, parsing behavior depends
        // on what *scheme* was employed (file:///foo is handled *very*
        // differently than http:///foo), so unfortunately we have to
        // defer to the schemes to do the right thing.
        $result = '';
        if (!is_null($this->scheme)) {
            $result .= $this->scheme . ':';
        }
        if (!is_null($authority)) {
            $result .= '//' . $authority;
        }
        $result .= $this->path;
        if (!is_null($this->query)) {
            $result .= '?' . $this->query;
        }
        if (!is_null($this->fragment)) {
            $result .= '#' . $this->fragment;
        }

        return $result;
    }

    /**
     * Returns true if this URL might be considered a 'local' URL given
     * the current context.  This is true when the host is null, or
     * when it matches the host supplied to the configuration.
     *
     * Note that this does not do any scheme checking, so it is mostly
     * only appropriate for metadata that doesn't care about protocol
     * security.  isBenign is probably what you actually want.
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return bool
     */
    public function isLocal($config, $context)
    {
        if ($this->host === null) {
            // The configuration is not consulted for host-less URIs.
            return true;
        }
        $uri_def = $config->getDefinition('URI');
        return $uri_def->host === $this->host;
    }

    /**
     * Returns true if this URL should be considered a 'benign' URL,
     * that is:
     *
     *      - It is a local URL (isLocal), and
     *      - It has an equal or better level of security
     * @param HTMLPurifier_Config $config
     * @param HTMLPurifier_Context $context
     * @return bool
     */
    public function isBenign($config, $context)
    {
        if (!$this->isLocal($config, $context)) {
            return false;
        }

        $scheme_obj = $this->getSchemeObj($config, $context);
        if (!$scheme_obj) {
            // conservative approach
            return false;
        }

        $current_scheme_obj = $config->getDefinition('URI')->getDefaultScheme($config, $context);
        // getDefaultScheme() can come back empty (getSchemeObj() checks for
        // exactly that), so guard before reading ->secure. With no default
        // scheme object the secure comparison is skipped, which keeps the
        // previous result without the property read on a non-object.
        if ($current_scheme_obj && $current_scheme_obj->secure && !$scheme_obj->secure) {
            return false;
        }
        return true;
    }
}
313
314
// vim: et sw=4 sts=4
315