Completed
Push — master ( 1be2e7...d38097 )
by Sam
23s
created

Convert::linkIfMatch()   A

Complexity

Conditions 2
Paths 2

Size

Total Lines 8
Code Lines 5

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 2
eloc 5
nc 2
nop 1
dl 0
loc 8
rs 9.4285
c 0
b 0
f 0
1
<?php
2
3
namespace SilverStripe\Core;
4
5
use SilverStripe\ORM\DB;
6
use SilverStripe\View\Parsers\URLSegmentFilter;
7
use InvalidArgumentException;
8
use SimpleXMLElement;
9
use Exception;
10
11
/**
 * Library of conversion functions, implemented as static methods.
 *
 * The methods are all of the form (format)2(format), where the format is one of
 *
 *  raw: A UTF8 string
 *  attr: A UTF8 string suitable for inclusion in an HTML attribute
 *  js: A UTF8 string suitable for inclusion in a double-quoted javascript string.
 *
 *  array: A PHP associative array
 *  json: JavaScript object notation
 *
 *  html: HTML source suitable for use in a page or email
 *  text: Plain-text content, suitable for display to a user as-is, or insertion in a plaintext email.
 *
 * Objects of type {@link ViewableData} can have an "escaping type",
 * which determines if they are automatically escaped before output by {@link SSViewer}.
 */
class Convert
{

    /**
     * Convert a value to be suitable for an XML attribute.
     * Delegates to {@link raw2xml()}.
     *
     * @param array|string $val String to escape, or array of strings
     * @return array|string
     */
    public static function raw2att($val)
    {
        return self::raw2xml($val);
    }

    /**
     * Convert a value to be suitable for an HTML attribute.
     * Delegates to {@link raw2att()}.
     *
     * @param string|array $val String to escape, or array of strings
     * @return array|string
     */
    public static function raw2htmlatt($val)
    {
        return self::raw2att($val);
    }

    /**
     * Convert a value to be suitable for an HTML name attribute.
     *
     * Values are escaped with {@link raw2att()}; no characters are removed
     * or replaced beyond that escaping.
     *
     * @see http://www.w3.org/TR/REC-html40/types.html#type-cdata
     *
     * @param array|string $val String to escape, or array of strings
     *
     * @return array|string
     */
    public static function raw2htmlname($val)
    {
        if (is_array($val)) {
            foreach ($val as $k => $v) {
                $val[$k] = self::raw2htmlname($v);
            }

            return $val;
        }

        return self::raw2att($val);
    }

    /**
     * Convert a value to be suitable for an HTML ID attribute. Each run of
     * characters outside a-z, A-Z, 0-9, "-", "_", ":" and "." is replaced with
     * a single underscore; leading and trailing underscores are trimmed.
     *
     * @see http://www.w3.org/TR/REC-html40/types.html#type-cdata
     *
     * @param array|string $val String to escape, or array of strings
     *
     * @return array|string
     */
    public static function raw2htmlid($val)
    {
        if (is_array($val)) {
            foreach ($val as $k => $v) {
                $val[$k] = self::raw2htmlid($v);
            }

            return $val;
        }

        // Replace unsupported characters, then collapse repeated underscores
        $replaced = preg_replace('/[^a-zA-Z0-9\-_:.]+/', '_', $val);
        $collapsed = preg_replace('/_+/', '_', $replaced);

        return trim($collapsed, '_');
    }

    /**
     * Ensure that text is properly escaped for XML.
     *
     * @see http://www.w3.org/TR/REC-xml/#dt-escape
     * @param array|string $val String to escape, or array of strings
     * @return array|string
     */
    public static function raw2xml($val)
    {
        if (is_array($val)) {
            foreach ($val as $k => $v) {
                $val[$k] = self::raw2xml($v);
            }
            return $val;
        }

        return htmlspecialchars($val, ENT_QUOTES, 'UTF-8');
    }

    /**
     * Ensure that text is properly escaped for Javascript.
     *
     * Backslashes, quotes and newlines are backslash-escaped; "<", ">" and "&"
     * are emitted as \x hex escapes.
     *
     * @param array|string $val String to escape, or array of strings
     * @return array|string
     */
    public static function raw2js($val)
    {
        if (is_array($val)) {
            foreach ($val as $k => $v) {
                $val[$k] = self::raw2js($v);
            }
            return $val;
        }

        return str_replace(
            // Intercepts some characters such as <, >, and & which can interfere
            ["\\", '"', "\n", "\r", "'", "<", ">", "&"],
            ["\\\\", '\"', '\n', '\r', "\\'", "\\x3c", "\\x3e", "\\x26"],
            $val
        );
    }

    /**
     * Encode a value as a JSON encoded string.
     *
     * @param mixed $val Value to be encoded
     * @return string JSON encoded string
     */
    public static function raw2json($val)
    {
        return json_encode($val);
    }

    /**
     * Encode an array as a JSON encoded string.
     * This is an alias to {@link raw2json()}
     *
     * @param array $val Array to convert
     * @return string JSON encoded string
     */
    public static function array2json($val)
    {
        return self::raw2json($val);
    }

    /**
     * Safely encodes a value (or list of values) using the current database's
     * safe string encoding method
     *
     * @param mixed|array $val Input value, or list of values as an array
     * @param boolean $quoted Flag indicating whether the value should be safely
     * quoted, instead of only being escaped. By default this function will
     * only escape the string (false).
     * @return string|array Safely encoded value in the same format as the input
     */
    public static function raw2sql($val, $quoted = false)
    {
        if (is_array($val)) {
            foreach ($val as $k => $v) {
                $val[$k] = self::raw2sql($v, $quoted);
            }
            return $val;
        }

        if ($quoted) {
            return DB::get_conn()->quoteString($val);
        }

        return DB::get_conn()->escapeString($val);
    }

    /**
     * Safely encodes a SQL symbolic identifier (or list of identifiers), such as a database,
     * table, or column name. Supports encoding of multi identifiers separated by
     * a delimiter (e.g. ".")
     *
     * @param string|array $identifier The identifier to escape. E.g. 'SiteTree.Title' or list of identifiers
     * to be joined via the separator.
     * @param string $separator The string that delimits subsequent identifiers
     * @return string The escaped identifier. E.g. '"SiteTree"."Title"'
     */
    public static function symbol2sql($identifier, $separator = '.')
    {
        return DB::get_conn()->escapeIdentifier($identifier, $separator);
    }

    /**
     * Convert XML to raw text.
     *
     * Values containing a "<" are passed to {@link html2raw()}; anything else
     * only has its entities decoded.
     *
     * @uses html2raw()
     * @todo Currently &#xxx; entries are stripped; they should be converted
     * @param mixed $val
     * @return array|string
     */
    public static function xml2raw($val)
    {
        if (is_array($val)) {
            foreach ($val as $k => $v) {
                $val[$k] = self::xml2raw($v);
            }
            return $val;
        }

        // More complex text needs to use html2raw instead
        if (strpos($val, '<') !== false) {
            return self::html2raw($val);
        }

        return html_entity_decode($val, ENT_QUOTES, 'UTF-8');
    }

    /**
     * Convert a JSON encoded string into an object.
     *
     * @param string $val
     * @return mixed Value returned by json_decode() without the associative flag
     */
    public static function json2obj($val)
    {
        return json_decode($val);
    }

    /**
     * Convert a JSON string into an array.
     *
     * @param string $val JSON string to convert
     * @return mixed Value returned by json_decode() with the associative flag set
     */
    public static function json2array($val)
    {
        return json_decode($val, true);
    }

    /**
     * Converts an XML string to a PHP array
     * See http://phpsecurity.readthedocs.org/en/latest/Injection-Attacks.html#xml-external-entity-injection
     *
     * @uses recursiveXMLToArray()
     * @param string $val
     * @param boolean $disableDoctypes Disables the use of DOCTYPE, and will trigger an error if encountered.
     * false by default.
     * @param boolean $disableExternals Disables the loading of external entities. false by default.
     * @return array
     * @throws InvalidArgumentException If $disableDoctypes is set and the input contains a DOCTYPE
     * @throws Exception If the XML cannot be parsed
     */
    public static function xml2array($val, $disableDoctypes = false, $disableExternals = false)
    {
        // Check doctype. A plain substring check is used so that a DOCTYPE spread
        // over several lines, or one without an internal subset, is also rejected.
        if ($disableDoctypes && strpos($val, '<!DOCTYPE') !== false) {
            throw new InvalidArgumentException('XML Doctype parsing disabled');
        }

        // Disable external entity loading. libxml_disable_entity_loader() is
        // deprecated as of PHP 8.0, so it is only toggled on older versions.
        $toggleLoader = $disableExternals && PHP_VERSION_ID < 80000;
        $oldVal = null;
        if ($toggleLoader) {
            $oldVal = libxml_disable_entity_loader(true);
        }

        try {
            return self::recursiveXMLToArray(new SimpleXMLElement($val));
        } finally {
            // Always restore the previous loader state, even when parsing fails
            if ($toggleLoader) {
                libxml_disable_entity_loader($oldVal);
            }
        }
    }

    /**
     * Convert a XML string to a PHP array recursively. Do not
     * call this function directly, Please use {@link Convert::xml2array()}
     *
     * Attributes of an element with a truthy value are collected under the
     * '@' key of the resulting array.
     *
     * @param SimpleXMLElement|array|string $xml
     *
     * @return mixed
     */
    protected static function recursiveXMLToArray($xml)
    {
        $element = null;
        // Explicitly initialised so the attribute check below never relies on
        // an undefined variable
        $attributes = [];

        if ($xml instanceof SimpleXMLElement) {
            foreach ($xml->attributes() as $k => $v) {
                if ($v) {
                    $attributes[$k] = (string) $v;
                }
            }
            $element = $xml;
            $xml = get_object_vars($xml);
        }

        if (is_array($xml)) {
            if (count($xml) == 0) {
                return (string) $element;
            } // for CDATA

            $result = [];
            foreach ($xml as $key => $value) {
                $result[$key] = self::recursiveXMLToArray($value);
            }

            // Attributes
            if ($attributes) {
                $result['@'] = $attributes;
            }

            return $result;
        }

        return (string) $xml;
    }

    /**
     * Create a link if the string is a valid URL
     *
     * The URL is HTML-escaped before being placed in the markup.
     *
     * @param string $string The string to linkify
     * @return string A link to the URL if string is a URL, otherwise the unchanged input
     */
    public static function linkIfMatch($string)
    {
        // NOTE: "$-_" inside the character class is a range that also admits
        // characters such as "/", ":", "<", ">" and "'". The pattern is left
        // unchanged so the same inputs keep matching; escaping the output below
        // is what keeps those characters from being emitted as markup.
        if (preg_match('/^[a-z+]+\:\/\/[a-zA-Z0-9$-_.+?&=!*\'()%]+$/', $string)) {
            $escaped = self::raw2att($string);
            return "<a style=\"white-space: nowrap\" href=\"$escaped\">$escaped</a>";
        }

        return $string;
    }

    /**
     * Simple conversion of HTML to plaintext.
     *
     * @param string $data Input data
     * @param bool $preserveLinks If true, <a> tags are kept in the output instead of
     * being expanded to "text[url]"
     * @param int $wordWrap Line width passed to wordwrap(); no wrapping when 0
     * @param array $config Optional overrides for the keys 'PreserveLinks',
     * 'ReplaceBoldAsterisk', 'CompressWhitespace' and 'ReplaceImagesWithAlt'
     * @return string
     */
    public static function html2raw($data, $preserveLinks = false, $wordWrap = 0, $config = null)
    {
        $defaultConfig = [
            'PreserveLinks' => false,
            'ReplaceBoldAsterisk' => true,
            'CompressWhitespace' => true,
            'ReplaceImagesWithAlt' => true,
        ];
        $config = isset($config) ? array_merge($defaultConfig, $config) : $defaultConfig;

        // Drop style and script elements together with their content
        $data = preg_replace("/<style([^A-Za-z0-9>][^>]*)?>.*?<\/style[^>]*>/is", "", $data);
        $data = preg_replace("/<script([^A-Za-z0-9>][^>]*)?>.*?<\/script[^>]*>/is", "", $data);

        if ($config['ReplaceBoldAsterisk']) {
            $data = preg_replace('%<(strong|b)( [^>]*)?>|</(strong|b)>%i', '*', $data);
        }

        // Expand hyperlinks
        if (!$preserveLinks && !$config['PreserveLinks']) {
            $data = preg_replace_callback('/<a[^>]*href\s*=\s*"([^"]*)">(.*?)<\/a>/i', function ($matches) {
                return Convert::html2raw($matches[2]) . "[$matches[1]]";
            }, $data);
            $data = preg_replace_callback('/<a[^>]*href\s*=\s*([^ ]*)>(.*?)<\/a>/i', function ($matches) {
                return Convert::html2raw($matches[2]) . "[$matches[1]]";
            }, $data);
        }

        // Replace images with their alt tags
        if ($config['ReplaceImagesWithAlt']) {
            $data = preg_replace('/<img[^>]*alt *= *"([^"]*)"[^>]*>/i', ' \\1 ', $data);
            $data = preg_replace('/<img[^>]*alt *= *([^ ]*)[^>]*>/i', ' \\1 ', $data);
        }

        // Compress whitespace
        if ($config['CompressWhitespace']) {
            $data = preg_replace("/\s+/", " ", $data);
        }

        // Parse newline tags
        $data = preg_replace("/\s*<[Hh][1-6]([^A-Za-z0-9>][^>]*)?> */", "\n\n", $data);
        $data = preg_replace("/\s*<[Pp]([^A-Za-z0-9>][^>]*)?> */", "\n\n", $data);
        $data = preg_replace("/\s*<[Dd][Ii][Vv]([^A-Za-z0-9>][^>]*)?> */", "\n\n", $data);
        $data = preg_replace("/\n\n\n+/", "\n\n", $data);

        $data = preg_replace("/<[Bb][Rr]([^A-Za-z0-9>][^>]*)?> */", "\n", $data);
        $data = preg_replace("/<[Tt][Rr]([^A-Za-z0-9>][^>]*)?> */", "\n", $data);
        $data = preg_replace("/<\/[Tt][Dd]([^A-Za-z0-9>][^>]*)?> */", "    ", $data);
        $data = preg_replace('/<\/p>/i', "\n\n", $data);

        // Replace HTML entities
        $data = html_entity_decode($data, ENT_QUOTES, 'UTF-8');

        // Remove all tags (but optionally keep links)
        // strip_tags seemed to be restricting the length of the output
        // arbitrarily. This essentially does the same thing.
        if (!$preserveLinks && !$config['PreserveLinks']) {
            $data = preg_replace('/<\/?[^>]*>/', '', $data);
        } else {
            $data = strip_tags($data, '<a>');
        }

        // Wrap
        if ($wordWrap) {
            $data = wordwrap(trim($data), $wordWrap);
        }
        return trim($data);
    }

    /**
     * There are no real specifications on correctly encoding mailto-links,
     * but this seems to be compatible with most of the user-agents.
     * Does nearly the same as rawurlencode().
     * Please only encode the values, not the whole url, e.g.
     * "mailto:[email protected]?subject=" . Convert::raw2mailto($subject)
     *
     * @param string $data
     * @return string
     * @see http://www.ietf.org/rfc/rfc1738.txt
     */
    public static function raw2mailto($data)
    {
        return str_ireplace(
            ["\n", '?', '=', ' ', '(', ')', '&', '@', '"', '\'', ';'],
            ['%0A', '%3F', '%3D', '%20', '%28', '%29', '%26', '%40', '%22', '%27', '%3B'],
            $data
        );
    }

    /**
     * Convert a string (normally a title) to a string suitable for using in
     * urls and other html attributes. Uses {@link URLSegmentFilter}.
     *
     * @param string $title
     * @return string
     */
    public static function raw2url($title)
    {
        return URLSegmentFilter::create()->filter($title);
    }

    /**
     * Normalises newline sequences to conform to (an) OS specific format.
     *
     * @param string $data Text containing potentially mixed formats of newline
     * sequences including \r, \r\n, \n, or unicode newline characters
     * @param string $nl The newline sequence to normalise to. Defaults to that
     * specified by the current OS
     * @return string
     */
    public static function nl2os($data, $nl = PHP_EOL)
    {
        return preg_replace('~\R~u', $nl, $data);
    }

    /**
     * Encode a value into a string that can be used as part of a filename.
     * All string data must be UTF-8 encoded.
     *
     * The value is JSON encoded, then base64 encoded with "+" and "/" swapped
     * for "~" and "_" and the trailing "=" padding removed.
     *
     * @param mixed $val Value to be encoded
     * @return string
     */
    public static function base64url_encode($val)
    {
        return rtrim(strtr(base64_encode(json_encode($val)), '+/', '~_'), '=');
    }

    /**
     * Decode a value that was encoded with Convert::base64url_encode.
     *
     * @param string $val Value to be decoded
     * @return mixed Original value
     */
    public static function base64url_decode($val)
    {
        $base64 = strtr($val, '~_', '+/');

        // Restore the "=" padding stripped by base64url_encode() so the input
        // length is a multiple of four again
        $remainder = strlen($base64) % 4;
        if ($remainder > 0) {
            $base64 .= str_repeat('=', 4 - $remainder);
        }

        return json_decode(base64_decode($base64), true);
    }

    /**
     * Converts upper camel case names to lower camel case,
     * with leading upper case characters replaced with lower case.
     * Tries to retain word case.
     *
     * Examples:
     * - ID => id
     * - IDField => idField
     * - iDField => iDField
     *
     * @param string $str
     * @return string
     */
    public static function upperCamelToLowerCamel($str)
    {
        $matches = null;

        if (preg_match('/(^[A-Z]{1,})([A-Z]{1})([a-z]+.*)/', $str, $matches)) {
            // If string has trailing lowercase after more than one leading uppercase characters,
            // match everything but the last leading uppercase character.
            return strtolower($matches[1]) . $matches[2] . $matches[3];
        }

        if (preg_match('/(^[A-Z]{1})([a-z]+.*)/', $str, $matches)) {
            // If string has trailing lowercase after exactly one leading uppercase character,
            // lowercase that single leading character.
            return strtolower($matches[1]) . $matches[2];
        }

        if (preg_match('/^[A-Z]+$/', $str)) {
            // If string has leading uppercase without trailing lowercase,
            // just lowercase the whole thing.
            return strtolower($str);
        }

        // If string has no leading uppercase, just return.
        return $str;
    }
}
551