Passed
Pull Request — 4.10 (#10376)
by Steve
36:32 queued 29:43
created

Convert::raw2url()   A

Complexity

Conditions 1
Paths 1

Size

Total Lines 4
Code Lines 2

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 1
eloc 2
nc 1
nop 1
dl 0
loc 4
rs 10
c 0
b 0
f 0
1
<?php
2
3
namespace SilverStripe\Core;
4
5
use InvalidArgumentException;
6
use SimpleXMLElement;
7
use SilverStripe\Dev\Deprecation;
8
use SilverStripe\ORM\DB;
9
use SilverStripe\View\Parsers\URLSegmentFilter;
10
11
/**
12
 * Library of conversion functions, implemented as static methods.
13
 *
14
 * The methods are all of the form (format)2(format), where the format is one of
15
 *
16
 *  raw: A UTF8 string
17
 *  attr: A UTF8 string suitable for inclusion in an HTML attribute
18
 *  js: A UTF8 string suitable for inclusion in a double-quoted javascript string.
19
 *
20
 *  array: A PHP associative array
21
 *  json: JavaScript object notation
22
 *
23
 *  html: HTML source suitable for use in a page or email
24
 *  text: Plain-text content, suitable for display to a user as-is, or insertion in a plaintext email.
25
 *
26
 * Objects of type {@link ViewableData} can have an "escaping type",
27
 * which determines if they are automatically escaped before output by {@link SSViewer}.
28
 */
29
class Convert
30
{
31
    /**
     * Escape a value so that it can be placed inside an XML attribute.
     *
     * This is a thin wrapper that hands the value to {@link Convert::raw2xml()}.
     *
     * Warning: Does not escape array keys
     *
     * @param array|string $val String to escape, or array of strings
     * @return array|string The escaped string, or an array of escaped strings
     */
    public static function raw2att($val)
    {
        $escaped = self::raw2xml($val);

        return $escaped;
    }
43
44
    /**
     * Escape a value so that it can be placed inside an HTML attribute.
     *
     * This is a thin wrapper that hands the value to {@link Convert::raw2att()}.
     *
     * Warning: Does not escape array keys
     *
     * @param string|array $val String to escape, or array of strings
     * @return array|string The escaped string, or an array of escaped strings
     */
    public static function raw2htmlatt($val)
    {
        $escaped = self::raw2att($val);

        return $escaped;
    }
56
57
    /**
     * Convert a value to be suitable for an HTML name attribute.
     *
     * Each string is escaped with {@link Convert::raw2att()}; no characters
     * are removed or substituted. Arrays are processed element by element
     * (recursively) and their keys are preserved.
     *
     * Warning: Does not escape array keys
     *
     * @see http://www.w3.org/TR/REC-html40/types.html#type-cdata
     *
     * @param array|string $val String to escape, or array of strings
     *
     * @return array|string
     */
    public static function raw2htmlname($val)
    {
        if (!is_array($val)) {
            return self::raw2att($val);
        }

        return array_map(function ($item) {
            return self::raw2htmlname($item);
        }, $val);
    }
81
82
    /**
     * Convert a value to be suitable for an HTML ID attribute.
     *
     * Every run of characters other than letters, digits, "-", "_", ":" and "."
     * becomes a single underscore, consecutive underscores are collapsed, and
     * leading/trailing underscores are removed. Arrays are processed element
     * by element (recursively) and their keys are preserved.
     *
     * Warning: Does not escape array keys
     *
     * @see http://www.w3.org/TR/REC-html40/types.html#type-cdata
     *
     * @param array|string $val String to escape, or array of strings
     *
     * @return array|string
     */
    public static function raw2htmlid($val)
    {
        if (is_array($val)) {
            return array_map(function ($item) {
                return self::raw2htmlid($item);
            }, $val);
        }

        // Swap every unsupported run for an underscore ...
        $replaced = preg_replace('/[^a-zA-Z0-9\-_:.]+/', '_', $val);
        // ... then squash repeated underscores into one
        $collapsed = preg_replace('/_+/', '_', $replaced);

        return trim($collapsed, '_');
    }
113
114
    /**
     * Ensure that text is properly escaped for XML.
     *
     * Strings are passed through htmlspecialchars() with ENT_QUOTES and UTF-8,
     * so both single and double quotes are encoded. Arrays are processed
     * element by element (recursively) and their keys are preserved.
     *
     * Warning: Does not escape array keys
     *
     * @see http://www.w3.org/TR/REC-xml/#dt-escape
     * @param array|string $val String to escape, or array of strings
     * @return array|string
     */
    public static function raw2xml($val)
    {
        if (!is_array($val)) {
            return htmlspecialchars($val, ENT_QUOTES, 'UTF-8');
        }

        return array_map(function ($item) {
            return self::raw2xml($item);
        }, $val);
    }
134
135
    /**
     * Ensure that text is properly escaped for Javascript.
     *
     * Backslashes, quotes and line breaks are backslash-escaped, while
     * <, > and & are written as \xNN sequences. Arrays are processed
     * element by element (recursively) and their keys are preserved.
     *
     * Warning: Does not escape array keys
     *
     * @param array|string $val String to escape, or array of strings
     * @return array|string
     */
    public static function raw2js($val)
    {
        if (is_array($val)) {
            return array_map(function ($item) {
                return self::raw2js($item);
            }, $val);
        }

        // Search => replacement. The backslash entry must stay first so the
        // backslashes introduced by the later entries are not escaped again.
        $escapes = [
            "\\" => "\\\\",
            '"' => '\"',
            "\n" => '\n',
            "\r" => '\r',
            "'" => "\\'",
            '<' => "\\x3c",
            '>' => "\\x3e",
            '&' => "\\x26",
        ];

        return str_replace(array_keys($escapes), array_values($escapes), $val);
    }
159
160
    /**
     * Encode a value as a JSON string.
     *
     * Emits a deprecation notice and then forwards both arguments to
     * json_encode() unchanged.
     *
     * @deprecated 4.4.0:5.0.0 Use json_encode() instead
     * @param  mixed $val     Value to be encoded
     * @param  int   $options Optional bitmask of JSON constants
     * @return string           JSON encoded string
     */
    public static function raw2json($val, $options = 0)
    {
        Deprecation::notice('4.4', 'Please use json_encode() instead.');

        $encoded = json_encode($val, $options);

        return $encoded;
    }
175
176
    /**
     * Encode an array as a JSON string.
     *
     * Emits a deprecation notice and then forwards both arguments to
     * json_encode() unchanged.
     *
     * @deprecated 4.4.0:5.0.0 Use json_encode() instead
     * @param  array  $val     Array to convert
     * @param  int    $options Optional bitmask of JSON constants
     * @return string          JSON encoded string
     */
    public static function array2json($val, $options = 0)
    {
        Deprecation::notice('4.4', 'Please use json_encode() instead.');

        $encoded = json_encode($val, $options);

        return $encoded;
    }
190
191
    /**
     * Safely encodes a value (or list of values) using the current database
     * connection's string encoding methods.
     *
     * Arrays are processed element by element (recursively) with the same
     * $quoted flag, and their keys are preserved.
     *
     * Warning: Does not encode array keys
     *
     * @param mixed|array $val Input value, or list of values as an array
     * @param boolean $quoted When true the value is passed to the connection's
     * quoteString(); otherwise (the default) it is passed to escapeString().
     * @return string|array Safely encoded value in the same format as the input
     */
    public static function raw2sql($val, $quoted = false)
    {
        if (is_array($val)) {
            return array_map(function ($item) use ($quoted) {
                return self::raw2sql($item, $quoted);
            }, $val);
        }

        $connection = DB::get_conn();

        return $quoted
            ? $connection->quoteString($val)
            : $connection->escapeString($val);
    }
218
219
    /**
     * Safely encodes a SQL symbolic identifier (or list of identifiers), such as a database,
     * table, or column name. Supports encoding of multiple identifiers separated by
     * a delimiter (e.g. ".")
     *
     * The work is delegated to escapeIdentifier() on the current database connection.
     *
     * @param string|array $identifier The identifier to escape. E.g. 'SiteTree.Title' or list of identifiers
     * to be joined via the separator.
     * @param string $separator The string that delimits subsequent identifiers
     * @return string The escaped identifier. E.g. '"SiteTree"."Title"'
     */
    public static function symbol2sql($identifier, $separator = '.')
    {
        $connection = DB::get_conn();

        return $connection->escapeIdentifier($identifier, $separator);
    }
233
234
    /**
     * Convert XML to raw text.
     *
     * Strings containing a "<" are handed to {@link Convert::html2raw()};
     * anything else only has its entities decoded. Arrays are processed
     * element by element (recursively) and their keys are preserved.
     *
     * Warning: Does not decode array keys
     *
     * @uses html2raw()
     * @todo Currently &#xxx; entries are stripped; they should be converted
     * @param mixed $val
     * @return array|string
     */
    public static function xml2raw($val)
    {
        if (is_array($val)) {
            return array_map(function ($item) {
                return self::xml2raw($item);
            }, $val);
        }

        // Anything that looks like markup needs the heavier html2raw treatment
        $containsMarkup = strpos($val, '<') !== false;

        return $containsMarkup
            ? self::html2raw($val)
            : html_entity_decode($val, ENT_QUOTES, 'UTF-8');
    }
260
261
    /**
     * Convert a JSON encoded string into an object.
     *
     * Emits a deprecation notice and then returns the result of
     * json_decode() called with its default arguments.
     *
     * @deprecated 4.4.0:5.0.0 Use json_decode() instead
     * @param string $val
     * @return object|boolean
     */
    public static function json2obj($val)
    {
        Deprecation::notice('4.4', 'Please use json_decode() instead.');

        $decoded = json_decode($val);

        return $decoded;
    }
274
275
    /**
     * Convert a JSON string into an array.
     *
     * Emits a deprecation notice and then returns the result of
     * json_decode() in associative mode.
     *
     * @deprecated 4.4.0:5.0.0 Use json_decode() instead
     * @param string $val JSON string to convert
     * @return array|boolean
     */
    public static function json2array($val)
    {
        Deprecation::notice('4.4', 'Please use json_decode() instead.');

        $decoded = json_decode($val, true);

        return $decoded;
    }
288
289
    /**
     * Converts an XML string to a PHP array
     * See http://phpsecurity.readthedocs.org/en/latest/Injection-Attacks.html#xml-external-entity-injection
     *
     * All <!ENTITY ...> declarations are stripped before parsing. A null
     * input is treated as an empty string.
     *
     * @uses recursiveXMLToArray()
     * @param string $val
     * @param boolean $disableDoctypes Disables the use of DOCTYPE, and will trigger an error if encountered.
     * false by default.
     * @param boolean $disableExternals Does nothing because xml entities are removed
     * @deprecated 4.11.0:5.0.0
     * @return array
     * @throws InvalidArgumentException If a DOCTYPE is found while $disableDoctypes is set, or if an
     * <!ENTITY declaration is still present after stripping
     * @throws \Exception If SimpleXMLElement cannot parse the resulting document
     */
    public static function xml2array($val, $disableDoctypes = false, $disableExternals = false)
    {
        Deprecation::notice('4.10', 'Use a dedicated XML library instead');

        $source = $val ?? '';

        // Check doctype
        $hasDoctype = strpos($source, '<!DOCTYPE') !== false;
        if ($disableDoctypes && $hasDoctype) {
            throw new InvalidArgumentException('XML Doctype parsing disabled');
        }

        // CVE-2021-41559 Ensure entities are removed due to their inherent security risk via
        // XXE attacks and quadratic blowup attacks, and also lack of consistent support
        $sanitised = preg_replace('/(?s)<!ENTITY.*?>/', '', $source) ?? '';

        // If there's still an <!ENTITY> present, then it would be the result of a maliciously
        // crafted XML document e.g. <!ENTITY><!<!ENTITY>ENTITY ext SYSTEM "http://evil.com">
        if (strpos($sanitised, '<!ENTITY') !== false) {
            throw new InvalidArgumentException('Malicious XML entity detected');
        }

        // This will throw an exception if the XML contains references to any internal entities
        // that were defined in an <!ENTITY /> before it was removed
        $document = new SimpleXMLElement($sanitised);

        return self::recursiveXMLToArray($document);
    }
326
327
    /**
     * Convert a parsed XML node to a PHP array recursively. Do not
     * call this function directly, Please use {@link Convert::xml2array()}
     *
     * A SimpleXMLElement is expanded through get_object_vars(); each entry is
     * converted recursively. Truthy attributes of the element are collected
     * as strings under the '@' key. A node with no entries is returned as
     * its string value, as is any value that is neither an element nor an array.
     *
     * @param SimpleXMLElement $xml
     *
     * @return mixed
     */
    protected static function recursiveXMLToArray($xml)
    {
        $element = null;
        $attributeValues = [];

        if ($xml instanceof SimpleXMLElement) {
            foreach ($xml->attributes() as $name => $attribute) {
                if ($attribute) {
                    $attributeValues[$name] = (string) $attribute;
                }
            }
            // Keep the original element around for the string cast below
            $element = $xml;
            $xml = get_object_vars($xml);
        }

        if (!is_array($xml)) {
            return (string) $xml;
        }

        // for CDATA
        if (count($xml) === 0) {
            return (string) $element;
        }

        $result = [];
        foreach ($xml as $key => $value) {
            $result[$key] = self::recursiveXMLToArray($value);
        }

        // Attributes
        if ($attributeValues !== []) {
            $result['@'] = $attributeValues;
        }

        return $result;
    }
365
366
    /**
     * Create a link if the string is a valid URL
     *
     * Strings that do not match the URL pattern are returned untouched.
     * Matching strings are HTML-escaped before being placed in the href
     * attribute and the link text: the pattern's "$-_" character range also
     * admits characters such as <, > and &, so the raw string cannot be
     * trusted as markup.
     *
     * @param string $string The string to linkify
     * @return string A link to the URL if string is a URL
     */
    public static function linkIfMatch($string)
    {
        if (preg_match('/^[a-z+]+\:\/\/[a-zA-Z0-9$-_.+?&=!*\'()%]+$/', $string)) {
            // Escape once and reuse for both the attribute and the visible text
            $escaped = htmlspecialchars($string, ENT_QUOTES, 'UTF-8');

            return "<a style=\"white-space: nowrap\" href=\"$escaped\">$escaped</a>";
        }

        return $string;
    }
380
381
    /**
     * Simple conversion of HTML to plaintext.
     *
     * Processing order: <style>/<script> elements are removed, bold tags
     * become asterisks, links are expanded to "text[href]", images are
     * replaced by their alt text, whitespace is compressed, block-level tags
     * are turned into line breaks, entities are decoded, remaining tags are
     * stripped and the result is optionally word-wrapped and trimmed.
     *
     * @param string $data Input data
     * @param bool $preserveLinks When true, <a> tags are kept instead of being expanded
     * @param int $wordWrap Column to wrap at; 0 disables wrapping
     * @param array $config Optional overrides for the keys 'PreserveLinks',
     * 'ReplaceBoldAsterisk', 'CompressWhitespace' and 'ReplaceImagesWithAlt'
     * @return string
     */
    public static function html2raw($data, $preserveLinks = false, $wordWrap = 0, $config = null)
    {
        $defaults = [
            'PreserveLinks' => false,
            'ReplaceBoldAsterisk' => true,
            'CompressWhitespace' => true,
            'ReplaceImagesWithAlt' => true,
        ];
        $config = isset($config) ? array_merge($defaults, $config) : $defaults;

        // Links are kept when either the argument or the config option asks for it
        $keepLinks = $preserveLinks || $config['PreserveLinks'];

        // Remove style and script elements including their contents
        $data = preg_replace("/<style([^A-Za-z0-9>][^>]*)?>.*?<\/style[^>]*>/is", '', $data);
        $data = preg_replace("/<script([^A-Za-z0-9>][^>]*)?>.*?<\/script[^>]*>/is", '', $data);

        if ($config['ReplaceBoldAsterisk']) {
            $data = preg_replace('%<(strong|b)( [^>]*)?>|</(strong|b)>%i', '*', $data);
        }

        // Expand hyperlinks
        if (!$keepLinks) {
            $expandLink = function ($matches) {
                return self::html2raw($matches[2]) . '[' . $matches[1] . ']';
            };
            // Quoted href first, then the unquoted form
            $data = preg_replace_callback('/<a[^>]*href\s*=\s*"([^"]*)">(.*?)<\/a>/ui', $expandLink, $data);
            $data = preg_replace_callback('/<a[^>]*href\s*=\s*([^ ]*)>(.*?)<\/a>/ui', $expandLink, $data);
        }

        // Replace images with their alt tags
        if ($config['ReplaceImagesWithAlt']) {
            $data = preg_replace('/<img[^>]*alt *= *"([^"]*)"[^>]*>/i', ' \\1 ', $data);
            $data = preg_replace('/<img[^>]*alt *= *([^ ]*)[^>]*>/i', ' \\1 ', $data);
        }

        // Compress whitespace
        if ($config['CompressWhitespace']) {
            $data = preg_replace("/\s+/u", ' ', $data);
        }

        // Parse newline tags. The rules are applied one after another, in this order.
        $newlineRules = [
            "/\s*<[Hh][1-6]([^A-Za-z0-9>][^>]*)?> */u" => "\n\n",
            "/\s*<[Pp]([^A-Za-z0-9>][^>]*)?> */u" => "\n\n",
            "/\s*<[Dd][Ii][Vv]([^A-Za-z0-9>][^>]*)?> */u" => "\n\n",
            "/\n\n\n+/" => "\n\n",
            '/<[Bb][Rr]([^A-Za-z0-9>][^>]*)?> */' => "\n",
            '/<[Tt][Rr]([^A-Za-z0-9>][^>]*)?> */' => "\n",
            "/<\/[Tt][Dd]([^A-Za-z0-9>][^>]*)?> */" => '    ',
            '/<\/p>/i' => "\n\n",
        ];
        foreach ($newlineRules as $pattern => $replacement) {
            $data = preg_replace($pattern, $replacement, $data);
        }

        // Replace HTML entities
        $data = html_entity_decode($data, ENT_QUOTES, 'UTF-8');

        // Remove all tags (but optionally keep links).
        // strip_tags seemed to be restricting the length of the output
        // arbitrarily, so the regex below does the same job when no tags are kept.
        $data = $keepLinks
            ? strip_tags($data, '<a>')
            : preg_replace('/<\/?[^>]*>/', '', $data);

        // Wrap
        if ($wordWrap) {
            $data = wordwrap(trim($data), $wordWrap);
        }

        return trim($data);
    }
461
462
    /**
     * There are no real specifications on correctly encoding mailto-links,
     * but this seems to be compatible with most of the user-agents.
     * Does nearly the same as rawurlencode().
     * Please only encode the values, not the whole url, e.g.
     * "mailto:[email protected]?subject=" . Convert::raw2mailto($subject)
     *
     * Only the characters listed in the table below are percent-encoded;
     * everything else is left as it is.
     *
     * @param $data string
     * @return string
     * @see http://www.ietf.org/rfc/rfc1738.txt
     */
    public static function raw2mailto($data)
    {
        // Character => percent-encoded form
        $encodings = [
            "\n" => '%0A',
            '?' => '%3F',
            '=' => '%3D',
            ' ' => '%20',
            '(' => '%28',
            ')' => '%29',
            '&' => '%26',
            '@' => '%40',
            '"' => '%22',
            '\'' => '%27',
            ';' => '%3B',
        ];

        return str_ireplace(array_keys($encodings), array_values($encodings), $data);
    }
481
482
    /**
     * Convert a string (normally a title) to a string suitable for using in
     * urls and other html attributes. Uses {@link URLSegmentFilter}.
     *
     * A new filter is created on each call and the title is run through
     * its filter() method.
     *
     * @param string $title
     * @return string
     */
    public static function raw2url($title)
    {
        return URLSegmentFilter::create()->filter($title);
    }
494
495
    /**
     * Normalises newline sequences to conform to (an) OS specific format.
     *
     * Every sequence matched by the PCRE \R escape (in UTF-8 mode) is
     * replaced with $nl.
     *
     * @param string $data Text containing potentially mixed formats of newline
     * sequences including \r, \r\n, \n, or unicode newline characters
     * @param string $nl The newline sequence to normalise to. Defaults to that
     * specified by the current OS
     * @return string
     */
    public static function nl2os($data, $nl = PHP_EOL)
    {
        $normalised = preg_replace('~\R~u', $nl, $data);

        return $normalised;
    }
508
509
    /**
     * Encode a value into a string that can be used as part of a filename.
     * All string data must be UTF-8 encoded.
     *
     * The value is JSON encoded, then base64 encoded with "+" and "/"
     * swapped for "~" and "_", and the trailing "=" padding removed.
     *
     * @param mixed $val Value to be encoded
     * @return string
     */
    public static function base64url_encode($val)
    {
        $json = json_encode($val);
        $base64 = base64_encode($json);
        $filenameSafe = strtr($base64, '+/', '~_');

        return rtrim($filenameSafe, '=');
    }
520
521
    /**
     * Decode a value that was encoded with Convert::base64url_encode.
     *
     * The "~" and "_" substitutions are reversed and the "=" padding that the
     * encoder stripped is restored before base64 decoding; the decoded text
     * is then parsed as JSON in associative mode.
     *
     * @param string $val Value to be decoded
     * @return mixed Original value
     */
    public static function base64url_decode($val)
    {
        $base64 = strtr($val, '~_', '+/');

        // Pad the length up to a multiple of four. str_pad() takes a target
        // length rather than a pad count, so the count is worked out here.
        $remainder = strlen($base64) % 4;
        if ($remainder > 0) {
            $base64 .= str_repeat('=', 4 - $remainder);
        }

        return json_decode(base64_decode($base64), true);
    }
534
535
    /**
     * Converts upper camel case names to lower camel case,
     * with leading upper case characters replaced with lower case.
     * Tries to retain word case.
     *
     * Examples:
     * - ID => id
     * - IDField => idField
     * - iDField => iDField
     *
     * @param $str
     * @return string
     */
    public static function upperCamelToLowerCamel($str)
    {
        $parts = null;

        // Several leading uppercase characters followed by lowercase:
        // lowercase all of them except the last one, which starts the next word.
        if (preg_match('/(^[A-Z]{1,})([A-Z]{1})([a-z]+.*)/', $str, $parts)) {
            return strtolower($parts[1]) . $parts[2] . $parts[3];
        }

        // Exactly one leading uppercase character followed by lowercase:
        // lowercase just that character.
        if (preg_match('/(^[A-Z]{1})([a-z]+.*)/', $str, $parts)) {
            return strtolower($parts[1]) . $parts[2];
        }

        // Uppercase only: lowercase the whole thing.
        // Anything else has no leading uppercase and is returned as given.
        return preg_match('/^[A-Z]+$/', $str) ? strtolower($str) : $str;
    }
578
579
    /**
     * Turn a memory string, such as 512M into an actual number of bytes.
     * Preserves integer values like "1024" or "-1"
     *
     * The unit is the first of the letters b, k, m, g, t, p, e, z, y found in
     * the string (case-insensitive); its position in that sequence is the
     * power of 1024 the numeric part is multiplied by. A string without any
     * numeric part is treated as 0.
     *
     * @param string $memString A memory limit string, such as "64M"
     * @return int
     */
    public static function memstring2bytes($memString)
    {
        // Remove non-unit characters from the size
        $unit = preg_replace('/[^bkmgtpezy]/i', '', $memString);
        // Remove non-numeric characters from the size. The cast keeps the
        // arithmetic and round() below from receiving a string, which PHP 8
        // rejects when the string is empty or not numeric.
        $size = (float) preg_replace('/[^0-9\.\-]/', '', $memString);

        if ($unit) {
            // Find the position of the unit in the ordered string which is the power
            // of magnitude to multiply a kilobyte by
            $power = stripos('bkmgtpezy', $unit[0]);

            return (int) round($size * pow(1024, $power));
        }

        return (int) round($size);
    }
601
602
    /**
     * Format a number of bytes as a memory string such as "512M".
     *
     * The unit is chosen from the base-1024 logarithm of $bytes. When that
     * does not correspond to one of the known units, megabytes are used.
     *
     * @param float $bytes
     * @param int $decimal decimal precision
     * @return string
     */
    public static function bytes2memstring($bytes, $decimal = 0)
    {
        $units = ['B','K','M','G','T','P','E','Z','Y'];

        // Index of the unit, i.e. the power of 1024 to divide by
        $exponent = (int)floor(log($bytes, 1024));
        if (!isset($units[$exponent])) {
            $exponent = 2;
        }

        $amount = round($bytes / pow(1024, $exponent), $decimal);

        return $amount . $units[$exponent];
    }
620
621
    /**
     * Convert slashes in relative or absolute filesystem path. Defaults to DIRECTORY_SEPARATOR
     *
     * Both forward slashes and backslashes are replaced with $separator.
     *
     * @param string $path
     * @param string $separator
     * @param bool $multiple When true (the default) each run of consecutive slashes is
     * replaced by a single separator; when false every slash is replaced individually
     * @return string
     */
    public static function slashes($path, $separator = DIRECTORY_SEPARATOR, $multiple = true)
    {
        if (!$multiple) {
            return str_replace(['/', '\\'], $separator, $path);
        }

        return preg_replace('#[/\\\\]+#', $separator, $path);
    }
636
}
637