Passed
Pull Request — 4 (#10222)
by Steve
07:19
created

Convert::linkIfMatch()   A

Complexity

Conditions 2
Paths 2

Size

Total Lines 7
Code Lines 3

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 2
eloc 3
nc 2
nop 1
dl 0
loc 7
rs 10
c 0
b 0
f 0
1
<?php
2
3
namespace SilverStripe\Core;
4
5
use SilverStripe\Dev\Deprecation;
6
use SilverStripe\ORM\DB;
7
use SilverStripe\View\Parsers\URLSegmentFilter;
8
use InvalidArgumentException;
9
use SimpleXMLElement;
10
use Exception;
11
12
/**
13
 * Library of conversion functions, implemented as static methods.
14
 *
15
 * The methods are all of the form (format)2(format), where the format is one of
16
 *
17
 *  raw: A UTF8 string
18
 *  attr: A UTF8 string suitable for inclusion in an HTML attribute
19
 *  js: A UTF8 string suitable for inclusion in a double-quoted javascript string.
20
 *
21
 *  array: A PHP associative array
22
 *  json: JavaScript object notation
23
 *
24
 *  html: HTML source suitable for use in a page or email
25
 *  text: Plain-text content, suitable for display to a user as-is, or insertion in a plaintext email.
26
 *
27
 * Objects of type {@link ViewableData} can have an "escaping type",
28
 * which determines if they are automatically escaped before output by {@link SSViewer}.
29
 */
30
class Convert
31
{
32
33
    /**
     * Escape a value so it can be placed inside an XML attribute.
     *
     * This is a direct delegate to {@link Convert::raw2xml()}.
     *
     * Warning: array keys are not escaped, only the values.
     *
     * @param array|string $val String to escape, or array of strings
     * @return array|string The escaped string, or an array of escaped strings
     */
    public static function raw2att($val)
    {
        $escaped = self::raw2xml($val);

        return $escaped;
    }
45
46
    /**
     * Escape a value so it can be placed inside an HTML attribute.
     *
     * This is a direct delegate to {@link Convert::raw2att()}.
     *
     * Warning: array keys are not escaped, only the values.
     *
     * @param string|array $val String to escape, or array of strings
     * @return array|string The escaped string, or an array of escaped strings
     */
    public static function raw2htmlatt($val)
    {
        $escaped = self::raw2att($val);

        return $escaped;
    }
58
59
    /**
     * Convert a value to be suitable for an HTML name attribute.
     *
     * Scalars are escaped through {@link Convert::raw2att()}; arrays are
     * processed element by element (recursively), keeping their keys.
     *
     * Warning: array keys are not escaped, only the values.
     *
     * @see http://www.w3.org/TR/REC-html40/types.html#type-cdata
     *
     * @param array|string $val String to escape, or array of strings
     *
     * @return array|string
     */
    public static function raw2htmlname($val)
    {
        if (!is_array($val)) {
            return self::raw2att($val);
        }

        return array_map(
            static function ($item) {
                return self::raw2htmlname($item);
            },
            $val
        );
    }
83
84
    /**
     * Convert a value to be suitable for an HTML ID attribute.
     *
     * Every run of characters outside a-z, A-Z, 0-9, "-", "_", ":" and "." is
     * replaced with a single underscore, consecutive underscores are collapsed,
     * and leading/trailing underscores are trimmed.
     *
     * Warning: array keys are not converted, only the values.
     *
     * @see http://www.w3.org/TR/REC-html40/types.html#type-cdata
     *
     * @param array|string $val String to convert, or array of strings
     *
     * @return array|string
     */
    public static function raw2htmlid($val)
    {
        if (is_array($val)) {
            foreach ($val as $k => $v) {
                $val[$k] = self::raw2htmlid($v);
            }

            return $val;
        }

        // Cast instead of using `?:` so that falsy-but-valid input such as "0" survives.
        // `?? ''` only covers the case where preg_replace() itself fails and returns null.
        $id = preg_replace('/[^a-zA-Z0-9\-_:.]+/', '_', (string) $val) ?? '';
        $id = preg_replace('/_+/', '_', $id) ?? '';

        return trim($id, '_');
    }
115
116
    /**
     * Escape text for safe inclusion in XML.
     *
     * Strings are passed through htmlspecialchars() with ENT_QUOTES and UTF-8;
     * arrays are escaped element by element (recursively), keeping their keys.
     *
     * Warning: array keys are not escaped, only the values.
     *
     * @see http://www.w3.org/TR/REC-xml/#dt-escape
     * @param array|string $val String to escape, or array of strings
     * @return array|string
     */
    public static function raw2xml($val)
    {
        if (!is_array($val)) {
            return htmlspecialchars((string) $val, ENT_QUOTES, 'UTF-8');
        }

        return array_map(
            static function ($item) {
                return self::raw2xml($item);
            },
            $val
        );
    }
136
137
    /**
     * Escape text for safe inclusion in a quoted JavaScript string.
     *
     * Backslashes, both quote characters, and CR/LF are backslash-escaped.
     * The characters <, > and & are emitted as \x3c, \x3e and \x26.
     *
     * Warning: array keys are not escaped, only the values.
     *
     * @param array|string $val String to escape, or array of strings
     * @return array|string
     */
    public static function raw2js($val)
    {
        if (is_array($val)) {
            foreach ($val as $k => $v) {
                $val[$k] = self::raw2js($v);
            }
            return $val;
        }

        return str_replace(
            // The backslash must come first so the escapes added below are not escaped again.
            // <, > and & are hex-escaped as well because they can interfere with the
            // surrounding markup (presumably when the script is inlined in HTML).
            ["\\", '"', "\n", "\r", "'", '<', '>', '&'],
            ["\\\\", '\"', '\n', '\r', "\\'", "\\x3c", "\\x3e", "\\x26"],
            // Cast instead of `?:` so that the valid input "0" is not turned into "".
            (string) $val
        );
    }
161
162
    /**
     * Encode a value as a JSON string.
     *
     * Emits a deprecation notice, then forwards to json_encode() with the
     * given options cast to an integer bitmask.
     *
     * @deprecated 4.4.0:5.0.0 Use json_encode() instead
     * @param  mixed $val     Value to be encoded
     * @param  int   $options Optional bitmask of JSON constants
     * @return string           JSON encoded string
     */
    public static function raw2json($val, $options = 0)
    {
        Deprecation::notice('4.4', 'Please use json_encode() instead.');

        $flags = (int) $options;

        return json_encode($val, $flags);
    }
177
178
    /**
     * Encode an array as a JSON string.
     *
     * Emits a deprecation notice, then forwards to json_encode() with the
     * given options cast to an integer bitmask.
     *
     * @deprecated 4.4.0:5.0.0 Use json_encode() instead
     * @param  array  $val     Array to convert
     * @param  int    $options Optional bitmask of JSON constants
     * @return string          JSON encoded string
     */
    public static function array2json($val, $options = 0)
    {
        Deprecation::notice('4.4', 'Please use json_encode() instead.');

        $flags = (int) $options;

        return json_encode($val, $flags);
    }
192
193
    /**
     * Encode a value (or list of values) using the current database
     * connection's string escaping.
     *
     * Warning: array keys are not encoded, only the values.
     *
     * @param mixed|array $val Input value, or list of values as an array
     * @param boolean $quoted When true the connection's quoteString() is used
     * instead of escapeString(). Defaults to false (escape only).
     * @return string|array Encoded value in the same format as the input
     */
    public static function raw2sql($val, $quoted = false)
    {
        if (!is_array($val)) {
            $connection = DB::get_conn();

            return $quoted
                ? $connection->quoteString($val)
                : $connection->escapeString($val);
        }

        // Encode each element recursively, leaving the keys untouched
        foreach ($val as $key => $item) {
            $val[$key] = self::raw2sql($item, $quoted);
        }

        return $val;
    }
220
221
    /**
     * Encode a SQL symbolic identifier (or list of identifiers), such as a
     * database, table, or column name, by delegating to the current database
     * connection's escapeIdentifier().
     *
     * @param string|array $identifier The identifier to escape. E.g. 'SiteTree.Title' or list of identifiers
     * to be joined via the separator.
     * @param string $separator The string that delimits subsequent identifiers
     * @return string The escaped identifier. E.g. '"SiteTree"."Title"'
     */
    public static function symbol2sql($identifier, $separator = '.')
    {
        $connection = DB::get_conn();

        return $connection->escapeIdentifier($identifier, $separator);
    }
235
236
    /**
     * Convert XML to raw text.
     *
     * Values containing a "<" are handed to {@link Convert::html2raw()};
     * anything else only has its entities decoded. Arrays are converted
     * element by element (recursively).
     *
     * Warning: array keys are not decoded, only the values.
     *
     * @uses html2raw()
     * @todo Currently &#xxx; entries are stripped; they should be converted
     * @param mixed $val
     * @return array|string
     */
    public static function xml2raw($val)
    {
        if (is_array($val)) {
            return array_map(
                static function ($item) {
                    return self::xml2raw($item);
                },
                $val
            );
        }

        $text = (string) $val;

        // Markup is present, so the fuller HTML conversion is required
        $hasMarkup = strpos($text, '<') !== false;
        if ($hasMarkup) {
            return self::html2raw($val);
        }

        return html_entity_decode($text, ENT_QUOTES, 'UTF-8');
    }
262
263
    /**
     * Decode a JSON string into an object.
     *
     * Emits a deprecation notice, then forwards the string-cast input to
     * json_decode().
     *
     * @deprecated 4.4.0:5.0.0 Use json_decode() instead
     * @param string $val
     * @return object|boolean
     */
    public static function json2obj($val)
    {
        Deprecation::notice('4.4', 'Please use json_decode() instead.');

        $json = (string) $val;

        return json_decode($json);
    }
276
277
    /**
     * Decode a JSON string into an associative array.
     *
     * Emits a deprecation notice, then forwards the string-cast input to
     * json_decode() in associative mode.
     *
     * @deprecated 4.4.0:5.0.0 Use json_decode() instead
     * @param string $val JSON string to convert
     * @return array|boolean
     */
    public static function json2array($val)
    {
        Deprecation::notice('4.4', 'Please use json_decode() instead.');

        $json = (string) $val;

        return json_decode($json, true);
    }
290
291
    /**
     * Converts an XML string to a PHP array
     * See http://phpsecurity.readthedocs.org/en/latest/Injection-Attacks.html#xml-external-entity-injection
     *
     * @uses recursiveXMLToArray()
     * @param string $val
     * @param boolean $disableDoctypes Disables the use of DOCTYPE, and will throw if one is encountered
     * anywhere in the input. false by default.
     * @param boolean $disableExternals Disables the loading of external entities. false by default. No-op in PHP 8.
     * @return array
     * @throws InvalidArgumentException If $disableDoctypes is set and the input contains a DOCTYPE
     * @throws Exception If the XML cannot be parsed
     */
    public static function xml2array($val, $disableDoctypes = false, $disableExternals = false)
    {
        // PHP 8 deprecates libxml_disable_entity_loader() as it is no longer needed
        if (\PHP_VERSION_ID >= 80000) {
            $disableExternals = false;
        }

        // Check doctype. A plain substring search is used on purpose: a regex relying on "."
        // does not cross newlines, so a multi-line DOCTYPE declaration would slip through.
        if ($disableDoctypes && stripos((string) $val, '<!DOCTYPE') !== false) {
            throw new InvalidArgumentException('XML Doctype parsing disabled');
        }

        // Disable external entity loading, remembering the previous setting
        $oldVal = null;
        if ($disableExternals) {
            $oldVal = libxml_disable_entity_loader(true);
        }

        try {
            return self::recursiveXMLToArray(new SimpleXMLElement($val));
        } finally {
            // Restore the previous setting on every exit path, including non-Exception throwables
            if ($disableExternals) {
                libxml_disable_entity_loader((bool) $oldVal);
            }
        }
    }
334
335
    /**
     * Recursively convert a parsed XML node into nested PHP arrays and strings.
     * Do not call this function directly, please use {@link Convert::xml2array()}
     *
     * Elements become arrays keyed by their object vars; an element with no
     * vars is returned as its string value. Truthy attributes of an element
     * are additionally collected under the "@" key.
     *
     * @param SimpleXMLElement|array|string $xml
     *
     * @return mixed
     */
    protected static function recursiveXMLToArray($xml)
    {
        $element = null;
        $attributeMap = [];

        if ($xml instanceof SimpleXMLElement) {
            foreach ($xml->attributes() as $name => $attribute) {
                if ($attribute) {
                    $attributeMap[$name] = (string) $attribute;
                }
            }
            // Keep the node itself so its text can still be read once $xml becomes an array
            $element = $xml;
            $xml = get_object_vars($xml);
        }

        if (!is_array($xml)) {
            return (string) $xml;
        }

        // for CDATA
        if (count($xml) === 0) {
            return (string) $element;
        }

        $result = [];
        foreach ($xml as $key => $value) {
            $result[$key] = self::recursiveXMLToArray($value);
        }

        // Attributes
        if ($attributeMap !== []) {
            $result['@'] = $attributeMap;
        }

        return $result;
    }
373
374
    /**
     * Create an HTML link if the string looks like a URL.
     *
     * When the string matches the URL pattern it is HTML-escaped and wrapped in
     * an anchor tag; otherwise the input is returned untouched.
     *
     * @param string $string The string to linkify
     * @return string A link to the URL if string is a URL, otherwise the original value
     */
    public static function linkIfMatch($string)
    {
        if (!preg_match('/^[a-z+]+\:\/\/[a-zA-Z0-9$-_.+?&=!*\'()%]+$/', (string) $string)) {
            return $string;
        }

        // The "$-_" range in the pattern also admits characters such as <, >, ' and &,
        // so the matched text must be escaped before it is placed into markup.
        $escaped = htmlspecialchars((string) $string, ENT_QUOTES, 'UTF-8');

        return "<a style=\"white-space: nowrap\" href=\"{$escaped}\">{$escaped}</a>";
    }
388
389
    /**
     * Simple conversion of HTML to plaintext.
     *
     * Steps, in order: drop style/script blocks, optionally turn bold tags into
     * asterisks, optionally expand links to "text[url]", optionally replace
     * images with their alt text, optionally compress whitespace, translate
     * block-level tags into newlines, decode entities, strip remaining tags
     * and finally word-wrap.
     *
     * @param string $data Input data
     * @param bool $preserveLinks Keep anchor tags in the output instead of expanding them
     * @param int $wordWrap Column to wrap at; 0 disables wrapping
     * @param array $config Optional overrides for the keys PreserveLinks, ReplaceBoldAsterisk,
     * CompressWhitespace and ReplaceImagesWithAlt
     * @return string
     */
    public static function html2raw($data, $preserveLinks = false, $wordWrap = 0, $config = null)
    {
        $defaultConfig = [
            'PreserveLinks' => false,
            'ReplaceBoldAsterisk' => true,
            'CompressWhitespace' => true,
            'ReplaceImagesWithAlt' => true,
        ];
        $config = isset($config) ? array_merge($defaultConfig, $config) : $defaultConfig;
        $keepLinks = $preserveLinks || $config['PreserveLinks'];

        // One regex substitution. A PCRE failure (null) degrades to an empty string, but a
        // legitimate result such as "0" is kept - `?:` would have discarded it.
        $replace = static function ($pattern, $replacement, $subject) {
            return preg_replace($pattern, $replacement, $subject) ?? '';
        };

        $data = (string) $data;
        $data = $replace("/<style([^A-Za-z0-9>][^>]*)?>.*?<\/style[^>]*>/is", '', $data);
        $data = $replace("/<script([^A-Za-z0-9>][^>]*)?>.*?<\/script[^>]*>/is", '', $data);

        if ($config['ReplaceBoldAsterisk']) {
            $data = $replace('%<(strong|b)( [^>]*)?>|</(strong|b)>%i', '*', $data);
        }

        // Expand hyperlinks: quoted href values first, then unquoted ones
        if (!$keepLinks) {
            $expandLink = static function ($matches) {
                return self::html2raw($matches[2]) . "[$matches[1]]";
            };
            $data = preg_replace_callback('/<a[^>]*href\s*=\s*"([^"]*)">(.*?)<\/a>/ui', $expandLink, $data) ?? '';
            $data = preg_replace_callback('/<a[^>]*href\s*=\s*([^ ]*)>(.*?)<\/a>/ui', $expandLink, $data) ?? '';
        }

        // Replace images with their alt tags
        if ($config['ReplaceImagesWithAlt']) {
            $data = $replace('/<img[^>]*alt *= *"([^"]*)"[^>]*>/i', ' \\1 ', $data);
            $data = $replace('/<img[^>]*alt *= *([^ ]*)[^>]*>/i', ' \\1 ', $data);
        }

        // Compress whitespace
        if ($config['CompressWhitespace']) {
            $data = $replace("/\s+/u", ' ', $data);
        }

        // Parse newline tags
        $data = $replace("/\s*<[Hh][1-6]([^A-Za-z0-9>][^>]*)?> */u", "\n\n", $data);
        $data = $replace("/\s*<[Pp]([^A-Za-z0-9>][^>]*)?> */u", "\n\n", $data);
        $data = $replace("/\s*<[Dd][Ii][Vv]([^A-Za-z0-9>][^>]*)?> */u", "\n\n", $data);
        $data = $replace("/\n\n\n+/", "\n\n", $data);

        $data = $replace('/<[Bb][Rr]([^A-Za-z0-9>][^>]*)?> */', "\n", $data);
        $data = $replace('/<[Tt][Rr]([^A-Za-z0-9>][^>]*)?> */', "\n", $data);
        $data = $replace("/<\/[Tt][Dd]([^A-Za-z0-9>][^>]*)?> */", '    ', $data);
        $data = $replace('/<\/p>/i', "\n\n", $data);

        // Replace HTML entities
        $data = html_entity_decode($data, ENT_QUOTES, 'UTF-8');

        // Remove all tags (but optionally keep links).
        // strip_tags seemed to be restricting the length of the output
        // arbitrarily. The regex essentially does the same thing.
        if (!$keepLinks) {
            $data = $replace('/<\/?[^>]*>/', '', $data);
        } else {
            $data = strip_tags($data, '<a>');
        }

        // Wrap
        if ($wordWrap) {
            $data = wordwrap(trim($data), (int) $wordWrap);
        }

        return trim($data);
    }
469
470
    /**
     * There are no real specifications on correctly encoding mailto-links,
     * but this seems to be compatible with most of the user-agents.
     * Does nearly the same as rawurlencode().
     * Please only encode the values, not the whole url, e.g.
     * "mailto:user@example.com?subject=" . Convert::raw2mailto($subject)
     *
     * @param string $data Value to encode
     * @return string The value with newline, ?, =, space, parentheses, &, @, quotes and ; percent-encoded
     * @see http://www.ietf.org/rfc/rfc1738.txt
     */
    public static function raw2mailto($data)
    {
        return str_ireplace(
            ["\n",'?','=',' ','(',')','&','@','"','\'',';'],
            ['%0A','%3F','%3D','%20','%28','%29','%26','%40','%22','%27','%3B'],
            // Cast instead of `?:` so that the valid input "0" is not turned into "".
            (string) $data
        );
    }
489
490
    /**
     * Convert a string (normally a title) into a URL segment by running it
     * through a freshly created {@link URLSegmentFilter}.
     *
     * @param string $title
     * @return string
     */
    public static function raw2url($title)
    {
        return URLSegmentFilter::create()->filter($title);
    }
502
503
    /**
     * Normalises newline sequences to conform to (an) OS specific format.
     *
     * Every sequence matched by PCRE's \R is replaced with $nl.
     *
     * @param string $data Text containing potentially mixed formats of newline
     * sequences including \r, \r\n, \n, or unicode newline characters
     * @param string $nl The newline sequence to normalise to. Defaults to that
     * specified by the current OS
     * @return string
     */
    public static function nl2os($data, $nl = PHP_EOL)
    {
        // Cast instead of `?:` so that falsy-but-valid strings such as "0" are preserved.
        return preg_replace('~\R~u', (string) $nl, (string) $data);
    }
516
517
    /**
     * Encode a value into a string that can be used as part of a filename.
     * All string data must be UTF-8 encoded.
     *
     * The value is JSON-encoded, then base64-encoded with "+" and "/" swapped
     * for "~" and "_", and the trailing "=" padding removed.
     *
     * @param mixed $val Value to be encoded
     * @return string
     */
    public static function base64url_encode($val)
    {
        $json = (string) json_encode($val);
        $base64 = base64_encode($json);
        $filenameSafe = strtr($base64, '+/', '~_');

        return rtrim($filenameSafe, '=');
    }
528
529
    /**
     * Decode a value that was encoded with Convert::base64url_encode.
     *
     * Reverses the "~"/"_" substitution, restores the "=" padding, then
     * base64-decodes and JSON-decodes (objects come back as associative arrays).
     *
     * @param string $val Value to be decoded
     * @return mixed Original value
     */
    public static function base64url_decode($val)
    {
        $base64 = strtr((string) $val, '~_', '+/');

        // Re-add the padding removed by the encoder. Passing `strlen % 4` as the target
        // length of str_pad() never pads anything, so the missing amount is computed here.
        $remainder = strlen($base64) % 4;
        if ($remainder > 0) {
            $base64 .= str_repeat('=', 4 - $remainder);
        }

        return json_decode((string) base64_decode($base64), true);
    }
542
543
    /**
     * Converts upper camel case names to lower camel case,
     * with leading upper case characters replaced with lower case.
     * Tries to retain word case.
     *
     * Examples:
     * - ID => id
     * - IDField => idField
     * - iDField => iDField
     *
     * @param string $str
     * @return string
     */
    public static function upperCamelToLowerCamel($str)
    {
        $subject = (string) $str;
        $parts = null;

        // Several leading capitals followed by lowercase: lowercase all of them
        // except the last one, which starts the next word.
        if (preg_match('/(^[A-Z]{1,})([A-Z]{1})([a-z]+.*)/', $subject, $parts)) {
            return strtolower($parts[1]) . $parts[2] . $parts[3];
        }

        // Exactly one leading capital followed by lowercase: lowercase just that one.
        if (preg_match('/(^[A-Z]{1})([a-z]+.*)/', $subject, $parts)) {
            return strtolower($parts[1]) . $parts[2];
        }

        // Capitals only: lowercase the whole thing.
        if (preg_match('/^[A-Z]+$/', $subject)) {
            return strtolower($subject);
        }

        // No leading uppercase: hand the input back as it was given.
        return $str;
    }
586
587
    /**
     * Turn a memory string, such as 512M into an actual number of bytes.
     * Preserves integer values like "1024" or "-1"
     *
     * The first unit letter found (one of b, k, m, g, t, p, e, z, y, case
     * insensitive) selects the power of 1024 to multiply by. A string with no
     * digits is treated as a size of zero.
     *
     * @param string $memString A memory limit string, such as "64M"
     * @return int
     */
    public static function memstring2bytes($memString)
    {
        $memString = (string) $memString;

        // Remove non-unit characters from the size
        $unit = preg_replace('/[^bkmgtpezy]/i', '', $memString) ?? '';
        // Remove non-numeric characters from the size. The explicit float cast keeps an
        // empty or malformed remainder (e.g. for "M") from raising a TypeError on PHP 8.
        $size = (float) preg_replace('/[^0-9\.\-]/', '', $memString);

        if ($unit !== '') {
            // Find the position of the unit in the ordered string which is the power
            // of magnitude to multiply a kilobyte by
            return (int) round($size * pow(1024, stripos('bkmgtpezy', $unit[0])));
        }

        return (int) round($size);
    }
609
610
    /**
     * Format a byte count as a short memory string such as "2K" or "1.5M".
     *
     * The unit is picked from the base-1024 logarithm of the value; when that
     * falls outside the known units, "M" is used.
     *
     * @param float $bytes
     * @param int $decimal decimal precision
     * @return string
     */
    public static function bytes2memstring($bytes, $decimal = 0)
    {
        $units = ['B','K','M','G','T','P','E','Z','Y'];

        // Index of the unit, i.e. the power of 1024 the value is expressed in
        $power = (int)floor(log((float) $bytes, 1024));
        $power = isset($units[$power]) ? $power : 2;

        $scaled = $bytes / pow(1024, $power);
        $rounded = round($scaled, (int) $decimal);

        return $rounded . $units[$power];
    }
628
629
    /**
     * Convert slashes in relative or absolute filesystem path. Defaults to DIRECTORY_SEPARATOR
     *
     * Both "/" and "\" are replaced with $separator. With $multiple enabled, a
     * run of consecutive slashes is replaced by a single separator.
     *
     * @param string $path
     * @param string $separator
     * @param bool $multiple Collapses multiple slashes or not
     * @return string
     */
    public static function slashes($path, $separator = DIRECTORY_SEPARATOR, $multiple = true)
    {
        // Cast instead of `?:` so that falsy-but-valid strings such as "0" are preserved.
        $path = (string) $path;
        $separator = (string) $separator;

        if ($multiple) {
            return preg_replace('#[/\\\\]+#', $separator, $path);
        }

        return str_replace(['/', '\\'], $separator, $path);
    }
644
}
645