Completed
Push — latest ( 7e49ae...5ac9c7 )
by Colin
20s queued 12s
created

TextNormalizer::normalize()   A

Complexity

Conditions 2
Paths 2

Size

Total Lines 15
Code Lines 6

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 4
CRAP Score 2.1481

Importance

Changes 1
Bugs 0 Features 0
Metric Value
eloc 6
c 1
b 0
f 0
dl 0
loc 15
ccs 4
cts 6
cp 0.6667
rs 10
cc 2
nc 2
nop 2
crap 2.1481
1
<?php
2
3
/*
4
 * This file is part of the league/commonmark package.
5
 *
6
 * (c) Colin O'Dell <colinodell@gmail.com>
7
 *
8
 * For the full copyright and license information, please view the LICENSE
9
 * file that was distributed with this source code.
10
 */
11
12
declare(strict_types=1);
13
14
namespace League\CommonMark\Normalizer;
15
16
/**
 * Normalize text input using the steps given by the CommonMark spec to normalize labels
 *
 * Normalization collapses every run of whitespace to a single space, strips
 * leading/trailing whitespace and then applies Unicode case folding, so that
 * two labels which differ only in case or spacing compare as equal.
 *
 * @see https://spec.commonmark.org/0.29/#matches
 *
 * @psalm-immutable
 */
final class TextNormalizer implements TextNormalizerInterface
{
    /**
     * Fallback case-folding table used when MB_CASE_FOLD is unavailable.
     *
     * Each entry of the first list is replaced by the entry at the same index
     * of the second list before the text is lowercased.
     *
     * Source: https://github.com/symfony/polyfill-mbstring/blob/master/Mbstring.php
     */
    private const CASE_FOLD = [
        ['µ', 'ſ', "\xCD\x85", 'ς', "\xCF\x90", "\xCF\x91", "\xCF\x95", "\xCF\x96", "\xCF\xB0", "\xCF\xB1", "\xCF\xB5", "\xE1\xBA\x9B", "\xE1\xBE\xBE", "\xC3\x9F", "\xE1\xBA\x9E"],
        ['μ', 's', 'ι',        'σ', 'β',        'θ',        'φ',        'π',        'κ',        'ρ',        'ε',        "\xE1\xB9\xA1", 'ι',            'ss',       'ss'],
    ];

    /**
     * {@inheritdoc}
     *
     * Returns the whitespace-normalized, case-folded form of $text.
     * The $context argument is accepted for interface compatibility and is
     * not used by this implementation.
     *
     * @psalm-pure
     */
    public function normalize(string $text, $context = null): string
    {
        // Collapse every whitespace run to a single space *before* trimming.
        // Trimming first leaves a stray space behind when the text starts or
        // ends with a form feed: \s matches "\f", but trim()'s default
        // character list does not include it.
        $text = \preg_replace('/\s+/', ' ', $text);
        \assert(\is_string($text));

        // Remove leading/trailing whitespace (now at most a single space per
        // side, plus anything else trim() strips by default)
        $text = \trim($text);

        if (! \defined('MB_CASE_FOLD')) {
            // We're not on a version of PHP (7.3+) which has this feature,
            // so emulate case folding: map the special characters by hand,
            // then lowercase the rest
            $text = \str_replace(self::CASE_FOLD[0], self::CASE_FOLD[1], $text);

            return \mb_strtolower($text, 'UTF-8');
        }

        return \mb_convert_case($text, \MB_CASE_FOLD, 'UTF-8');
    }
}
55