Passed
Push — scrutinizer-migrate-to-new-eng... ( 58afd6 )
by Alexander
18:11
created

Utf8Controller   A

Complexity

Total Complexity 26

Size/Duplication

Total Lines 108
Duplicated Lines 0 %

Coupling/Cohesion

Components 1
Dependencies 2

Importance

Changes 0
Metric Value
wmc 26
lcom 1
cbo 2
dl 0
loc 108
rs 10
c 0
b 0
f 0
1
<?php
2
/**
3
 * @link http://www.yiiframework.com/
4
 * @copyright Copyright (c) 2008 Yii Software LLC
5
 * @license http://www.yiiframework.com/license/
6
 */
7
8
namespace yii\build\controllers;
9
10
use yii\console\Controller;
11
use yii\helpers\Console;
12
use yii\helpers\FileHelper;
13
14
/**
 * Check files for broken UTF8 and non-printable characters.
 *
 * Every checked file is split into characters; each character is classified and
 * problems are written to stdout together with their line and column.
 *
 * @author Carsten Brandt <[email protected]>
 */
class Utf8Controller extends Controller
{
    public $defaultAction = 'check-guide';

    /**
     * @var array files for which the header line has already been printed, indexed by file name.
     */
    private $_foundFiles = [];


    /**
     * Check guide for non-printable characters that may break docs generation.
     *
     * @param string $directory the directory to check. If not specified, the default
     * guide directory will be checked. A path to a single file is accepted as well.
     */
    public function actionCheckGuide($directory = null)
    {
        if ($directory === null) {
            $directory = \dirname(\dirname(__DIR__)) . '/docs';
        }
        if (is_file($directory)) {
            $files = [$directory];
        } else {
            $files = FileHelper::findFiles($directory, [
                'only' => ['*.md'],
            ]);
        }

        foreach ($files as $file) {
            $content = file_get_contents($file);
            if ($content === false) {
                // An unreadable file must not abort the check of the remaining files.
                $this->stderr("Unable to read file: $file\n");
                continue;
            }

            $this->checkContent($content, $file);
        }
    }

    /**
     * Scans the content of one file and reports every problematic character.
     *
     * @param string $content the raw file content.
     * @param string $file the file name used in the report.
     */
    private function checkContent($content, $file)
    {
        $line = 1;
        $pos = 0;
        foreach ($this->splitChars($content) as $c) {
            $ord = $this->unicodeOrd($c);

            $pos++;
            if ($ord === 0x000A) {
                // A line feed starts the next line; the column counter restarts.
                $line++;
                $pos = 0;
            }

            $problem = $this->classify($ord);
            if ($problem !== null) {
                $this->found($problem, $c, $line, $pos, $file);
            }
        }
    }

    /**
     * Splits a string into a list of UTF-8 characters.
     *
     * `preg_split()` with the `u` modifier returns `false` as soon as the subject
     * contains invalid UTF-8. In that case a byte oriented pattern is used which
     * yields well-formed looking sequences as one element and every other byte as
     * an element of its own, so that broken bytes can be located and reported.
     *
     * @param string $content the string to split.
     * @return string[] the characters (or single stray bytes) in original order.
     */
    private function splitChars($content)
    {
        // -1 means "no limit"; passing null here is deprecated since PHP 8.1.
        $chars = preg_split('//u', $content, -1, PREG_SPLIT_NO_EMPTY);
        if ($chars !== false) {
            return $chars;
        }

        preg_match_all(
            '/[\x00-\x7F]|[\xC2-\xDF][\x80-\xBF]|[\xE0-\xEF][\x80-\xBF]{2}|[\xF0-\xF4][\x80-\xBF]{3}|./s',
            $content,
            $matches
        );

        return isset($matches[0]) ? $matches[0] : [];
    }

    /**
     * Determines which kind of problem, if any, a code point represents.
     *
     * @param int|bool $ord the code point as returned by [[unicodeOrd()]], `false` for broken UTF-8.
     * @return string|null the problem label, `null` if the character is acceptable.
     */
    private function classify($ord)
    {
        if ($ord === false) {
            return 'BROKEN UTF8';
        }

        // http://unicode-table.com/en/blocks/general-punctuation/
        if (0x2000 <= $ord && $ord <= 0x200F
            || 0x2028 <= $ord && $ord <= 0x202E
            || 0x205f <= $ord && $ord <= 0x206F
        ) {
            return 'UNSUPPORTED SPACE CHARACTER';
        }

        // C0 controls except line feed and tab, and the complete C1 range U+0080..U+009F.
        if ($ord < 0x0020 && $ord !== 0x000A && $ord !== 0x0009
            || 0x0080 <= $ord && $ord <= 0x009F
        ) {
            return 'CONTROL CHARARCTER';
        }

        // Characters above U+009F (non ASCII) are intentionally not reported.
        return null;
    }

    /**
     * Prints one finding, preceded by the file name when it is the first finding for that file.
     *
     * @param string $what the problem label.
     * @param string $char the offending character (or raw byte for broken UTF-8).
     * @param int $line the 1-based line number.
     * @param int $pos the position within the line.
     * @param string $file the file the character was found in.
     */
    private function found($what, $char, $line, $pos, $file)
    {
        if (!isset($this->_foundFiles[$file])) {
            $this->stdout("$file: \n", Console::BOLD);
            $this->_foundFiles[$file] = $file;
        }

        $ord = $this->unicodeOrd($char);
        if ($ord === false) {
            // There is no code point to look up; show the raw bytes as hex instead of
            // echoing malformed data to the terminal.
            $this->stdout("  at $line:$pos FOUND $what: 0x" . bin2hex($char) . "\n");
            return;
        }

        $hexcode = str_pad(dechex($ord), 4, '0', STR_PAD_LEFT);

        $this->stdout("  at $line:$pos FOUND $what: 0x$hexcode '$char' http://unicode-table.com/en/$hexcode/\n");
    }

    /**
     * Equivalent for ord() just for unicode.
     *
     * Decodes the first UTF-8 encoded character of the given string. Trailing bytes
     * after the first character are ignored.
     *
     * http://stackoverflow.com/a/10333324/1106908
     *
     * @param string $c the UTF-8 encoded character.
     * @return bool|int the code point, or `false` if the string is empty, truncated,
     * has an invalid lead or continuation byte, is an overlong encoding, encodes a
     * surrogate or lies beyond U+10FFFF.
     */
    private function unicodeOrd($c)
    {
        $length = \strlen($c);
        if ($length === 0) {
            return false;
        }

        $h = \ord($c[0]);
        if ($h <= 0x7F) {
            return $h;
        }
        if ($h < 0xC2 || $h > 0xF4) {
            // Stray continuation byte, overlong 2-byte lead or lead beyond U+10FFFF.
            return false;
        }

        // Number of continuation bytes announced by the lead byte.
        $extra = $h <= 0xDF ? 1 : ($h <= 0xEF ? 2 : 3);
        if ($length <= $extra) {
            return false;
        }

        // Payload bits of the lead byte: 5, 4 or 3 bits for 2-, 3- or 4-byte sequences.
        $code = $h & (0x3F >> $extra);
        for ($i = 1; $i <= $extra; $i++) {
            $byte = \ord($c[$i]);
            if (($byte & 0xC0) !== 0x80) {
                return false;
            }
            $code = ($code << 6) | ($byte & 0x3F);
        }

        // Smallest code point that legitimately needs this sequence length.
        $minimum = [1 => 0x80, 2 => 0x800, 3 => 0x10000];
        if ($code < $minimum[$extra] || $code > 0x10FFFF || (0xD800 <= $code && $code <= 0xDFFF)) {
            return false;
        }

        return $code;
    }
}
127