1 | <?php |
||
class Utf8Controller extends Controller
{
    public $defaultAction = 'check-guide';

    /**
     * @var array paths of the files for which the file header line has already been printed,
     * indexed by the path itself.
     */
    private $_foundFiles = [];


    /**
     * Check guide for non-printable characters that may break docs generation.
     *
     * Every character of every checked file is inspected and a line is printed for each
     * broken UTF-8 byte, unsupported space character and control character found.
     *
     * @param string|null $directory the directory to check. If not specified, the default
     * guide directory will be checked. A path to a single file is accepted as well, in which
     * case only that file is checked; in a directory only `*.md` files are checked.
     */
    public function actionCheckGuide($directory = null)
    {
        if ($directory === null) {
            $directory = \dirname(\dirname(__DIR__)) . '/docs';
        }
        if (is_file($directory)) {
            $files = [$directory];
        } else {
            $files = FileHelper::findFiles($directory, [
                'only' => ['*.md'],
            ]);
        }

        foreach ($files as $file) {
            $content = file_get_contents($file);
            if ($content === false) {
                // Nothing can be checked; say so instead of treating the file as empty.
                $this->stdout("$file: unable to read file\n", Console::BOLD);
                continue;
            }

            $this->checkContent($content, $file);
        }
    }

    /**
     * Scans the content of one file and reports every offending character.
     *
     * Lines are counted from 1. The position is the number of characters since the last
     * line feed; the line feed itself resets it to 0.
     *
     * @param string $content raw file content.
     * @param string $file path of the file, used for reporting only.
     */
    private function checkContent($content, $file)
    {
        $line = 1;
        $pos = 0;
        foreach ($this->splitCharacters($content) as $c) {
            $ord = $this->unicodeOrd($c);

            $pos++;
            if ($ord === 0x000A) {
                $line++;
                $pos = 0;
            }

            if ($ord === false) {
                $this->found('BROKEN UTF8', $c, $line, $pos, $file);
                continue;
            }

            // http://unicode-table.com/en/blocks/general-punctuation/
            if ((0x2000 <= $ord && $ord <= 0x200F)
                || (0x2028 <= $ord && $ord <= 0x202E)
                || (0x205F <= $ord && $ord <= 0x206F)
            ) {
                $this->found('UNSUPPORTED SPACE CHARACTER', $c, $line, $pos, $file);
                continue;
            }

            // C0 controls except line feed and tab, and the C1 controls U+0080..U+009F
            // (upper bound inclusive). Everything else, including non-ASCII text above
            // U+009F, is accepted.
            if (($ord < 0x0020 && $ord !== 0x000A && $ord !== 0x0009)
                || (0x0080 <= $ord && $ord <= 0x009F)
            ) {
                $this->found('CONTROL CHARACTER', $c, $line, $pos, $file);
            }
        }
    }

    /**
     * Splits a byte string into UTF-8 characters without failing on invalid input.
     *
     * A split with the `u` modifier rejects the whole subject as soon as it contains a
     * single invalid byte, which would hide exactly the files that need to be reported.
     * This pattern runs in byte mode instead: each alternative but the last matches one
     * well-formed UTF-8 sequence, and the last one picks up any remaining byte on its own
     * so that it can be reported individually.
     *
     * @param string $content the text to split.
     * @return string[] well-formed UTF-8 sequences and single stray bytes, in input order.
     */
    private function splitCharacters($content)
    {
        $pattern = '/[\x00-\x7F]'
            . '|[\xC2-\xDF][\x80-\xBF]'
            . '|\xE0[\xA0-\xBF][\x80-\xBF]'
            . '|[\xE1-\xEC\xEE\xEF][\x80-\xBF]{2}'
            . '|\xED[\x80-\x9F][\x80-\xBF]'
            . '|\xF0[\x90-\xBF][\x80-\xBF]{2}'
            . '|[\xF1-\xF3][\x80-\xBF]{3}'
            . '|\xF4[\x80-\x8F][\x80-\xBF]{2}'
            . '|[\x80-\xFF]/';

        $matches = [];
        if (preg_match_all($pattern, $content, $matches) === false) {
            return [];
        }

        return isset($matches[0]) ? $matches[0] : [];
    }

    /**
     * Prints one finding, preceded by the file name the first time a file is reported.
     *
     * @param string $what description of the kind of problem.
     * @param string $char the offending character (or stray byte).
     * @param int $line line number of the finding.
     * @param int $pos position within the line.
     * @param string $file path of the file being checked.
     */
    private function found($what, $char, $line, $pos, $file)
    {
        if (!isset($this->_foundFiles[$file])) {
            $this->stdout("$file: \n", Console::BOLD);
            $this->_foundFiles[$file] = $file;
        }

        $ord = $this->unicodeOrd($char);
        if ($ord === false) {
            // Not a code point: show the raw bytes as hex, there is nothing to look up
            // and the bytes themselves are not safe to echo.
            $this->stdout(" at $line:$pos FOUND $what: 0x" . bin2hex($char) . "\n");
            return;
        }

        // Zero-padded to at least four hex digits.
        $hexcode = sprintf('%04x', $ord);

        $this->stdout(" at $line:$pos FOUND $what: 0x$hexcode '$char' http://unicode-table.com/en/$hexcode/\n");
    }

    /**
     * Equivalent for ord() just for unicode.
     *
     * Decodes the first UTF-8 sequence of the given string; any bytes after that sequence
     * are ignored. Overlong and surrogate encodings are not rejected here.
     *
     * http://stackoverflow.com/a/10333324/1106908
     *
     * @param string $c the UTF-8 encoded character.
     * @return bool|int the code point, or `false` if the string is empty, starts with a byte
     * that cannot begin a sequence, is shorter than its lead byte announces, or contains a
     * byte that is not a continuation byte where one is expected.
     */
    private function unicodeOrd($c)
    {
        $length = \strlen($c);
        if ($length === 0) {
            return false;
        }

        $h = \ord($c[0]);
        if ($h <= 0x7F) {
            return $h;
        }
        if ($h < 0xC2 || $h > 0xF4) {
            return false;
        }

        // The lead byte determines how many continuation bytes follow and how many of its
        // own low bits carry payload.
        if ($h <= 0xDF) {
            $extra = 1;
            $code = $h & 0x1F;
        } elseif ($h <= 0xEF) {
            $extra = 2;
            $code = $h & 0x0F;
        } else {
            $extra = 3;
            $code = $h & 0x07;
        }

        if ($length <= $extra) {
            return false;
        }

        for ($i = 1; $i <= $extra; $i++) {
            $byte = \ord($c[$i]);
            if (($byte & 0xC0) !== 0x80) {
                return false;
            }
            $code = ($code << 6) | ($byte & 0x3F);
        }

        return $code;
    }
}
||
127 |