1
|
|
|
<?php
|
2
|
|
|
/**
|
3
|
|
|
* This file is part of the O2System Framework package.
|
4
|
|
|
*
|
5
|
|
|
* For the full copyright and license information, please view the LICENSE
|
6
|
|
|
* file that was distributed with this source code.
|
7
|
|
|
*
|
8
|
|
|
* @author Steeve Andrian Salim
|
9
|
|
|
* @copyright Copyright (c) Steeve Andrian Salim
|
10
|
|
|
*/
|
11
|
|
|
|
12
|
|
|
// ------------------------------------------------------------------------
|
13
|
|
|
|
14
|
|
|
namespace O2System\Security\Filters;
|
15
|
|
|
|
16
|
|
|
// ------------------------------------------------------------------------
|
17
|
|
|
|
18
|
|
|
/*
|
19
|
|
|
* ------------------------------------------------------
|
20
|
|
|
* Important charset-related stuff
|
21
|
|
|
* ------------------------------------------------------
|
22
|
|
|
*
|
23
|
|
|
* Configure mbstring and/or iconv if they are enabled
|
24
|
|
|
* and set MB_ENABLED and ICONV_ENABLED constants, so
|
25
|
|
|
* that we don't repeatedly do extension_loaded() or
|
26
|
|
|
* function_exists() calls.
|
27
|
|
|
*
|
28
|
|
|
* Note: UTF-8 class depends on this. It used to be done
|
29
|
|
|
* in its constructor, but it's _not_ class-specific.
|
30
|
|
|
*
|
31
|
|
|
*/
|
32
|
|
|
// Read the configured application charset once, upper-cased, and reuse it
// for every encoding-related setting below.
$charset = strtoupper(o2system()->config[ 'charset' ]);
ini_set('default_charset', $charset);

if (extension_loaded('mbstring')) {
    define('MB_ENABLED', true);

    // mbstring.internal_encoding is deprecated starting with PHP 5.6
    // and its usage triggers E_DEPRECATED messages.
    // NOTE(review): is_php() is a project helper that is not visible here;
    // confirm that ('5.6', '<=') selects only versions older than the deprecation.
    if (is_php('5.6', '<=')) {
        @ini_set('mbstring.internal_encoding', $charset);
    }

    // This is required for mb_convert_encoding() to strip invalid characters.
    // That's utilized by UTF8 Class, but it's also done for consistency with iconv.
    mb_substitute_character('none');
} else {
    define('MB_ENABLED', false);
}

// There's an ICONV_IMPL constant, but the PHP manual says that using
// iconv's predefined constants is "strongly discouraged".
if (extension_loaded('iconv')) {
    define('ICONV_ENABLED', true);

    // iconv.internal_encoding is deprecated starting with PHP 5.6
    // and its usage triggers E_DEPRECATED messages.
    // NOTE(review): same is_php() caveat as for mbstring above — confirm.
    if (is_php('5.6', '<=')) {
        @ini_set('iconv.internal_encoding', $charset);
    }
} else {
    define('ICONV_ENABLED', false);
}

if (is_php('5.6')) {
    // The core directive introduced with PHP 5.6 is named "internal_encoding".
    // The previously used "php.internal_encoding" is not a registered directive,
    // so ini_set() returned false and the setting was never applied.
    ini_set('internal_encoding', $charset);
}
|
69
|
|
|
|
70
|
|
|
|
71
|
|
|
class Utf8
{
    /**
     * Flag set by the constructor: true when all UTF-8 prerequisites are met.
     *
     * @var bool
     */
    protected $isEnabled = false;

    /**
     * Class constructor
     *
     * Decides whether UTF-8 support is enabled and logs the outcome.
     * Support is enabled only when all of the following hold:
     *  - the PREG_BAD_UTF8_ERROR constant is defined (PCRE must support UTF-8),
     *  - iconv or mbstring is available (ICONV_ENABLED / MB_ENABLED),
     *  - the configured application charset is UTF-8.
     *
     * @access public
     */
    public function __construct()
    {
        // Short-circuit evaluation keeps the configuration lookup last, so it
        // is only performed when the two cheaper checks have already passed.
        $this->isEnabled = defined('PREG_BAD_UTF8_ERROR')
            && (ICONV_ENABLED === true || MB_ENABLED === true)
            && strtoupper(o2system()->config[ 'charset' ]) === 'UTF-8';

        $supportMessage = $this->isEnabled
            ? 'LOG_DEBUG_UTF8_SUPPORT_ENABLED'
            : 'LOG_DEBUG_UTF8_SUPPORT_DISABLED';

        logger()->debug($supportMessage);
        logger()->debug('LOG_DEBUG_CLASS_INITIALIZED', [__CLASS__]);
    }

    // --------------------------------------------------------------------

    /**
     * Is UTF-8 support enabled?
     *
     * @return bool The flag computed by the constructor.
     */
    public function isEnabled()
    {
        return (bool)$this->isEnabled;
    }

    // --------------------------------------------------------------------

    /**
     * Clean UTF-8 strings
     *
     * Pure 7-bit ASCII input is returned untouched. Anything else is passed
     * through a UTF-8 to UTF-8 conversion, preferring mbstring and falling
     * back to iconv (with //IGNORE). When neither extension is enabled the
     * input is returned unchanged.
     *
     * @param string $string String to clean
     *
     * @return string
     */
    public function cleanString($string)
    {
        // Nothing to clean unless the ASCII test explicitly reports false.
        if ($this->isAscii($string) !== false) {
            return $string;
        }

        if (MB_ENABLED) {
            return mb_convert_encoding($string, 'UTF-8', 'UTF-8');
        }

        if (ICONV_ENABLED) {
            // The iconv() result is handed back as-is; warnings are suppressed.
            return @iconv('UTF-8', 'UTF-8//IGNORE', $string);
        }

        return $string;
    }

    // --------------------------------------------------------------------

    /**
     * Is ASCII?
     *
     * Reports whether the string contains no byte outside \x00-\x7F.
     *
     * @param string $string String to check
     *
     * @return bool
     */
    public function isAscii($string)
    {
        $nonAsciiMatches = preg_match('/[^\x00-\x7F]/S', $string);

        return $nonAsciiMatches === 0;
    }

    // --------------------------------------------------------------------

    /**
     * Remove ASCII control characters
     *
     * Delegates to remove_invisible_characters() with its second argument
     * set to false. The intent is to drop ASCII control characters other
     * than horizontal tabs, line feeds and carriage returns, since the
     * others can cause problems in XML.
     * NOTE(review): the helper is defined elsewhere — confirm its exact behaviour.
     *
     * @param string $string String to clean
     *
     * @return string
     */
    public function safeAsciiForXML($string)
    {
        $sanitized = remove_invisible_characters($string, false);

        return $sanitized;
    }

    // --------------------------------------------------------------------

    /**
     * Convert to UTF-8
     *
     * Attempts to convert a string from the given encoding to UTF-8,
     * preferring mbstring and falling back to iconv.
     *
     * @param string $string   Input string
     * @param string $encoding Input encoding
     *
     * @return string $str encoded in UTF-8 or FALSE on failure
     */
    public function convertString($string, $encoding)
    {
        if (MB_ENABLED) {
            return mb_convert_encoding($string, 'UTF-8', $encoding);
        }

        if (ICONV_ENABLED) {
            return @iconv($encoding, 'UTF-8', $string);
        }

        // No conversion extension is available.
        return false;
    }
}