StringUtils::normalizeString()   A
last analyzed

Complexity

Conditions 3
Paths 3

Size

Total Lines 17
Code Lines 7

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
dl 0
loc 17
rs 9.4285
c 0
b 0
f 0
cc 3
eloc 7
nc 3
nop 2
1
<?php
2
3
namespace Ridibooks\Platform\Common;
4
5
class StringUtils
6
{
7
	/* DB에서 찾아변경할때
0 ignored issues
show
Unused Code Comprehensibility introduced by
38% of this comment could be valid code. Did you maybe forget this after debugging?

Sometimes obsolete code just ends up commented out instead of removed. In this case it is better to remove the code once you have checked you do not need it.

The code might also have been commented out for debugging purposes. In this case it is vital that someone uncomments it again or your project may behave in very unexpected ways in production.

This check looks for comments that seem to be mostly valid code and reports them.

Loading history...
8
	SELECT id, title, unhex(replace(hex(title),'C2A0','20'))
9
	FROM tb_book
10
	WHERE hex(title) LIKE concat('%C2A0%')
11
	*/
12
	const UNICODE_NON_BREAKING_SPACE = "\xc2\xa0";
13
	const UNICODE_ZERO_WIDTH_SPACE = "\xe2\x80\x8b";
14
15
	/**
	 * Strips every character that is neither a word character (\w) nor a pipe ('|').
	 *
	 * The pattern carries the /u modifier, so the subject is handled as UTF-8.
	 *
	 * @param string $string text to clean
	 * @return string|null whatever preg_replace yields (null when PCRE reports an error)
	 */
	public static function removeSpecificCharaters($string)
	{
		$kept_symbol = preg_quote('|');
		$pattern = '/[^\w' . $kept_symbol . ']/u';

		return preg_replace($pattern, '', $string);
	}
19
20
	/**
	 * Neutralises markup by escaping angle brackets and spaces.
	 *
	 * '<' becomes '&lt;', '>' becomes '&gt;' and ' ' becomes '&nbsp;'.
	 * Nothing is actually removed, and '&' is left untouched.
	 *
	 * @param string $string
	 * @return string
	 */
	public static function removeTag($string)
	{
		$search = ["<", ">", " "];
		$replacement = ["&lt;", "&gt;", "&nbsp;"];

		// The pairs are applied in order; none of the replacements contains a later search character.
		return str_replace($search, $replacement, $string);
	}
28
29
	/**
	 * Tells whether the given string is empty.
	 * Used to catch both null and blank strings such as ' '.
	 *
	 * @param $string
	 * @return bool true when the value is null or trims down to ''
	 */
	public static function isEmpty($string)
	{
		return $string === null || trim($string) === '';
	}
43
44
	/**
	 * Checks whether the string contains a Korean resident registration number (jumin).
	 *
	 * Pattern pieces:
	 *  - year   00-99: [0-9]{2}
	 *  - month  01-12: 0[1-9] or 1[012]
	 *  - day    01-31: 0[1-9], 1[0-9], 2[0-9] or 3[01]
	 *  - the hyphen between the 6-digit and 7-digit parts is optional: -?
	 *  - first digit of the 7-digit part is the gender marker: [012349]
	 *  - the next five digits (region etc.): [0-9]{5}
	 *  - the last digit is a check digit; a regex cannot verify it, so any digit is accepted: [0-9]
	 *
	 * The pattern is not anchored, so a match anywhere inside $jumin yields true.
	 *
	 * @param string $jumin
	 *
	 * @return bool
	 */
	public static function isJumin(string $jumin)
	{
		$pattern = "/[0-9]{2}(0[1-9]|1[012])(0[1-9]|1[0-9]|2[0-9]|3[01])-?[012349][0-9]{5}[0-9]/i";
		$found = preg_match($pattern, $jumin);

		// preg_match gives 1 on a match, 0 on no match and false on error.
		return $found === 1;
	}
63
64
	/**
	 * Converts a resident registration number into its masked display form:
	 * the first six characters, a hyphen, the seventh character, then six asterisks.
	 *
	 * @deprecated
	 * @param string $jumin number without a hyphen
	 * @return string
	 */
	public static function maskJuminForDisplay($jumin)
	{
		$birth_part = substr($jumin, 0, 6);
		$gender_digit = substr($jumin, 6, 1);

		return sprintf('%s-%s******', $birth_part, $gender_digit);
	}
73
74
	/**
	 * Replaces special space characters with plain spaces (via normalizeSpace) and then
	 * brings the text into a normalized Unicode form.
	 *
	 * @param string $str
	 * @param bool $is_single_line forwarded to normalizeSpace
	 * @return string the normalized text, or the space-normalized text when normalization fails
	 */
	public static function normalizeString($str, $is_single_line = false)
	{
		$str = self::normalizeSpace($str, $is_single_line);

		/*
		 * Bring the Unicode representation to NFC.
		 * iconv is not used because `iconv -l` shows that UTF-8-MAC is supported on Mac only.
		 * Issue: https://app.asana.com/0/9476649488676/157381871168492
		 */
		if (\Normalizer::isNormalized($str)) {
			return $str;
		}

		$normalized_string = \Normalizer::normalize($str);

		// normalize() reports failure with false; keep the un-normalized text in that case.
		return $normalized_string === false ? $str : $normalized_string;
	}
91
92
	/**
	 * Replaces zero-width spaces and non-breaking spaces with a regular space.
	 * In single-line mode carriage returns, tabs and line feeds are replaced as well.
	 *
	 * @param string $str
	 * @param bool $is_single_line
	 * @return string
	 */
	private static function normalizeSpace($str, $is_single_line = false)
	{
		$targets = [self::UNICODE_ZERO_WIDTH_SPACE, self::UNICODE_NON_BREAKING_SPACE];
		if ($is_single_line) {
			array_push($targets, "\r", "\t", "\n");
		}

		return str_replace($targets, ' ', $str);
	}
109
110
	/**
	 * Removes every UTF-8 non-breaking space from the string.
	 *
	 * @param string $string
	 * @return string
	 */
	public static function removeNonBreakingSpace($string)
	{
		$needle = self::UNICODE_NON_BREAKING_SPACE;

		return str_replace($needle, '', $string);
	}
118
119
	/**
	 * Removes every UTF-8 zero-width space from the string.
	 *
	 * @param string $string
	 * @return string
	 */
	public static function removeZeroWidthSpace($string)
	{
		$needle = self::UNICODE_ZERO_WIDTH_SPACE;

		return str_replace($needle, '', $string);
	}
127
128
	/**
	 * Removes all hyphens ('-') and trims surrounding whitespace from what is left.
	 *
	 * @param $string
	 * @return string
	 */
	public static function removeHyphen($string)
	{
		$without_hyphen = str_replace('-', '', $string);

		return trim($without_hyphen);
	}
136
137
	/**
	 * Parses an XML string with SimpleXML (LIBXML_NOCDATA) and flattens the resulting
	 * object tree into plain PHP values through normalizeSimpleXML.
	 *
	 * TODO: move into an ArrayUtil class.
	 * NOTE(review): when parsing fails simplexml_load_string returns false, and that false
	 * is what this method hands back instead of an array.
	 *
	 * @see http://php.net/manual/en/class.simplexmlelement.php
	 * @param $xmlstring string
	 * @return array
	 */
	public static function xml2array($xmlstring)
	{
		$document = simplexml_load_string($xmlstring, null, LIBXML_NOCDATA);

		$converted = [];
		self::normalizeSimpleXML($document, $converted);

		return $converted;
	}
149
150
	/**
	 * Recursively converts a value (typically a SimpleXMLElement tree) into plain arrays
	 * and scalars, writing the outcome into $result.
	 *
	 * Objects are expanded with get_object_vars(). Arrays are walked entry by entry; the
	 * entry stored under the '@attributes' key replaces $result as a whole, every other
	 * entry is stored under its own key. Anything else is copied as-is.
	 *
	 * TODO: move into an ArrayUtil class.
	 *
	 * @param mixed $obj value to convert
	 * @param mixed $result output, passed by reference
	 */
	private static function normalizeSimpleXML($obj, &$result)
	{
		$node = is_object($obj) ? get_object_vars($obj) : $obj;

		if (!is_array($node)) {
			$result = $node;

			return;
		}

		foreach ($node as $name => $child) {
			$converted = null;
			self::normalizeSimpleXML($child, $converted);

			// Strict comparison: integer keys (e.g. 0) must never be taken for '@attributes'.
			if ($name === '@attributes') {
				$result = $converted;
			} else {
				$result[$name] = $converted;
			}
		}
	}
174
175
	/**
	 * Removes only those tags whose name starts with an ASCII letter.
	 *
	 * Meant for input (internal or external, e.g. Bookcube) where HTML and plain text are
	 * mixed, such as:
	 *   <p><강추> 아스란 연대기</p>
	 * Here <p> and </p> are stripped while <강추> is kept.
	 *
	 * @param $explain
	 * @return mixed result of preg_replace
	 */
	public static function stripTagsOnlyEnglishBegin($explain)
	{
		$tag_pattern = '/<(\/?)[a-z][^<>]*>/i';

		return preg_replace($tag_pattern, '', $explain);
	}
186
187
	/**
	 * Splits a comma-separated string and drops every item PHP considers falsy.
	 *
	 * Note that this discards not only empty items but also the item '0', and that the
	 * surviving items keep their original positions as keys (the result may have gaps).
	 *
	 * @param $comma_separated
	 * @return array
	 */
	public static function commaSeparatedToArray($comma_separated)
	{
		$kept = [];
		foreach (explode(',', $comma_separated) as $position => $item) {
			if ($item) {
				$kept[$position] = $item;
			}
		}

		return $kept;
	}
195
196
	/**
	 * Splits text into lines on LF or CRLF, swallowing the non-newline whitespace that
	 * directly surrounds each line break.
	 *
	 * @param string $new_values_by_line
	 * @return array|false result of preg_split
	 */
	public static function explodeByLine($new_values_by_line)
	{
		$line_break_pattern = "/[^\n\S]*\r?\n[^\n\S]*/";

		return preg_split($line_break_pattern, $new_values_by_line);
	}
200
201
	/**
	 * Decodes CSV text into a list of rows, each row being a list of field strings.
	 *
	 * The input is tokenised into line breaks, commas, double-quoted chunks and bare text.
	 * Two adjacent quoted chunks ("a""b") are joined with a literal quote, which is how an
	 * escaped quote inside a quoted field is restored. Blank lines are folded into the
	 * preceding line break.
	 *
	 * A final row that is not terminated by a line break is returned as well, including
	 * the case where its last field is empty (e.g. "a," yields [['a', '']]).
	 *
	 * @param string $input
	 * @return array list of rows; empty when the input has no tokens or the pattern fails
	 */
	public static function decodeCsv($input)
	{
		$matched = preg_match_all("/\s*\r?\n|,|\"[^\"]*\"|[^\",\r\n]+/s", $input, $mats, PREG_SET_ORDER);
		if ($matched === false || !is_array($mats)) {
			return [];
		}

		$rets = [];
		$ret = [];
		$cursor = '';
		// True while the current row has received at least one token and is not yet stored.
		$row_open = false;

		foreach ($mats as $mat) {
			$str = $mat[0];
			if (substr($str, -1) === "\n") {
				// End of line: close the current field and the current row.
				$ret[] = $cursor;
				$rets[] = $ret;
				$ret = [];
				$cursor = '';
				$row_open = false;
			} elseif ($str[0] === ',') {
				// Field separator: close the current field.
				$ret[] = $cursor;
				$cursor = '';
				$row_open = true;
			} elseif ($str[0] === '"') {
				// The cast keeps the value a string on PHP versions where substr() can return false.
				$inner = (string)substr($str, 1, -1);
				$cursor = ($cursor === '') ? $inner : $cursor . '"' . $inner;
				$row_open = true;
			} else {
				$cursor = $str;
				$row_open = true;
			}
		}

		// Flush the last row when the input does not end with a line break.
		if ($row_open) {
			$ret[] = $cursor;
			$rets[] = $ret;
		}

		return $rets;
	}
234
235
	/**
	 * Encodes rows into CSV text.
	 *
	 * Every field is wrapped in double quotes (inner quotes are doubled) and followed by a
	 * comma, so each line ends with a trailing comma before its "\n".
	 *
	 * @param iterable $input rows, each an iterable of field values
	 * @return string
	 */
	public static function encodeCsv($input)
	{
		$csv = '';
		foreach ($input as $fields) {
			$line = '';
			foreach ($fields as $field) {
				$line .= sprintf('"%s",', str_replace('"', '""', $field));
			}
			$csv .= $line . "\n";
		}

		return $csv;
	}
247
248
	/**
	 * Returns the last component of a path without depending on the locale, so multibyte
	 * (UTF-8) file names stay intact.
	 *
	 * Both '/' and '\' are treated as separators. Trailing separators are ignored, and an
	 * empty path (or one made of separators only) yields '' instead of raising an
	 * undefined-offset warning.
	 *
	 * @param string $input
	 * @return string
	 */
	public static function basenameUtf8($input)
	{
		// '/' is ASCII and never occurs inside a UTF-8 multibyte sequence, so byte-wise handling is safe.
		$path = rtrim(str_replace('\\', '/', $input), '/');

		$last_separator = strrpos($path, '/');
		if ($last_separator === false) {
			return $path;
		}

		return substr($path, $last_separator + 1);
	}
255
256
	/**
	 * Splits an array into chunks and implodes it.
	 * ex) for the array ['A1', 'A2', 'A3', 'B1', 'B2', 'B3'],
	 * (';', "\n", 3, $pieces) => "A1;A2;A3\nB1;B2;B3"
	 *
	 * Both glue strings are removed from the values first so they cannot be confused with
	 * separators. Chunks whose values are all falsy are skipped. Each imploded chunk and
	 * the final result are passed through trim(), so surrounding whitespace (including a
	 * trailing whitespace glue) is dropped.
	 *
	 * @param string $glue_in_chunk separator inside a chunk
	 * @param string $glue_between_chunks separator between chunks
	 * @param int $chunk_size number of items per chunk, must be at least 1
	 * @param array $pieces
	 * @return string
	 * @throws \Exception when $chunk_size is below 1 or does not evenly divide count($pieces)
	 */
	public static function implodeByChunk($glue_in_chunk, $glue_between_chunks, $chunk_size, array $pieces)
	{
		// Reject non-positive sizes up front: they would otherwise surface as a modulo-by-zero
		// error or an array_chunk() failure instead of the documented exception.
		if ($chunk_size < 1 || count($pieces) % $chunk_size !== 0) {
			throw new \Exception('chunk size error');
		}

		// Strip the separators from the values to avoid ambiguity.
		foreach ($pieces as $index => $piece) {
			$pieces[$index] = str_replace([$glue_in_chunk, $glue_between_chunks], '', $piece);
		}

		$result = '';
		foreach (array_chunk($pieces, $chunk_size) as $row) {
			if (!array_filter($row)) {
				continue;
			}

			$result .= trim(implode($glue_in_chunk, $row)) . $glue_between_chunks;
		}

		return trim($result);
	}
292
293
	/**
	 * Swaps every occurrence of two substrings with each other.
	 * ex) &lt;b&gt;<b> => <b>&lt;b&gt;
	 *
	 * The string is scanned once from left to right. At each position $sub_string1 is
	 * tried first, then $sub_string2; text that was just inserted is never scanned again.
	 * Matching is byte-exact (no numeric-string coercion). An empty substring never
	 * matches, so passing '' for one side only replaces the other side with ''.
	 *
	 * @param string $string
	 * @param string $sub_string1
	 * @param string $sub_string2
	 * @return string
	 */
	public static function swapTwoSubStrings($string, $sub_string1, $sub_string2)
	{
		$string = (string)$string;
		$sub_string1 = (string)$sub_string1;
		$sub_string2 = (string)$sub_string2;

		$length = strlen($string);
		$length1 = strlen($sub_string1);
		$length2 = strlen($sub_string2);

		$swapped = '';
		$i = 0;
		while ($i < $length) {
			if ($length1 > 0 && substr($string, $i, $length1) === $sub_string1) {
				$swapped .= $sub_string2;
				$i += $length1;
			} elseif ($length2 > 0 && substr($string, $i, $length2) === $sub_string2) {
				$swapped .= $sub_string1;
				$i += $length2;
			} else {
				// No match here: copy the byte through unchanged.
				$swapped .= $string[$i];
				$i++;
			}
		}

		return $swapped;
	}
320
321
	private static $non_printable_ascii = null;
322
	private static $unicode_non_breaking_space = "\xc2\xa0";
323
	private static $unicode_zero_width_space = "\xe2\x80\x8b";
324
	private static $unicode_bom_utf8 = "\xef\xbb\xbf";
325
326
	/**
	 * Removes the characters from the non-printable ASCII list plus the UTF-8 BOM,
	 * non-breaking spaces and zero-width spaces.
	 *
	 * @param string $string
	 * @return string
	 */
	public static function removeUnnecessaryCharacter($string)
	{
		self::initializeNonPrintableAscii();

		$removes = array_merge(
			self::$non_printable_ascii,
			[
				self::$unicode_bom_utf8,
				self::$unicode_non_breaking_space,
				self::$unicode_zero_width_space,
			]
		);

		return str_replace($removes, "", $string);
	}
336
337
	/**
	 * Replaces every UTF-8 non-breaking space with a regular space.
	 *
	 * @param string $string
	 * @return string
	 */
	public static function convertNonBreakingSpaceToSpace(string $string): string
	{
		$nbsp = self::$unicode_non_breaking_space;

		return str_replace($nbsp, ' ', $string);
	}
341
342
	/**
	 * Removes every character contained in the lazily built non-printable ASCII list.
	 *
	 * @param string $string
	 * @return string
	 */
	public static function removeNonPrintableAscii($string)
	{
		self::initializeNonPrintableAscii();
		$removes = self::$non_printable_ascii;

		return str_replace($removes, "", $string);
	}
348
349
	/**
	 * Builds the list of non-printable ASCII characters once and caches it in
	 * self::$non_printable_ascii.
	 *
	 * The list covers codes 0-9, 11, 12, 14-31 and 127; line feed (10) and carriage
	 * return (13) are deliberately left out.
	 */
	private static function initializeNonPrintableAscii()
	{
		if (self::$non_printable_ascii !== null) {
			return;
		}

		$codes = array_merge(range(0, 9), [11, 12], range(14, 31), [127]);
		self::$non_printable_ascii = array_map('chr', $codes);
	}
391
}
392