CharacterExaminer::contains()   D
last analyzed

Complexity

Conditions 9
Paths 9

Size

Total Lines 32
Code Lines 14

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 14
CRAP Score 9

Importance

Changes 0
Metric Value
dl 0
loc 32
ccs 14
cts 14
cp 1
rs 4.909
c 0
b 0
f 0
cc 9
eloc 14
nc 9
nop 2
crap 9
1
<?php
2
3
namespace Onoi\Tesa;
4
5
/**
6
 * @license GNU GPL v2+
7
 * @since 0.1
8
 *
9
 * @author mwjames
10
 */
11
class CharacterExaminer {

	const CYRILLIC = 'CYRILLIC';
	const LATIN = 'LATIN';
	const HIRAGANA_KATAKANA = 'HIRAGANA_KATAKANA';
	const HANGUL = 'HANGUL';
	const CJK_UNIFIED = 'CJK_UNIFIED';
	const HAN = 'HAN';

	/**
	 * Reports whether the text holds at least one character that belongs to
	 * the requested script / character group.
	 *
	 * The type has to be identical to one of the class constants; any other
	 * value (including non-string values) yields false.
	 *
	 * @see http://jrgraphix.net/research/unicode_blocks.php
	 * @since 0.1
	 *
	 * @param string $type one of the CharacterExaminer::* constants
	 * @param string $text text to be inspected, matched in UTF-8 mode
	 *
	 * @return boolean true when any pattern registered for the type matches
	 */
	public static function contains( $type, $text ) {

		// Each type maps to one or more patterns; a single hit on any of
		// them is sufficient.
		$patternsByType = array(
			self::CYRILLIC => array( '/\p{Cyrillic}/u' ),
			self::LATIN => array( '/\p{Latin}/u' ),
			self::HAN => array( '/\p{Han}/u' ),

			// U+3040-U+309F (Hiragana) or U+30A0-U+30FF (Katakana)
			self::HIRAGANA_KATAKANA => array(
				'/[\x{3040}-\x{309F}]/u',
				'/[\x{30A0}-\x{30FF}]/u'
			),

			// U+3130-U+318F or U+AC00-U+D7AF
			self::HANGUL => array(
				'/[\x{3130}-\x{318F}]/u',
				'/[\x{AC00}-\x{D7AF}]/u'
			),

			// @see https://en.wikipedia.org/wiki/CJK_Unified_Ideographs
			// Chinese, Japanese and Korean (CJK) scripts share common
			// characters known as CJK characters
			self::CJK_UNIFIED => array( '/[\x{4e00}-\x{9fa5}]/u' )
		);

		// Only an exact string match selects a pattern set, which mirrors a
		// strict (===) comparison against the constants.
		if ( !is_string( $type ) || !isset( $patternsByType[$type] ) ) {
			return false;
		}

		foreach ( $patternsByType[$type] as $pattern ) {
			// preg_match() returns false on failure, which is not > 0 and
			// is therefore treated like "no match"
			if ( preg_match( $pattern, $text ) > 0 ) {
				return true;
			}
		}

		return false;
	}

}
63