Normalizer   A
last analyzed

Complexity

Total Complexity 10

Size/Duplication

Total Lines 88
Duplicated Lines 0 %

Coupling/Cohesion

Components 0
Dependencies 1

Test Coverage

Coverage 100%

Importance

Changes 0
Metric Value
wmc 10
lcom 0
cbo 1
dl 0
loc 88
ccs 30
cts 30
cp 1
rs 10
c 0
b 0
f 0

4 Methods

Rating   Name   Duplication   Size   Complexity  
A applyTransliteration() 0 3 1
B convertDoubleWidth() 0 24 3
A toLowercase() 0 3 1
B reduceLengthTo() 0 19 5
1
<?php
2
3
namespace Onoi\Tesa;
4
5
use Onoi\Tesa\Tokenizer\Tokenizer;
6
use Onoi\Tesa\Synonymizer\Synonymizer;
7
use Onoi\Tesa\StopwordAnalyzer\StopwordAnalyzer;
8
use RuntimeException;
9
10
/**
11
 * @license GNU GPL v2+
12
 * @since 0.1
13
 *
14
 * @author mwjames
15
 */
16
class Normalizer {

	/**
	 * Delegates to Transliterator::transliterate and returns its result
	 * unchanged.
	 *
	 * @since 0.1
	 *
	 * @param string $text
	 * @param integer $flag passed through to the transliterator as-is
	 *
	 * @return string
	 */
	public static function applyTransliteration( $text, $flag = Transliterator::DIACRITICS ) {
		return Transliterator::transliterate( $text, $flag );
	}

	/**
	 * Trims the text and replaces full-width digits and Latin letters
	 * (U+FF10-U+FF19, U+FF21-U+FF3A, U+FF41-U+FF5A) with their ASCII
	 * counterparts. The text is expected to be UTF-8 encoded.
	 *
	 * @see Localizer::convertDoubleWidth
	 *
	 * @since 0.1
	 *
	 * @param string $text
	 *
	 * @return string
	 */
	public static function convertDoubleWidth( $text ) {
		// Replacement tables are built once per request and reused
		static $full = null;
		static $half = null;

		// NOTE: only digits and Latin letters are mapped below; the
		// punctuation listed here is not converted.
		//,。/?《》〈〉;:“”"〃'`[]{}\|~!-=_+)(()*…—─%¥#
		//,./?«»();:“”

		if ( $full === null ) {
			$half = str_split( "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz" );
			$full = array();

			// A full-width form sits exactly 0xFEE0 code points above its
			// ASCII counterpart. The three UTF-8 bytes are assembled by hand
			// so the table does not depend on how this file is stored or
			// normalized.
			foreach ( $half as $char ) {
				$codePoint = ord( $char ) + 0xFEE0;

				$full[] = chr( 0xE0 | ( $codePoint >> 12 ) ) .
					chr( 0x80 | ( ( $codePoint >> 6 ) & 0x3F ) ) .
					chr( 0x80 | ( $codePoint & 0x3F ) );
			}
		}

		return str_replace( $full, $half, trim( $text ) );
	}

	/**
	 * Lowercases the text using the encoding detected for it.
	 *
	 * @since 0.1
	 *
	 * @param string $text
	 *
	 * @return string
	 */
	public static function toLowercase( $text ) {
		return mb_strtolower( $text, self::detectEncoding( $text ) );
	}

	/**
	 * Shortens the text to at most $length characters. When the truncated
	 * part contains a space (at a position greater than 0), the text is cut
	 * at the last such space so that no word is split; otherwise it is cut
	 * hard at $length.
	 *
	 * @since 0.1
	 *
	 * @param string $text
	 * @param integer|null $length maximum number of characters, null
	 *  returns the text unchanged
	 *
	 * @return string
	 */
	public static function reduceLengthTo( $text, $length = null ) {

		if ( $length === null ) {
			return $text;
		}

		$encoding = self::detectEncoding( $text );

		if ( mb_strlen( $text, $encoding ) <= $length ) {
			return $text;
		}

		// The position has to be a character offset (mb_strrpos) and not a
		// byte offset (strrpos) because it is used as a length for mb_substr
		// below; mixing both cuts multibyte text at the wrong place.
		$lastWholeWordPosition = mb_strrpos(
			mb_substr( $text, 0, $length, $encoding ),
			' ',
			0,
			$encoding
		);

		// false means no space in the truncated part, 0 means a leading
		// space; in both cases keep the hard limit
		if ( $lastWholeWordPosition !== false && $lastWholeWordPosition > 0 ) {
			$length = $lastWholeWordPosition;
		}

		return mb_substr( $text, 0, $length, $encoding );
	}

	/**
	 * Returns the encoding detected for the text, falling back to UTF-8
	 * when mb_detect_encoding is unable to identify one (it returns false
	 * in that case, which the mb_* functions do not accept as encoding).
	 *
	 * @param string $text
	 *
	 * @return string
	 */
	private static function detectEncoding( $text ) {
		$encoding = mb_detect_encoding( $text );

		return $encoding !== false ? $encoding : 'UTF-8';
	}

}
104