Completed
Push — master ( 9401a4...2cc7a2 )
by mw
13:59
created

Normalizer::reduceLengthTo()   B

Complexity

Conditions 5
Paths 5

Size

Total Lines 19
Code Lines 10

Duplication

Lines 0
Ratio 0 %

Importance

Changes 1
Bugs 0 Features 1
Metric Value
c 1
b 0
f 1
dl 0
loc 19
rs 8.8571
cc 5
eloc 10
nc 5
nop 2
1
<?php
2
3
namespace Onoi\Tesa;
4
5
use Onoi\Tesa\Tokenizer\Tokenizer;
6
use Onoi\Tesa\Synonymizer\Synonymizer;
7
use Onoi\Tesa\StopwordAnalyzer\StopwordAnalyzer;
8
use RuntimeException;
9
10
/**
11
 * @license GNU GPL v2+
12
 * @since 0.1
13
 *
14
 * @author mwjames
15
 */
16
class Normalizer {

	/**
	 * Transliterates a text by delegating to Transliterator::transliterate.
	 *
	 * @since 0.1
	 *
	 * @param string $text
	 * @param integer $flag option(s) forwarded unchanged to the Transliterator,
	 *  defaults to Transliterator::DIACRITICS
	 *
	 * @return string presumably the transliterated text (the Transliterator is
	 *  defined outside of this class, confirm its return type there)
	 */
	public static function applyTransliteration( $text, $flag = Transliterator::DIACRITICS ) {
		return Transliterator::transliterate( $text, $flag );
	}

	/**
	 * Replaces full-width (double-width) digits and Latin letters with their
	 * ASCII counterparts and trims surrounding whitespace.
	 *
	 * Only 0-9, A-Z and a-z are mapped; full-width punctuation is left as is.
	 *
	 * @see Localizer::convertDoubleWidth
	 *
	 * @since 0.1
	 *
	 * @param string $text UTF-8 encoded text
	 *
	 * @return string
	 */
	public static function convertDoubleWidth( $text ) {
		// Built once per request: full-width UTF-8 sequence => ASCII character
		static $map = null;

		if ( $map === null ) {
			$halfWidth = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz";
			$map = array();

			foreach ( str_split( $halfWidth ) as $char ) {
				// Every full-width form sits exactly 0xFEE0 code points above its
				// ASCII counterpart (U+FF10-FF19, U+FF21-FF3A, U+FF41-FF5A).
				// Deriving the characters keeps this file ASCII-only, so the table
				// cannot be silently folded back to ASCII by Unicode normalization.
				$codePoint = ord( $char ) + 0xFEE0;

				// Manual 3-byte UTF-8 encoding of a BMP code point
				$fullWidthChar = chr( 0xE0 | ( $codePoint >> 12 ) ) .
					chr( 0x80 | ( ( $codePoint >> 6 ) & 0x3F ) ) .
					chr( 0x80 | ( $codePoint & 0x3F ) );

				$map[$fullWidthChar] = $char;
			}
		}

		return strtr( trim( $text ), $map );
	}

	/**
	 * Returns the lowercase form of a text using its detected encoding.
	 *
	 * @since 0.1
	 *
	 * @param string $text
	 *
	 * @return string
	 */
	public static function toLowercase( $text ) {
		return mb_strtolower( $text, self::detectEncoding( $text ) );
	}

	/**
	 * Shortens a text to at most $length characters and, when the shortened
	 * part contains a space, cuts at the last space so that no partial word
	 * remains at the end.
	 *
	 * The text is returned unchanged when $length is null or when the text is
	 * not longer than $length. All positions are counted in characters, not in
	 * bytes, hence multibyte text never exceeds the requested length.
	 *
	 * @since 0.1
	 *
	 * @param string $text
	 * @param integer|null $length maximum number of characters
	 *
	 * @return string
	 */
	public static function reduceLengthTo( $text, $length = null ) {

		if ( $length === null ) {
			return $text;
		}

		$encoding = self::detectEncoding( $text );

		if ( mb_strlen( $text, $encoding ) <= $length ) {
			return $text;
		}

		$head = mb_substr( $text, 0, $length, $encoding );

		// Character (not byte) offset of the last space within the shortened
		// text; false when there is none
		$lastWholeWordPosition = mb_strrpos( $head, ' ', 0, $encoding );

		// No space, or only a leading one: keep the hard cut at $length
		if ( $lastWholeWordPosition === false || $lastWholeWordPosition <= 0 ) {
			return $head;
		}

		return mb_substr( $text, 0, $lastWholeWordPosition, $encoding );
	}

	/**
	 * Detects the encoding of a text and falls back to the internal encoding
	 * when the detection fails, so that callers never pass `false` on as an
	 * encoding name.
	 *
	 * @param string $text
	 *
	 * @return string
	 */
	private static function detectEncoding( $text ) {
		$encoding = mb_detect_encoding( $text );

		return $encoding === false ? mb_internal_encoding() : $encoding;
	}

}
104