Completed
Push — master ( 83b1b2...0f3bb4 )
by mw
02:04
created

testSanitizeByStopwordsToIncludeExemptionWithMinLengthRestriction()   A

Complexity

Conditions 1
Paths 1

Size

Total Lines 15
Code Lines 9

Duplication

Lines 0
Ratio 0 %

Importance

Changes 1
Bugs 0 Features 0
Metric Value
c 1
b 0
f 0
dl 0
loc 15
rs 9.4286
cc 1
eloc 9
nc 1
nop 0
1
<?php
2
3
namespace Onoi\Tesa\Tests;
4
5
use Onoi\Tesa\Sanitizer;
6
use Onoi\Tesa\Tokenizer;
7
use Onoi\Tesa\StopwordAnalyzer;
8
9
/**
 * Unit tests for the Sanitizer string helper: transliteration, lowercasing,
 * length reduction, CJK script detection, stopword sanitizing, replacement
 * and tokenization.
 *
 * @covers \Onoi\Tesa\Sanitizer
 * @group onoi-tesa
 *
 * @license GNU GPL v2+
 * @since 0.1
 *
 * @author mwjames
 */
class SanitizerTest extends \PHPUnit_Framework_TestCase {

	/**
	 * Accented Latin characters are expected to be transliterated, with
	 * umlauts expanded to two letters (e.g. 'Ä' => 'AE', 'ö' => 'oe') while
	 * 'ð' and 'Ð' are expected to stay untouched.
	 */
	public function testTransliteration() {

		$instance = new Sanitizer( 'ÀÁÂÃÄÅàáâãäåÒÓÔÕÕÖØòóôõöøÈÉÊËèéêëðÇçÐÌÍÎÏìíîïÙÚÛÜùúûüÑñŠšŸÿýŽž' );
		$instance->applyTransliteration();

		// The Sanitizer instance is compared against the expected string
		// directly, hence relies on its string representation
		$this->assertEquals(
			'AAAAAEAaaaaaeaOOOOOOEOoooooeoEEEEeeeeðCcÐIIIIiiiiUUUUEuuuueNnSsYyyZz',
			$instance
		);
	}

	/**
	 * Lowercasing is expected to affect the accented and plain Latin
	 * upper-case letters and to leave the Korean, Japanese, Cyrillic and
	 * Hebrew parts of the input as they are.
	 */
	public function testToLowercase() {

		$instance = new Sanitizer( 'ÀÁÂÃÄÅ ABC 텍스트의 テスト часто הוא פשוט' );
		$instance->toLowercase();

		$this->assertEquals(
			'àáâãäå abc 텍스트의 テスト часто הוא פשוט',
			$instance
		);
	}

	/**
	 * Reducing to 3 characters yields a length of 3; a subsequent request
	 * with a larger limit (10) is expected to leave the length at 3.
	 */
	public function testReduceLengthTo() {

		$instance = new Sanitizer( 'ABCDEF' );
		$instance->reduceLengthTo( 3 );

		$this->assertEquals(
			3,
			mb_strlen( $instance )
		);

		// A limit above the current length must not change the string length
		$instance->reduceLengthTo( 10 );

		$this->assertEquals(
			3,
			mb_strlen( $instance )
		);
	}

	/**
	 * With a limit of 12 the expected result is 'abc def gh' (10 characters),
	 * i.e. the string ends on a complete word instead of a partial 'in'.
	 */
	public function testReduceLengthToNearestWholeWordForLatinString() {

		$instance = new Sanitizer( 'abc def gh in 123' );
		$instance->reduceLengthTo( 12 );

		$this->assertEquals(
			10,
			mb_strlen( $instance )
		);

		$this->assertEquals(
			'abc def gh',
			$instance
		);
	}

	/**
	 * Same as the Latin variant but with multibyte characters: a limit of 3
	 * is expected to yield '一 二', which is 3 characters long as measured
	 * by mb_strlen.
	 */
	public function testReduceLengthToNearestWholeWordForNonLatinString() {

		// Skipped on PHP < 5.4 where the length assertion is reported to fail
		// (see the skip message)
		if ( version_compare( phpversion(), '5.4', '<' ) ) {
			$this->markTestSkipped(
				"Boo, PHP 5.3 returns with `Failed asserting that 9 matches expected 3`"
			);
		}

		$instance = new Sanitizer( '一 二 三' );
		$instance->reduceLengthTo( 3 );

		$this->assertEquals(
			3,
			mb_strlen( $instance )
		);

		$this->assertEquals(
			'一 二',
			$instance
		);
	}

	/**
	 * A Korean sample text is expected to be detected as containing Korean
	 * characters.
	 */
	public function testToContainKoreanCharacters() {

		$instance = new Sanitizer( '한국어 텍스트의 예' );

		$this->assertTrue(
			$instance->containsKoreanCharacters()
		);
	}

	/**
	 * A mixed Latin/Katakana sample is expected to be detected as containing
	 * Japanese characters.
	 */
	public function testToContainJapaneseCharacters() {

		$instance = new Sanitizer( 'IQテスト' );

		$this->assertTrue(
			$instance->containsJapaneseCharacters()
		);
	}

	/**
	 * A Chinese sample text is expected to be detected as containing Chinese
	 * characters.
	 */
	public function testToContainChineseCharacters() {

		$instance = new Sanitizer( '才可以过关' );

		$this->assertTrue(
			$instance->containsChineseCharacters()
		);
	}

	/**
	 * With 'bar' registered as a custom stopword, sanitizing
	 * 'Foo bar foobar' is expected to return 'Foo foobar'.
	 */
	public function testSanitizeByStopwords() {

		$instance = new Sanitizer( 'Foo bar foobar' );

		$stopwordAnalyzer = new StopwordAnalyzer();
		$stopwordAnalyzer->setCustomStopwordList( array( 'zh' => array( 'bar' ) ) );

		$this->assertEquals(
			'Foo foobar',
			$instance->sanitizeBy( $stopwordAnalyzer )
		);
	}

	/**
	 * With a minimum character length of 4 and 'bar' on the word whitelist,
	 * the expected result is 'bar foobar': 'Foo' is dropped while 'bar' is
	 * kept although it is listed as a stopword.
	 */
	public function testSanitizeByStopwordsToIncludeExemptionWithMinLengthRestriction() {

		$instance = new Sanitizer( 'Foo bar foobar' );

		$instance->setOption( ONOI_TESA_CHARACTER_MIN_LENGTH, 4 );
		$instance->setOption( ONOI_TESA_WORD_WHITELIST, array( 'bar' ) );

		$stopwordAnalyzer = new StopwordAnalyzer();
		$stopwordAnalyzer->setCustomStopwordList( array( 'zh' => array( 'bar' ) ) );

		$this->assertEquals(
			'bar foobar',
			$instance->sanitizeBy( $stopwordAnalyzer )
		);
	}

	/**
	 * Sanitizing an empty string is expected to return an empty string.
	 */
	public function testTrySanitizeByStopwordsForNoAvailableToken() {

		$instance = new Sanitizer( '' );

		$stopwordAnalyzer = new StopwordAnalyzer();

		$this->assertEquals(
			'',
			$instance->sanitizeBy( $stopwordAnalyzer )
		);
	}

	/**
	 * Directly repeated words ('foo foo', 'テスト テスト') are expected to
	 * appear only once in the sanitized result.
	 */
	public function testTrySanitizeByStopwordsWithProximityCheck() {

		$instance = new Sanitizer( 'foo foo テスト テスト' );

		$stopwordAnalyzer = new StopwordAnalyzer();

		$this->assertEquals(
			'foo テスト',
			$instance->sanitizeBy( $stopwordAnalyzer )
		);
	}

	/**
	 * Replacing 'テスト' by 'Test' is expected to change the string
	 * representation accordingly.
	 */
	public function testReplace() {

		$instance = new Sanitizer( 'テスト' );

		// NOTE(review): static analysis reports that replace() is documented
		// to expect strings while arrays are passed here; presumably the
		// method accepts both - confirm against Sanitizer::replace and adjust
		// its @param documentation if arrays are supported
		$instance->replace( array( 'テスト' ), array( 'Test' ) );

		$this->assertEquals(
			'Test',
			$instance
		);
	}

	/**
	 * @dataProvider stringProvider
	 *
	 * @param string $string text handed to the Sanitizer
	 * @param integer|null $flag tokenizer flag passed on to getTokens
	 * @param array|false $expected expected return value of getTokens
	 */
	public function testGetTokens( $string, $flag, $expected ) {

		$instance = new Sanitizer( $string );

		$this->assertEquals(
			$expected,
			$instance->getTokens( $flag )
		);
	}

	/**
	 * Data sets for testGetTokens, each consisting of the input string, the
	 * tokenizer flag and the expected getTokens result.
	 *
	 * @return array
	 */
	public function stringProvider() {

		// Explicit initialization instead of relying on the implicit array
		// creation by the first `$provider[] = ...` statement
		$provider = array();

		// Tokenizer::LAZY: punctuation is expected to stay attached to words
		$provider[] = array(
			'A test string (that has no);deep meaning',
			Tokenizer::LAZY,
			array( 'A', 'test', 'string', '(that', 'has', 'no);deep', 'meaning' )
		);

		// Tokenizer::STRICT: punctuation is expected to be removed and to
		// separate 'no' from 'deep'
		$provider[] = array(
			'A test string (that has no);deep meaning',
			Tokenizer::STRICT,
			array( 'A', 'test', 'string', 'that', 'has', 'no' , 'deep', 'meaning' )
		);

		// A null flag is expected to make getTokens return false
		$provider[] = array(
			'Abc def',
			null,
			false
		);

		return $provider;
	}

}
226