Completed
Push — master ( 15e850...27125e )
by mw
02:06
created

tests/phpunit/Unit/SanitizerTest.php (3 issues)

Upgrade to new PHP Analysis Engine

These results are based on our legacy PHP analysis; consider migrating to our new PHP analysis engine instead. Learn more

1
<?php
2
3
namespace Onoi\Tesa\Tests;
4
5
use Onoi\Tesa\Sanitizer;
6
use Onoi\Tesa\Tokenizer;
7
use Onoi\Tesa\StopwordAnalyzer;
8
9
/**
10
 * @covers \Onoi\Tesa\Sanitizer
11
 * @group onoi-tesa
12
 *
13
 * @license GNU GPL v2+
14
 * @since 0.1
15
 *
16
 * @author mwjames
17
 */
18
class SanitizerTest extends \PHPUnit_Framework_TestCase {

	/**
	 * Checks that applyTransliteration() turns a string of accented Latin
	 * characters into the expected plain-letter form.
	 *
	 * Note that the expected value still contains "ð" and "Ð", i.e. those two
	 * characters are expected to pass through unchanged.
	 */
	public function testTransliteration() {

		$sanitizer = new Sanitizer( 'ÀÁÂÃÄÅàáâãäåÒÓÔÕÕÖØòóôõöøÈÉÊËèéêëðÇçÐÌÍÎÏìíîïÙÚÛÜùúûüÑñŠšŸÿýŽž' );
		$sanitizer->applyTransliteration();

		// The instance itself is handed to the assertion; presumably it is
		// compared through its string conversion.
		$this->assertEquals(
			'AAAAAEAaaaaaeaOOOOOOEOoooooeoEEEEeeeeðCcÐIIIIiiiiUUUUEuuuueNnSsYyyZz',
			$sanitizer
		);
	}

	/**
	 * Checks that toLowercase() lowercases the Latin part of a mixed-script
	 * string while the Korean, Japanese, Cyrillic and Hebrew parts of the
	 * fixture stay as they are.
	 */
	public function testToLowercase() {

		$sanitizer = new Sanitizer( 'ÀÁÂÃÄÅ ABC 텍스트의 テスト часто הוא פשוט' );
		$sanitizer->toLowercase();

		$this->assertEquals(
			'àáâãäå abc 텍스트의 テスト часто הוא פשוט',
			$sanitizer
		);
	}

	/**
	 * Checks that reduceLengthTo() shortens the string to the requested
	 * length and that a later call with a larger limit does not change the
	 * already shortened length.
	 */
	public function testReduceLengthTo() {

		$sanitizer = new Sanitizer( 'ABCDEF' );

		$sanitizer->reduceLengthTo( 3 );

		// The encoding is passed explicitly so the character count does not
		// depend on the runtime's internal multibyte encoding setting.
		$this->assertEquals(
			3,
			mb_strlen( $sanitizer, 'UTF-8' )
		);

		// A limit above the current length is expected to leave it at 3.
		$sanitizer->reduceLengthTo( 10 );

		$this->assertEquals(
			3,
			mb_strlen( $sanitizer, 'UTF-8' )
		);
	}

	/**
	 * Checks that reduceLengthTo() on a space separated Latin string returns
	 * 'abc def gh' (10 characters) for a limit of 12, i.e. the result ends on
	 * a whole word instead of the partial 'abc def gh i'.
	 */
	public function testReduceLengthToNearestWholeWordForLatinString() {

		$sanitizer = new Sanitizer( 'abc def gh in 123' );
		$sanitizer->reduceLengthTo( 12 );

		$this->assertEquals(
			10,
			mb_strlen( $sanitizer, 'UTF-8' )
		);

		$this->assertEquals(
			'abc def gh',
			$sanitizer
		);
	}

	/**
	 * Checks that reduceLengthTo() counts characters (not bytes) for a
	 * non-Latin string: a limit of 3 is expected to keep '一 二'.
	 *
	 * Skipped on PHP older than 5.4, where the assertion is reported to fail
	 * (see the skip message).
	 */
	public function testReduceLengthToNearestWholeWordForNonLatinString() {

		if ( version_compare( phpversion(), '5.4', '<' ) ) {
			$this->markTestSkipped(
				"Boo, PHP 5.3 returns with `Failed asserting that 9 matches expected 3`"
			);
		}

		$sanitizer = new Sanitizer( '一 二 三' );
		$sanitizer->reduceLengthTo( 3 );

		// The fixture is a UTF-8 literal, so the length is counted as UTF-8
		// regardless of the configured internal encoding.
		$this->assertEquals(
			3,
			mb_strlen( $sanitizer, 'UTF-8' )
		);

		$this->assertEquals(
			'一 二',
			$sanitizer
		);
	}

	/**
	 * Checks that containsKoreanCharacters() reports true for Korean text.
	 */
	public function testToContainKoreanCharacters() {

		$sanitizer = new Sanitizer( '한국어 텍스트의 예' );

		$this->assertTrue(
			$sanitizer->containsKoreanCharacters()
		);
	}

	/**
	 * Checks that containsJapaneseCharacters() reports true for a string that
	 * mixes Latin letters with Katakana.
	 */
	public function testToContainJapaneseCharacters() {

		$sanitizer = new Sanitizer( 'IQテスト' );

		$this->assertTrue(
			$sanitizer->containsJapaneseCharacters()
		);
	}

	/**
	 * Checks that containsChineseCharacters() reports true for Chinese text.
	 */
	public function testToContainChineseCharacters() {

		$sanitizer = new Sanitizer( '才可以过关' );

		$this->assertTrue(
			$sanitizer->containsChineseCharacters()
		);
	}

	/**
	 * Checks that sanitizeBy() drops a word that is registered as a custom
	 * stopword ('bar') while keeping words that merely contain it ('foobar').
	 */
	public function testSanitizeByStopwords() {

		$sanitizer = new Sanitizer( 'Foo bar foobar' );

		$stopwords = new StopwordAnalyzer();
		$stopwords->setCustomStopwordList( array( 'zh' => array( 'bar' ) ) );

		$this->assertEquals(
			'Foo foobar',
			$sanitizer->sanitizeBy( $stopwords )
		);
	}

	/**
	 * Checks that sanitizeBy() returns an empty string when the input is
	 * empty.
	 */
	public function testTrySanitizeByStopwordsForNoAvailableToken() {

		$sanitizer = new Sanitizer( '' );
		$stopwords = new StopwordAnalyzer();

		$this->assertEquals(
			'',
			$sanitizer->sanitizeBy( $stopwords )
		);
	}

	/**
	 * Checks that replace() substitutes the search text with the replacement.
	 */
	public function testReplace() {

		$sanitizer = new Sanitizer( 'テスト' );

		// Plain strings are passed because static analysis reports that
		// replace() expects string arguments rather than arrays.
		$sanitizer->replace( 'テスト', 'Test' );

		$this->assertEquals(
			'Test',
			$sanitizer
		);
	}

	/**
	 * Checks getTokens() against the data sets from stringProvider().
	 *
	 * @dataProvider stringProvider
	 *
	 * @param string $string text handed to the Sanitizer
	 * @param mixed $flag value passed through to getTokens()
	 * @param mixed $expected value getTokens() is expected to return
	 */
	public function testGetTokens( $string, $flag, $expected ) {

		$sanitizer = new Sanitizer( $string );

		$this->assertEquals(
			$expected,
			$sanitizer->getTokens( $flag )
		);
	}

	/**
	 * Data sets for testGetTokens().
	 *
	 * @return array list of array( string, flag, expected ) entries
	 */
	public function stringProvider() {

		// Initialised explicitly instead of relying on implicit array
		// creation by the first append.
		$provider = array();

		// Tokenizer::LAZY: punctuation stays attached to the tokens
		$provider[] = array(
			'A test string (that has no);deep meaning',
			Tokenizer::LAZY,
			array( 'A', 'test', 'string', '(that', 'has', 'no);deep', 'meaning' )
		);

		// Tokenizer::STRICT: punctuation is removed and also splits tokens
		$provider[] = array(
			'A test string (that has no);deep meaning',
			Tokenizer::STRICT,
			array( 'A', 'test', 'string', 'that', 'has', 'no', 'deep', 'meaning' )
		);

		// A null flag is expected to yield false instead of a token list
		$provider[] = array(
			'Abc def',
			null,
			false
		);

		return $provider;
	}

}
198