Completed
Push — master ( 0f3bb4...97a511 )
by mw
02:24
created

testSanitizeByStopwordsToIncludeExemptionWithMinLengthRestriction()   A

Complexity

Conditions 1
Paths 1

Size

Total Lines 15
Code Lines 9

Duplication

Lines 0
Ratio 0 %

Importance

Changes 1
Bugs 0 Features 1
Metric Value
c 1
b 0
f 1
dl 0
loc 15
rs 9.4286
cc 1
eloc 9
nc 1
nop 0
1
<?php
2
3
namespace Onoi\Tesa\Tests;
4
5
use Onoi\Tesa\Sanitizer;
6
use Onoi\Tesa\Tokenizer;
7
use Onoi\Tesa\StopwordAnalyzer;
8
9
/**
 * Unit tests for the string manipulation methods exposed by Sanitizer
 * (transliteration, lowercasing, length reduction, stopword sanitization,
 * replacement and tokenization).
 *
 * @covers \Onoi\Tesa\Sanitizer
 * @group onoi-tesa
 *
 * @license GNU GPL v2+
 * @since 0.1
 *
 * @author mwjames
 */
class SanitizerTest extends \PHPUnit_Framework_TestCase {

	/**
	 * applyTransliteration() is expected to map the accented input characters
	 * onto the plain characters listed in the expected string.
	 */
	public function testTransliteration() {

		$instance = new Sanitizer( 'ÀÁÂÃÄÅàáâãäåÒÓÔÕÕÖØòóôõöøÈÉÊËèéêëðÇçÐÌÍÎÏìíîïÙÚÛÜùúûüÑñŠšŸÿýŽž' );
		$instance->applyTransliteration();

		$this->assertEquals(
			'AAAAAEAaaaaaeaOOOOOOEOoooooeoEEEEeeeeðCcÐIIIIiiiiUUUUEuuuueNnSsYyyZz',
			$instance
		);
	}

	/**
	 * toLowercase() is expected to lowercase the accented and plain Latin
	 * characters while leaving the Hangul, Katakana, Cyrillic and Hebrew parts
	 * of the input as they are.
	 */
	public function testToLowercase() {

		$instance = new Sanitizer( 'ÀÁÂÃÄÅ ABC 텍스트의 テスト часто הוא פשוט' );
		$instance->toLowercase();

		$this->assertEquals(
			'àáâãäå abc 텍스트의 テスト часто הוא פשוט',
			$instance
		);
	}

	/**
	 * reduceLengthTo() is expected to shorten the string to the requested
	 * length and to leave it untouched when the requested length exceeds the
	 * current one.
	 */
	public function testReduceLengthTo() {

		$instance = new Sanitizer( 'ABCDEF' );
		$instance->reduceLengthTo( 3 );

		$this->assertEquals(
			3,
			mb_strlen( $instance )
		);

		// A limit larger than the already reduced string must not change it
		$instance->reduceLengthTo( 10 );

		$this->assertEquals(
			3,
			mb_strlen( $instance )
		);
	}

	/**
	 * For a Latin string, a limit of 12 is expected to yield the 10 character
	 * string 'abc def gh', i.e. the result ends on a complete word.
	 */
	public function testReduceLengthToNearestWholeWordForLatinString() {

		$instance = new Sanitizer( 'abc def gh in 123' );
		$instance->reduceLengthTo( 12 );

		$this->assertEquals(
			10,
			mb_strlen( $instance )
		);

		$this->assertEquals(
			'abc def gh',
			$instance
		);
	}

	/**
	 * Same whole-word expectation as for the Latin case but with multibyte
	 * characters; skipped on PHP versions below 5.4.
	 */
	public function testReduceLengthToNearestWholeWordForNonLatinString() {

		// markTestSkipped() aborts the test, nothing below runs on PHP < 5.4
		if ( version_compare( phpversion(), '5.4', '<' ) ) {
			$this->markTestSkipped(
				"Boo, PHP 5.3 returns with `Failed asserting that 9 matches expected 3`"
			);
		}

		$instance = new Sanitizer( '一 二 三' );
		$instance->reduceLengthTo( 3 );

		$this->assertEquals(
			3,
			mb_strlen( $instance )
		);

		$this->assertEquals(
			'一 二',
			$instance
		);
	}

	/**
	 * sanitizeBy() with a custom stopword list containing 'bar' is expected
	 * to remove that word from the string.
	 */
	public function testSanitizeByStopwords() {

		$instance = new Sanitizer( 'Foo bar foobar' );

		$stopwordAnalyzer = new StopwordAnalyzer();
		$stopwordAnalyzer->setCustomStopwordList( array( 'zh' => array( 'bar' ) ) );

		$this->assertEquals(
			'Foo foobar',
			$instance->sanitizeBy( $stopwordAnalyzer )
		);
	}

	/**
	 * With ONOI_TESA_CHARACTER_MIN_LENGTH set to 4 and 'bar' put on the
	 * ONOI_TESA_WORD_WHITELIST, the expected result keeps 'bar' (although it
	 * is a listed stopword) and no longer contains 'Foo' (presumably dropped
	 * by the minimum length option; verify against Sanitizer::sanitizeBy).
	 */
	public function testSanitizeByStopwordsToIncludeExemptionWithMinLengthRestriction() {

		$instance = new Sanitizer( 'Foo bar foobar' );

		$instance->setOption( ONOI_TESA_CHARACTER_MIN_LENGTH, 4 );
		$instance->setOption( ONOI_TESA_WORD_WHITELIST, array( 'bar' ) );

		$stopwordAnalyzer = new StopwordAnalyzer();
		$stopwordAnalyzer->setCustomStopwordList( array( 'zh' => array( 'bar' ) ) );

		$this->assertEquals(
			'bar foobar',
			$instance->sanitizeBy( $stopwordAnalyzer )
		);
	}

	/**
	 * sanitizeBy() on an empty string is expected to return an empty string.
	 */
	public function testTrySanitizeByStopwordsForNoAvailableToken() {

		$instance = new Sanitizer( '' );

		$stopwordAnalyzer = new StopwordAnalyzer();

		$this->assertEquals(
			'',
			$instance->sanitizeBy( $stopwordAnalyzer )
		);
	}

	/**
	 * sanitizeBy() is expected to return each of the directly repeated words
	 * of the input only once.
	 */
	public function testTrySanitizeByStopwordsWithProximityCheck() {

		$instance = new Sanitizer( 'foo foo テスト テスト' );

		$stopwordAnalyzer = new StopwordAnalyzer();

		$this->assertEquals(
			'foo テスト',
			$instance->sanitizeBy( $stopwordAnalyzer )
		);
	}

	/**
	 * replace() is expected to substitute the search value with the
	 * replacement value.
	 */
	public function testReplace() {

		$instance = new Sanitizer( 'テスト' );

		// NOTE(review): static analysis reports that replace() is documented
		// as expecting strings while arrays are passed here; confirm the
		// declared parameter types of Sanitizer::replace.
		$instance->replace( array( 'テスト' ), array( 'Test' ) );

		$this->assertEquals(
			'Test',
			$instance
		);
	}

	/**
	 * getTokens() is expected to return the value supplied by the provider
	 * for the given string and tokenizer flag.
	 *
	 * @dataProvider stringProvider
	 */
	public function testGetTokens( $string, $flag, $expected ) {

		$instance = new Sanitizer( $string );

		$this->assertEquals(
			$expected,
			$instance->getTokens( $flag )
		);
	}

	/**
	 * Data sets for testGetTokens.
	 *
	 * @return array[] list of array( input string, tokenizer flag, expected getTokens() result )
	 */
	public function stringProvider() {

		// Explicit initialization instead of relying on implicit array
		// creation by the first `$provider[] = ...`
		$provider = array();

		$provider[] = array(
			'A test string (that has no);deep meaning',
			Tokenizer::LAZY,
			array( 'A', 'test', 'string', '(that', 'has', 'no);deep', 'meaning' )
		);

		$provider[] = array(
			'A test string (that has no);deep meaning',
			Tokenizer::STRICT,
			array( 'A', 'test', 'string', 'that', 'has', 'no' , 'deep', 'meaning' )
		);

		// A null flag is expected to make getTokens() return false
		$provider[] = array(
			'Abc def',
			null,
			false
		);

		return $provider;
	}

}
199