SanitizerTest   A
last analyzed

Complexity

Total Complexity 11

Size/Duplication

Total Lines 206
Duplicated Lines 0 %

Coupling/Cohesion

Components 1
Dependencies 2

Importance

Changes 0
Metric Value
wmc 11
lcom 1
cbo 2
dl 0
loc 206
rs 10
c 0
b 0
f 0

10 Methods

Rating   Name   Duplication   Size   Complexity  
A setUp() 0 3 1
A testTransliteration() 0 10 1
A testToLowercase() 0 10 1
A testReduceLengthTo() 0 17 1
A testReduceLengthToNearestWholeWordForLatinString() 0 15 1
A testReduceLengthToNearestWholeWordForNonLatinString() 0 21 2
A testReplace() 0 10 1
B testSanitizeWithSimpleStopwordList() 0 32 1
B testSanitizeByStopwordsToIncludeExemptionWithMinLengthRestriction() 0 39 1
B testTrySanitizeByStopwordsWithProximityCheck() 0 34 1
1
<?php
2
3
namespace Onoi\Tesa\Tests;
4
5
use Onoi\Tesa\Sanitizer;
6
use Onoi\Tesa\SanitizerFactory;
7
8
/**
 * Unit tests for the Sanitizer text operations: transliteration, lowercasing,
 * length reduction, replacement and stopword based sanitization.
 *
 * @covers \Onoi\Tesa\Sanitizer
 * @group onoi-tesa
 *
 * @license GNU GPL v2+
 * @since 0.1
 *
 * @author mwjames
 */
class SanitizerTest extends \PHPUnit_Framework_TestCase {

	/**
	 * Factory used to create the stopword analyzers handed to sanitizeWith().
	 *
	 * @var SanitizerFactory
	 */
	private $sanitizerFactory;

	/**
	 * Creates a fresh factory before every test.
	 */
	protected function setUp() {
		$this->sanitizerFactory = new SanitizerFactory();
	}

	/**
	 * applyTransliteration() is expected to map accented Latin characters to
	 * plain letters (umlauts expand to two letters, e.g. Ä => AE) while the
	 * expected string leaves `ð` and `Ð` untouched.
	 */
	public function testTransliteration() {

		$sanitizer = new Sanitizer( 'ÀÁÂÃÄÅàáâãäåÒÓÔÕÕÖØòóôõöøÈÉÊËèéêëðÇçÐÌÍÎÏìíîïÙÚÛÜùúûüÑñŠšŸÿýŽž' );
		$sanitizer->applyTransliteration();

		// Loose comparison on purpose: the Sanitizer object itself is compared
		// with the expected string.
		$this->assertEquals(
			'AAAAAEAaaaaaeaOOOOOOEOoooooeoEEEEeeeeðCcÐIIIIiiiiUUUUEuuuueNnSsYyyZz',
			$sanitizer
		);
	}

	/**
	 * toLowercase() is expected to lowercase accented and ASCII letters and to
	 * leave the Korean, Japanese, Cyrillic and Hebrew parts of the sample as
	 * they are.
	 */
	public function testToLowercase() {

		$sanitizer = new Sanitizer( 'ÀÁÂÃÄÅ ABC 텍스트의 テスト часто הוא פשוט' );
		$sanitizer->toLowercase();

		$this->assertEquals(
			'àáâãäå abc 텍스트의 テスト часто הוא פשוט',
			$sanitizer
		);
	}

	/**
	 * reduceLengthTo() shortens the text to the requested length; asking for a
	 * larger length afterwards must not change the already reduced text.
	 */
	public function testReduceLengthTo() {

		$sanitizer = new Sanitizer( 'ABCDEF' );
		$sanitizer->reduceLengthTo( 3 );

		// The encoding is passed explicitly so that the character count does
		// not depend on the configured mbstring internal encoding.
		$this->assertSame(
			3,
			mb_strlen( $sanitizer, 'UTF-8' )
		);

		$sanitizer->reduceLengthTo( 10 );

		$this->assertSame(
			3,
			mb_strlen( $sanitizer, 'UTF-8' )
		);
	}

	/**
	 * A limit of 12 on 'abc def gh in 123' is expected to yield the ten
	 * characters 'abc def gh' rather than a text cut at exactly 12 characters.
	 */
	public function testReduceLengthToNearestWholeWordForLatinString() {

		$sanitizer = new Sanitizer( 'abc def gh in 123' );
		$sanitizer->reduceLengthTo( 12 );

		$this->assertSame(
			10,
			mb_strlen( $sanitizer, 'UTF-8' )
		);

		$this->assertEquals(
			'abc def gh',
			$sanitizer
		);
	}

	/**
	 * Same whole-word expectation as the Latin variant, using a CJK sample
	 * where each word is a single multi-byte character.
	 */
	public function testReduceLengthToNearestWholeWordForNonLatinString() {

		// Known failure on PHP 5.3, see the skip message for the observed result.
		if ( version_compare( phpversion(), '5.4', '<' ) ) {
			$this->markTestSkipped(
				"Boo, PHP 5.3 returns with `Failed asserting that 9 matches expected 3`"
			);
		}

		$sanitizer = new Sanitizer( '一 二 三' );
		$sanitizer->reduceLengthTo( 3 );

		// Counting characters (not bytes) is essential here, hence the
		// explicit UTF-8 encoding argument.
		$this->assertSame(
			3,
			mb_strlen( $sanitizer, 'UTF-8' )
		);

		$this->assertEquals(
			'一 二',
			$sanitizer
		);
	}

	/**
	 * replace() is expected to substitute the search term with its replacement.
	 */
	public function testReplace() {

		$sanitizer = new Sanitizer( 'テスト' );

		// NOTE(review): static analysis flagged this call because replace() is
		// documented as expecting string arguments while arrays are passed
		// here. The array form is kept unchanged; confirm against the
		// Sanitizer::replace() signature before altering either side.
		$sanitizer->replace( array( 'テスト' ), array( 'Test' ) );

		$this->assertEquals(
			'Test',
			$sanitizer
		);
	}

	/**
	 * With 'bar' as the only stopword, 'Foo bar foobar' is expected to be
	 * sanitized to 'Foo foobar'.
	 */
	public function testSanitizeWithSimpleStopwordList() {

		$text = 'Foo bar foobar';

		// No isWordTokenizer() expectation is registered for this scenario.
		$tokenizer = $this->newTokenizerMock(
			$text,
			array( 'Foo', 'bar', 'foobar' ),
			false
		);

		$synonymizer = $this->newSynonymizerMock();

		$sanitizer = new Sanitizer( $text );

		$stopwordAnalyzer = $this->sanitizerFactory->newArrayStopwordAnalyzer(
			array( 'bar' )
		);

		$this->assertEquals(
			'Foo foobar',
			$sanitizer->sanitizeWith( $tokenizer, $stopwordAnalyzer, $synonymizer )
		);
	}

	/**
	 * With MIN_LENGTH set to 4 and 'bar' on the WHITELIST, the expected result
	 * is 'bar foobar': 'Foo' is removed while 'bar' is retained even though it
	 * is also registered as stopword.
	 */
	public function testSanitizeByStopwordsToIncludeExemptionWithMinLengthRestriction() {

		$text = 'Foo bar foobar';

		$tokenizer = $this->newTokenizerMock(
			$text,
			array( 'Foo', 'bar', 'foobar' ),
			true
		);

		$synonymizer = $this->newSynonymizerMock();

		$sanitizer = new Sanitizer( $text );

		$stopwordAnalyzer = $this->sanitizerFactory->newArrayStopwordAnalyzer(
			array( 'bar' )
		);

		$sanitizer->setOption( Sanitizer::MIN_LENGTH, 4 );
		$sanitizer->setOption( Sanitizer::WHITELIST, array( 'bar' ) );

		$this->assertEquals(
			'bar foobar',
			$sanitizer->sanitizeWith( $tokenizer, $stopwordAnalyzer, $synonymizer )
		);
	}

	/**
	 * Directly repeated tokens ('foo foo テスト テスト') are expected to be
	 * collapsed into a single occurrence each ('foo テスト') when sanitizing
	 * with an analyzer created without an explicit stopword list.
	 */
	public function testTrySanitizeByStopwordsWithProximityCheck() {

		$text = 'foo foo テスト テスト';

		$tokenizer = $this->newTokenizerMock(
			$text,
			array( 'foo', 'foo', 'テスト', 'テスト' ),
			true
		);

		$synonymizer = $this->newSynonymizerMock();

		$sanitizer = new Sanitizer( $text );

		$stopwordAnalyzer = $this->sanitizerFactory->newArrayStopwordAnalyzer();

		$this->assertEquals(
			'foo テスト',
			$sanitizer->sanitizeWith( $tokenizer, $stopwordAnalyzer, $synonymizer )
		);
	}

	/**
	 * Builds a Tokenizer mock that must receive exactly one tokenize() call
	 * with the given text and answers it with the given tokens.
	 *
	 * @param string $text text the tokenize() call is expected to receive
	 * @param array $tokens tokens returned by the mocked tokenize() call
	 * @param boolean $expectWordTokenizerCheck when true, isWordTokenizer() must
	 *  be called exactly once and returns true; when false no expectation is
	 *  registered for that method
	 *
	 * @return \PHPUnit_Framework_MockObject_MockObject
	 */
	private function newTokenizerMock( $text, array $tokens, $expectWordTokenizerCheck ) {

		$tokenizer = $this->getMockBuilder( '\Onoi\Tesa\Tokenizer\Tokenizer' )
			->disableOriginalConstructor()
			->getMockForAbstractClass();

		if ( $expectWordTokenizerCheck ) {
			$tokenizer->expects( $this->once() )
				->method( 'isWordTokenizer' )
				->will( $this->returnValue( true ) );
		}

		$tokenizer->expects( $this->once() )
			->method( 'tokenize' )
			->with( $this->equalTo( $text ) )
			->will( $this->returnValue( $tokens ) );

		return $tokenizer;
	}

	/**
	 * Builds a Synonymizer mock whose synonymize() hands back its first
	 * argument unchanged, any number of times.
	 *
	 * @return \PHPUnit_Framework_MockObject_MockObject
	 */
	private function newSynonymizerMock() {

		$synonymizer = $this->getMockBuilder( '\Onoi\Tesa\Synonymizer\Synonymizer' )
			->disableOriginalConstructor()
			->getMockForAbstractClass();

		$synonymizer->expects( $this->any() )
			->method( 'synonymize' )
			->will( $this->returnArgument( 0 ) );

		return $synonymizer;
	}

}
223