These results are based on our legacy PHP analysis; consider migrating to our new PHP analysis engine instead. Learn more
1 | <?php |
||
2 | |||
3 | namespace Onoi\Tesa\Tests; |
||
4 | |||
5 | use Onoi\Tesa\Sanitizer; |
||
6 | use Onoi\Tesa\Tokenizer; |
||
7 | use Onoi\Tesa\StopwordAnalyzer; |
||
8 | |||
/**
 * Unit tests for Sanitizer: transliteration, lower-casing, length reduction,
 * CJK character detection, stopword removal, replacement and tokenization.
 *
 * Most assertions compare a string against the Sanitizer instance itself and
 * therefore rely on the instance being convertible to a string.
 *
 * @covers \Onoi\Tesa\Sanitizer
 * @group onoi-tesa
 *
 * @license GNU GPL v2+
 * @since 0.1
 *
 * @author mwjames
 */
class SanitizerTest extends \PHPUnit_Framework_TestCase {

	/**
	 * Accented Latin characters are expected to be replaced by ASCII
	 * equivalents (umlauts expand, e.g. Ä -> AE, ö -> oe), while ð and Ð are
	 * expected to be left untouched.
	 */
	public function testTransliteration() {

		$instance = new Sanitizer( 'ÀÁÂÃÄÅàáâãäåÒÓÔÕÕÖØòóôõöøÈÉÊËèéêëðÇçÐÌÍÎÏìíîïÙÚÛÜùúûüÑñŠšŸÿýŽž' );
		$instance->applyTransliteration();

		$this->assertEquals(
			'AAAAAEAaaaaaeaOOOOOOEOoooooeoEEEEeeeeðCcÐIIIIiiiiUUUUEuuuueNnSsYyyZz',
			$instance
		);
	}

	/**
	 * Lower-casing is expected to affect the accented and plain Latin
	 * characters of the fixture; the Korean, Japanese, Cyrillic and Hebrew
	 * parts are expected to come back unchanged.
	 */
	public function testToLowercase() {

		$instance = new Sanitizer( 'ÀÁÂÃÄÅ ABC 텍스트의 テスト часто הוא פשוט' );
		$instance->toLowercase();

		$this->assertEquals(
			'àáâãäå abc 텍스트의 テスト часто הוא פשוט',
			$instance
		);
	}

	/**
	 * A six character string is cut down to the requested three characters;
	 * asking for a larger limit afterwards must not change the length again.
	 */
	public function testReduceLengthTo() {

		$instance = new Sanitizer( 'ABCDEF' );
		$instance->reduceLengthTo( 3 );

		$this->assertEquals(
			3,
			mb_strlen( $instance )
		);

		// The limit now exceeds the current length, so the string stays as is
		$instance->reduceLengthTo( 10 );

		$this->assertEquals(
			3,
			mb_strlen( $instance )
		);
	}

	/**
	 * A limit of 12 falls inside the word "in", so the result is expected to
	 * end at the preceding whole word (10 characters).
	 */
	public function testReduceLengthToNearestWholeWordForLatinString() {

		$instance = new Sanitizer( 'abc def gh in 123' );
		$instance->reduceLengthTo( 12 );

		$this->assertEquals(
			10,
			mb_strlen( $instance )
		);

		$this->assertEquals(
			'abc def gh',
			$instance
		);
	}

	/**
	 * Same whole-word reduction as the Latin variant, but for a multibyte
	 * string: the length is expected to be counted in characters, not bytes.
	 */
	public function testReduceLengthToNearestWholeWordForNonLatinString() {

		// Skipped on PHP 5.3, which reports 9 instead of the expected 3
		if ( version_compare( phpversion(), '5.4', '<' ) ) {
			$this->markTestSkipped(
				"Boo, PHP 5.3 returns with `Failed asserting that 9 matches expected 3`"
			);
		}

		$instance = new Sanitizer( '一 二 三' );
		$instance->reduceLengthTo( 3 );

		$this->assertEquals(
			3,
			mb_strlen( $instance )
		);

		$this->assertEquals(
			'一 二',
			$instance
		);
	}

	/**
	 * A Korean text fixture must be reported as containing Korean characters.
	 */
	public function testToContainKoreanCharacters() {

		$instance = new Sanitizer( '한국어 텍스트의 예' );

		$this->assertTrue(
			$instance->containsKoreanCharacters()
		);
	}

	/**
	 * A mixed Latin/Katakana fixture must be reported as containing Japanese
	 * characters.
	 */
	public function testToContainJapaneseCharacters() {

		$instance = new Sanitizer( 'IQテスト' );

		$this->assertTrue(
			$instance->containsJapaneseCharacters()
		);
	}

	/**
	 * A Chinese text fixture must be reported as containing Chinese characters.
	 */
	public function testToContainChineseCharacters() {

		$instance = new Sanitizer( '才可以过关' );

		$this->assertTrue(
			$instance->containsChineseCharacters()
		);
	}

	/**
	 * A word registered as custom stopword ("bar") is expected to be removed,
	 * while "foobar", which merely contains it, is expected to be kept.
	 */
	public function testSanitizeByStopwords() {

		$instance = new Sanitizer( 'Foo bar foobar' );

		$stopwordAnalyzer = new StopwordAnalyzer();
		$stopwordAnalyzer->setCustomStopwordList( array( 'zh' => array( 'bar' ) ) );

		$this->assertEquals(
			'Foo foobar',
			$instance->sanitizeBy( $stopwordAnalyzer )
		);
	}

	/**
	 * Sanitizing an empty string (no tokens available) is expected to return
	 * an empty string.
	 */
	public function testTrySanitizeByStopwordsForNoAvailableToken() {

		$instance = new Sanitizer( '' );

		$stopwordAnalyzer = new StopwordAnalyzer();

		$this->assertEquals(
			'',
			$instance->sanitizeBy( $stopwordAnalyzer )
		);
	}

	/**
	 * Search and replacement values are passed as arrays; the multibyte
	 * search term is expected to be swapped for its replacement.
	 */
	public function testReplace() {

		$instance = new Sanitizer( 'テスト' );
		$instance->replace( array( 'テスト' ), array( 'Test' ) );

		$this->assertEquals(
			'Test',
			$instance
		);
	}

	/**
	 * @dataProvider stringProvider
	 *
	 * @param string $string text handed to the Sanitizer
	 * @param mixed $flag tokenizer flag passed to getTokens (may be null)
	 * @param array|false $expected expected return value of getTokens
	 */
	public function testGetTokens( $string, $flag, $expected ) {

		$instance = new Sanitizer( $string );

		$this->assertEquals(
			$expected,
			$instance->getTokens( $flag )
		);
	}

	/**
	 * Data sets for testGetTokens, each being array( $string, $flag, $expected ).
	 *
	 * @return array
	 */
	public function stringProvider() {

		// Initialize explicitly instead of relying on the first `[]` append to
		// create the array implicitly
		$provider = array();

		// LAZY keeps punctuation attached to the neighbouring word
		$provider[] = array(
			'A test string (that has no);deep meaning',
			Tokenizer::LAZY,
			array( 'A', 'test', 'string', '(that', 'has', 'no);deep', 'meaning' )
		);

		// STRICT drops the punctuation and splits on it
		$provider[] = array(
			'A test string (that has no);deep meaning',
			Tokenizer::STRICT,
			array( 'A', 'test', 'string', 'that', 'has', 'no', 'deep', 'meaning' )
		);

		// Without a flag (null) no token list is expected, only `false`
		$provider[] = array(
			'Abc def',
			null,
			false
		);

		return $provider;
	}

}
||
198 |
Adding an explicit array definition is generally preferable to implicit array definition as it guarantees a stable state of the code.
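Let’s take a look at an example:

    foreach ($collection as $item) {
        $myArray['foo'] = $item->getFoo();

        if ($item->hasBar()) {
            $myArray['bar'] = $item->getBar();
        }

        // do something with $myArray
    }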
As you can see in this example, the array `$myArray` is initialized the first time the foreach loop is entered. You can also see that the value of the `bar` key is only written conditionally; thus, its value might result from a previous iteration.

This might or might not be intended. To make your intention clear, to make your code more readable, and to avoid accidental bugs, we recommend adding an explicit initialization `$myArray = array()` either outside or inside the foreach loop.