This project does not seem to handle request data directly; as such, no vulnerable execution paths were found.
`include`, or, for example, via PHP's auto-loading mechanism.
These results are based on our legacy PHP analysis; consider migrating to our new PHP analysis engine instead. Learn more
1 | <?php |
||
2 | |||
3 | namespace Onoi\Tesa\Tests; |
||
4 | |||
5 | use Onoi\Tesa\Tokenizer\IcuWordBoundaryTokenizer; |
||
6 | |||
7 | /** |
||
8 | * @covers \Onoi\Tesa\Tokenizer\IcuWordBoundaryTokenizer |
||
9 | * @group onoi-tesa |
||
10 | * |
||
11 | * @license GNU GPL v2+ |
||
12 | * @since 0.1 |
||
13 | * |
||
14 | * @author mwjames |
||
15 | */ |
||
16 | class IcuWordBoundaryTokenizerTest extends \PHPUnit_Framework_TestCase { |
||
17 | |||
18 | protected function setUp() { |
||
19 | $instance = new IcuWordBoundaryTokenizer(); |
||
20 | |||
21 | if ( !$instance->isAvailable() || INTL_ICU_VERSION != '54.1' ) { |
||
22 | $this->markTestSkipped( 'ICU extension is not available or does not match the expected version constraint.' ); |
||
23 | } |
||
24 | } |
||
25 | |||
26 | public function testCanConstruct() { |
||
27 | |||
28 | $this->assertInstanceOf( |
||
29 | '\Onoi\Tesa\Tokenizer\IcuWordBoundaryTokenizer', |
||
30 | new IcuWordBoundaryTokenizer() |
||
31 | ); |
||
32 | } |
||
33 | |||
34 | /** |
||
35 | * @dataProvider stringProvider |
||
36 | */ |
||
37 | public function testTokenize( $string, $expected ) { |
||
38 | |||
39 | $instance = new IcuWordBoundaryTokenizer(); |
||
40 | |||
41 | $this->assertEquals( |
||
42 | $expected, |
||
43 | $instance->tokenize( $string ) |
||
44 | ); |
||
45 | } |
||
46 | |||
47 | public function testSetOption() { |
||
48 | |||
49 | $tokenizer = $this->getMockBuilder( '\Onoi\Tesa\Tokenizer\Tokenizer' ) |
||
50 | ->disableOriginalConstructor() |
||
51 | ->getMockForAbstractClass(); |
||
52 | |||
53 | $tokenizer->expects( $this->once() ) |
||
54 | ->method( 'setOption' ); |
||
55 | |||
56 | $instance = new IcuWordBoundaryTokenizer( |
||
57 | $tokenizer |
||
58 | ); |
||
59 | |||
60 | $instance->setOption( |
||
61 | IcuWordBoundaryTokenizer::REGEX_EXEMPTION, |
||
62 | array( 'Foo' ) |
||
63 | ); |
||
64 | } |
||
65 | |||
66 | public function testGeneralSetters() { |
||
67 | |||
68 | $tokenizer = $this->getMockBuilder( '\Onoi\Tesa\Tokenizer\Tokenizer' ) |
||
69 | ->disableOriginalConstructor() |
||
70 | ->getMockForAbstractClass(); |
||
71 | |||
72 | $instance = new IcuWordBoundaryTokenizer( |
||
73 | $tokenizer |
||
74 | ); |
||
75 | |||
76 | $instance->setLocale( 'en' ); |
||
77 | $instance->setWordTokenizerAttribute( false ); |
||
78 | |||
79 | $this->assertFalse( |
||
80 | $instance->isWordTokenizer() |
||
81 | ); |
||
82 | } |
||
83 | |||
84 | public function stringProvider() { |
||
85 | |||
86 | $provider[] = array( |
||
0 ignored issues
–
show
|
|||
87 | "安全テスト", |
||
88 | array( '安全', 'テスト' ) |
||
89 | ); |
||
90 | |||
91 | // Would expect 'すもも', 'も', 'もも', 'も', 'もも', 'の', 'うち', '。' |
||
92 | $provider[] = array( |
||
93 | "すもももももももものうち。", |
||
94 | array( 'すもも', 'も', 'も', 'も', 'も', 'も', 'もの', 'うち', '。' ) |
||
95 | ); |
||
96 | |||
97 | $provider[] = array( |
||
98 | "李も桃も桃のうち。", |
||
99 | array( '李', 'も', '桃', 'も', '桃', 'の', 'うち', '。' ) |
||
100 | ); |
||
101 | |||
102 | $provider[] = array( |
||
103 | "إسرائيل", |
||
104 | array( 'إسرائيل' ) |
||
105 | ); |
||
106 | |||
107 | $provider[] = array( |
||
108 | "검색엔ㅇㅏ진", |
||
109 | array( '검색엔', 'ㅇㅏ', '진' ) |
||
110 | ); |
||
111 | |||
112 | $provider[] = array( |
||
113 | "검색엔ㅇㅏ진1234abcdfrA", |
||
114 | array( '검색엔', 'ㅇㅏ', '진', '1234abcdfrA' ) |
||
115 | ); |
||
116 | |||
117 | $provider[] = array( |
||
118 | "1234abcdfrA", |
||
119 | array( '1234abcdfrA' ) |
||
120 | ); |
||
121 | |||
122 | $provider[] = array( |
||
123 | "公明執ようなSNSもストーカー行為の対象に", |
||
124 | array( |
||
125 | '公明', '執よう','な','SNS', 'も', |
||
126 | 'ストーカー', '行為', 'の', '対象', 'に' |
||
127 | ) |
||
128 | ); |
||
129 | |||
130 | $provider[] = array( |
||
131 | "公明執", |
||
132 | array( '公明', '執' ) |
||
133 | ); |
||
134 | |||
135 | $provider[] = array( |
||
136 | "IQテスト", |
||
137 | array( 'IQ', 'テスト' ) |
||
138 | ); |
||
139 | |||
140 | $provider[] = array( |
||
141 | "foo テスト bar", |
||
142 | array( 'foo', 'テスト', 'bar' ) |
||
143 | ); |
||
144 | |||
145 | $provider[] = array( |
||
146 | "foo テスト bar 123abc ^&'", |
||
147 | array( 'foo', 'テスト', 'bar', '123abc', '^', '&', "'" ) |
||
148 | ); |
||
149 | |||
150 | $provider[] = array( |
||
151 | "was discovered in 1957 and first sold as a medication in 1971", |
||
152 | array( |
||
153 | 'was', 'discovered', 'in', '1957', 'and', |
||
154 | 'first', 'sold', 'as', 'a', 'medication', 'in', '1971' |
||
155 | ) |
||
156 | ); |
||
157 | |||
158 | // See JaTinySegmenterTokenizerTest for comparison |
||
159 | $provider[] = array( |
||
160 | '日本語の新聞記事であれば文字単位で95%程度の精度で分かち書きが行えます。 ', |
||
161 | array( |
||
162 | '日本語', 'の', '新聞', '記事', 'で', |
||
163 | 'あれ', 'ば', '文字', '単位', |
||
164 | 'で', '95', '%', '程度', |
||
165 | 'の', '精度', 'で', '分かち書き', |
||
166 | 'が', '行', 'え', 'ます', '。' |
||
167 | ) |
||
168 | ); |
||
169 | |||
170 | return $provider; |
||
171 | } |
||
172 | |||
173 | } |
||
174 |
An explicit array definition is generally preferable to an implicit one, as it guarantees a stable state of the code.
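Let’s take a look at an example:

    foreach ($collection as $item) {
        $myArray['foo'] = $item->getFoo();

        if ($item->hasBar()) {
            $myArray['bar'] = $item->getBar();
        }

        // do something with $myArray
    }
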
As you can see in this example, the array `$myArray` is initialized the first time the foreach loop is entered. You can also see that the value of the `bar` key is only written conditionally; thus, its value might result from a previous iteration.

This might or might not be intended. To make your intention clear, to make your code more readable, and to avoid accidental bugs, we recommend adding an explicit initialization `$myArray = array()` either outside or inside the foreach loop.