This project does not seem to handle request data directly; as such, no vulnerable execution paths were found.
include
, or for example
via PHP's auto-loading mechanism.
These results are based on our legacy PHP analysis; consider migrating to our new PHP analysis engine instead. Learn more
1 | <?php |
||
2 | |||
3 | namespace Onoi\Tesa\Tokenizer; |
||
4 | |||
5 | use RuntimeException; |
||
6 | |||
7 | /** |
||
8 | * PHP Version of the TinySegmenter as a super compact Japanese tokenizer. |
||
9 | * - https://github.com/setchi/codeute/blob/71c09c86cd1ce1cf9c8ca4d20b1db60b3784227a/fuel/app/classes/model/lib/tiny_segmenter.php |
||
10 | * |
||
11 | * TinySegmenter was originally developed by Taku Kudo <taku(at)chasen.org>. |
||
12 | * Published under the BSD license http://chasen.org/~taku/software/TinySegmenter/LICENCE.txt |
||
13 | * |
||
14 | * PHP Version was developed by xnights <programming.magic(at)gmail.com>. |
||
15 | * For details, see http://programming-magic.com/?id=172 |
||
16 | * |
||
17 | * The model is based on the http://research.nii.ac.jp/src/list.html corpus |
||
18 | * together with an optimized L1-norm regularization. |
||
19 | * |
||
20 | * - https://github.com/shogo82148/TinySegmenterMaker |
||
21 | * |
||
22 | * @since 0.1 |
||
23 | */ |
||
class JaTinySegmenterTokenizer implements Tokenizer {

	/**
	 * Character-class patterns mapped to the one-letter type code used by the
	 * model's character-type features.
	 *
	 * Order matters: ctype_() returns the first match, and the Japanese
	 * numerals (M) also fall inside the broader kanji range (H), so M has to
	 * be listed first.
	 *
	 * @var array
	 */
	private $patterns_ = array(
		"[一二三四五六七八九十百千万億兆]"=>"M", // numbers (japanese)
		"[一-龠々〆ヵヶ]"=>"H", // kanji & misc characters
		"[ぁ-ん]"=>"I", // hiragana
		"[ァ-ヴーア-ン゙ー]"=>"K", // katakana
		"[a-zA-Za-zA-Z]"=>"A", // ascii / romaji letters
		"[0-90-9]"=>"N", // ascii / romaji numbers
	);

	/**
	 * Optional tokenizer that pre-processes the text before segmentation.
	 *
	 * @var Tokenizer|null
	 */
	private $tokenizer;

	/**
	 * Decoded model (feature name => key => weight, plus a scalar "BIAS").
	 *
	 * This is kept static on purpose so the model file is read and decoded
	 * only once, no matter how many instances are created.
	 *
	 * @var array|null
	 */
	private static $model;

	/**
	 * @since 0.1
	 *
	 * @param Tokenizer $tokenizer
	 */
	public function __construct( Tokenizer $tokenizer = null ) {
		$this->tokenizer = $tokenizer;
	}

	/**
	 * Forwards the option to the wrapped tokenizer; without one the call is a
	 * no-op because this class has no options of its own.
	 *
	 * @since 0.1
	 *
	 * {@inheritDoc}
	 */
	public function setOption( $name, $value ) {
		if ( $this->tokenizer !== null ) {
			$this->tokenizer->setOption( $name, $value );
		}
	}

	/**
	 * @since 0.1
	 *
	 * {@inheritDoc}
	 */
	public function isWordTokenizer() {
		return false;
	}

	/**
	 * Runs the wrapped tokenizer first (if any), joins its tokens with a
	 * single space and segments the resulting text.
	 *
	 * @since 0.1
	 *
	 * {@inheritDoc}
	 *
	 * @throws RuntimeException when the model file cannot be read or decoded
	 */
	public function tokenize( $string ) {

		if ( $this->tokenizer !== null ) {
			$string = implode( " ", $this->tokenizer->tokenize( $string ) );
		}

		return $this->loadModel()->segment( $string );
	}

	/**
	 * Loads the JSON model into the static cache on first use.
	 *
	 * @return self
	 * @throws RuntimeException when the file is unreadable or does not decode
	 *  to an array
	 */
	private function loadModel() {

		if ( self::$model !== null ) {
			return $this;
		}

		$file = __DIR__ . '/model/rwcp.model.json';

		// The warning is suppressed on purpose: a failed read is reported
		// through the exception below instead.
		$contents = @file_get_contents( $file );
		$model = $contents === false ? null : json_decode( $contents, true );

		// Validate before caching. Anything other than an array (a syntax
		// error yields null, but so would a valid scalar document) must not
		// end up in the static cache where it would block a later reload.
		if ( !is_array( $model ) ) {
			throw new RuntimeException( "Couldn't read the model from {$file}." );
		}

		self::$model = $model;

		return $this;
	}

	/**
	 * Splits the text into segments by scoring every character position
	 * against the model and starting a new segment wherever the score is
	 * positive.
	 *
	 * The text is processed as UTF-8; when a different encoding is given or
	 * detected the segments are converted back to it before being returned.
	 *
	 * @param string $input
	 * @param string|null $encoding encoding of $input, detected when omitted
	 *
	 * @return string[]
	 */
	protected function segment( $input, $encoding = null ) {

		// "0" (or 0) is falsy in PHP but is still text that has to be
		// segmented, only genuinely empty input yields no tokens.
		if ( !$input && !is_numeric( $input ) ) {
			return array();
		}

		if ( !$encoding ) {
			$encoding = mb_detect_encoding( $input );
		}

		// Detection can fail and return false; treat the input as UTF-8 then
		// instead of handing an invalid encoding to mb_convert_encoding().
		if ( !$encoding ) {
			$encoding = 'UTF-8';
		}

		if ( $encoding !== 'UTF-8' ) {
			$input = mb_convert_encoding( $input, 'UTF-8', $encoding );
		}

		$chars = $this->mb_string_to_array_( $input );

		// Without any character the first "word" would be the E1 sentinel.
		if ( $chars === array() ) {
			return array();
		}

		// Three sentinels on either side let the scoring window (three
		// characters back, two ahead) be read at every position without
		// bounds checks. Sentinel positions have the character type "O".
		$seg = array_merge( array( "B3", "B2", "B1" ), $chars, array( "E1", "E2", "E3" ) );

		$ctype = array( "O", "O", "O" );

		foreach ( $chars as $char ) {
			$ctype[] = $this->ctype_( $char );
		}

		$ctype[] = "O";
		$ctype[] = "O";
		$ctype[] = "O";

		$result = array();
		$word = $seg[3];

		// Previous three boundary decisions, oldest first: "U" = unknown
		// (before the start), "B" = boundary, "O" = no boundary.
		$p1 = "U";
		$p2 = "U";
		$p3 = "U";

		$bias = isset( self::$model["BIAS"] ) ? self::$model["BIAS"] : 0;
		$end = count( $seg ) - 3;

		for ( $i = 4; $i < $end; ++$i ) {
			$score = $bias;

			$w1 = $seg[$i-3];
			$w2 = $seg[$i-2];
			$w3 = $seg[$i-1];
			$w4 = $seg[$i];
			$w5 = $seg[$i+1];
			$w6 = $seg[$i+2];

			$c1 = $ctype[$i-3];
			$c2 = $ctype[$i-2];
			$c3 = $ctype[$i-1];
			$c4 = $ctype[$i];
			$c5 = $ctype[$i+1];
			$c6 = $ctype[$i+2];

			// Previous decisions
			$score += $this->weight_( "UP1", $p1 );
			$score += $this->weight_( "UP2", $p2 );
			$score += $this->weight_( "UP3", $p3 );
			$score += $this->weight_( "BP1", $p1 . $p2 );
			$score += $this->weight_( "BP2", $p2 . $p3 );

			// Characters: uni-, bi- and trigrams
			$score += $this->weight_( "UW1", $w1 );
			$score += $this->weight_( "UW2", $w2 );
			$score += $this->weight_( "UW3", $w3 );
			$score += $this->weight_( "UW4", $w4 );
			$score += $this->weight_( "UW5", $w5 );
			$score += $this->weight_( "UW6", $w6 );
			$score += $this->weight_( "BW1", $w2 . $w3 );
			$score += $this->weight_( "BW2", $w3 . $w4 );
			$score += $this->weight_( "BW3", $w4 . $w5 );
			$score += $this->weight_( "TW1", $w1 . $w2 . $w3 );
			$score += $this->weight_( "TW2", $w2 . $w3 . $w4 );
			$score += $this->weight_( "TW3", $w3 . $w4 . $w5 );
			$score += $this->weight_( "TW4", $w4 . $w5 . $w6 );

			// Character types: uni-, bi- and trigrams
			$score += $this->weight_( "UC1", $c1 );
			$score += $this->weight_( "UC2", $c2 );
			$score += $this->weight_( "UC3", $c3 );
			$score += $this->weight_( "UC4", $c4 );
			$score += $this->weight_( "UC5", $c5 );
			$score += $this->weight_( "UC6", $c6 );
			$score += $this->weight_( "BC1", $c2 . $c3 );
			$score += $this->weight_( "BC2", $c3 . $c4 );
			$score += $this->weight_( "BC3", $c4 . $c5 );
			$score += $this->weight_( "TC1", $c1 . $c2 . $c3 );
			$score += $this->weight_( "TC2", $c2 . $c3 . $c4 );
			$score += $this->weight_( "TC3", $c3 . $c4 . $c5 );
			$score += $this->weight_( "TC4", $c4 . $c5 . $c6 );

			// Previous decisions combined with character types
			$score += $this->weight_( "UQ1", $p1 . $c1 );
			$score += $this->weight_( "UQ2", $p2 . $c2 );
			// NOTE(review): by analogy with UP3/UC3 this lookup looks like it
			// should use "UQ3". It is left untouched because changing it
			// alters the segmentation; confirm against the feature names the
			// bundled model was trained with before switching.
			$score += $this->weight_( "UQ1", $p3 . $c3 );
			$score += $this->weight_( "BQ1", $p2 . $c2 . $c3 );
			$score += $this->weight_( "BQ2", $p2 . $c3 . $c4 );
			$score += $this->weight_( "BQ3", $p3 . $c2 . $c3 );
			$score += $this->weight_( "BQ4", $p3 . $c3 . $c4 );
			$score += $this->weight_( "TQ1", $p2 . $c1 . $c2 . $c3 );
			$score += $this->weight_( "TQ2", $p2 . $c2 . $c3 . $c4 );
			$score += $this->weight_( "TQ3", $p3 . $c1 . $c2 . $c3 );
			$score += $this->weight_( "TQ4", $p3 . $c2 . $c3 . $c4 );

			$p = "O";

			// A positive score means a segment boundary in front of $seg[$i]
			if ( $score > 0 ) {

				if ( $word !== '' && $word !== ' ' ) {
					$result[] = $word;
				}

				$word = "";
				$p = "B";
			}

			$p1 = $p2;
			$p2 = $p3;
			$p3 = $p;

			// Single spaces only separate tokens and are never part of one
			if ( $seg[$i] !== '' && $seg[$i] !== ' ' ) {
				$word .= $seg[$i];
			}
		}

		// Same filter as inside the loop, otherwise text ending in a space
		// produces a trailing empty (or blank) token.
		if ( $word !== '' && $word !== ' ' ) {
			$result[] = $word;
		}

		if ( $encoding !== 'UTF-8' ) {
			// Indexed assignment instead of a by-reference foreach, so no
			// dangling reference is left in the returned array.
			foreach ( $result as $index => $token ) {
				$result[$index] = mb_convert_encoding( $token, $encoding, 'UTF-8' );
			}
		}

		return $result;
	}

	/**
	 * Returns the type code of the first pattern in $patterns_ that matches
	 * the character, or "O" (other) when none does.
	 *
	 * @param string $str a single UTF-8 character
	 *
	 * @return string
	 */
	private function ctype_( $str ) {

		foreach ( $this->patterns_ as $pattern => $type ) {
			if ( preg_match( '/' . $pattern . '/u', $str ) ) {
				return $type;
			}
		}

		return "O";
	}

	/**
	 * Returns the model weight stored for a feature/key pair, or 0 when the
	 * model has no such entry.
	 *
	 * @param string $feature feature name, e.g. "UW1"
	 * @param string $key
	 *
	 * @return integer|float
	 */
	private function weight_( $feature, $key ) {
		return isset( self::$model[$feature][$key] ) ? self::$model[$feature][$key] : 0;
	}

	/**
	 * Splits a string into an array of single characters.
	 *
	 * @param string $str
	 * @param string $encoding
	 *
	 * @return string[]
	 */
	private function mb_string_to_array_( $str, $encoding = 'UTF-8' ) {

		// One linear pass for the common case; calling mb_substr() per index
		// has to rescan the string from the start every time.
		if ( $encoding === 'UTF-8' ) {
			$chars = preg_split( '//u', $str, -1, PREG_SPLIT_NO_EMPTY );

			// false means PCRE rejected the input (e.g. malformed UTF-8), in
			// which case the mbstring based loop below takes over.
			if ( $chars !== false ) {
				return $chars;
			}
		}

		$result = array();
		$length = mb_strlen( $str, $encoding );

		for ( $i = 0; $i < $length; ++$i ) {
			$result[] = mb_substr( $str, $i, 1, $encoding );
		}

		return $result;
	}

}
||
268 |
This check marks private properties in classes that are never used. Those properties can be removed.