Conditions | 4 |
Paths | 4 |
Total Lines | 19 |
Lines | 0 |
Ratio | 0 % |
Changes | 0 |
1 | <?php |
||
/**
 * Tokenizes text into character n-grams of every word.
 *
 * The text is first split into words by the parent tokenizer. Each word is
 * then broken into characters, and for every size from 1 up to the number of
 * characters in the word, the n-grams returned by NGrams::ngrams() are joined
 * back into strings and appended to the result.
 *
 * Words are split on UTF-8 character boundaries so that multi-byte characters
 * are never cut in half; a word that is not valid UTF-8 is split byte by byte.
 *
 * @param string $text Text to tokenize.
 *
 * @return string[] Flat list of n-gram strings, ordered by word and then by n-gram size.
 */
public function tokenize(string $text): array
{
    $ngramsFactory = new NGrams();
    $ngram_dataset = [];

    foreach (parent::tokenize($text) as $word) {
        // Split into characters once per word; the 'u' modifier makes the
        // empty pattern match between UTF-8 characters instead of bytes.
        $chars = preg_split('//u', $word, -1, PREG_SPLIT_NO_EMPTY);

        if ($chars === false) {
            // preg_split() fails on malformed UTF-8; keep the byte-wise
            // behaviour for such input rather than dropping the word.
            $chars = str_split($word);
        }

        $length = count($chars);

        for ($size = 1; $size <= $length; $size++) {
            foreach ($ngramsFactory->ngrams($chars, $size) as $ngram) {
                $ngram_dataset[] = implode('', $ngram);
            }
        }
    }

    return $ngram_dataset;
}
||
36 | } |
||
37 |