Conditions | 4 |
Paths | 4 |
Total Lines | 19 |
Lines | 0 |
Ratio | 0 % |
Changes | 0 |
1 | <?php |
||
/**
 * Tokenizes text into character n-grams of every size for each word.
 *
 * Words are obtained from the parent tokenizer. For every word, n-grams of
 * size 1 up to the word's character count are generated with
 * \drupol\phpngrams\NGrams and each n-gram is joined back into a string.
 *
 * @param string $text Text to tokenize.
 *
 * @return string[] Flat list of n-gram strings, grouped by word and, within
 *                  a word, by increasing n-gram size.
 */
public function tokenize(string $text): array
{
    $words = parent::tokenize($text);

    $ngramsFactory = new \drupol\phpngrams\NGrams();

    $ngram_dataset = [];
    foreach ($words as $word) {
        // Split on UTF-8 character boundaries so multibyte characters are not
        // cut into raw bytes (strlen()/str_split() are byte-wise). preg_split()
        // returns false when $word is not valid UTF-8; in that case fall back
        // to the byte-wise split.
        $characters = preg_split('//u', $word, -1, PREG_SPLIT_NO_EMPTY);
        if ($characters === false) {
            $characters = str_split($word);
        }

        // The character list does not depend on the n-gram size, so it is
        // computed once per word rather than once per size.
        $length = count($characters);

        for ($i = 1; $i <= $length; $i++) {
            foreach ($ngramsFactory->ngrams($characters, $i) as $ngram) {
                $ngram_dataset[] = implode('', $ngram);
            }
        }
    }

    return $ngram_dataset;
}
||
34 | } |
||
35 |