Conditions | 5 |
Paths | 9 |
Total Lines | 36 |
Lines | 0 |
Ratio | 0 % |
Changes | 0 |
1 | <?php |
||
/**
 * Tokenizes the text with the parent tokenizer, then expands every word
 * into its n-grams for each size from 1 up to the word's length.
 *
 * The result is a flat list: n-grams are appended word by word, and for
 * each word in increasing n-gram size.
 *
 * NOTE(review): strlen() and str_split() work on bytes, so a multi-byte
 * (e.g. UTF-8) character is split into single-byte fragments. Confirm
 * whether the parent tokenizer can emit such words before relying on this.
 *
 * @param string $text The text to tokenize.
 *
 * @return string[] The n-grams of all words, each n-gram joined back into
 *                  a string.
 */
public function tokenize(string $text): array
{
    $ngramsFactory = new \drupol\phpngrams\NGrams();

    // Collect one batch per (word, size) pair and merge once at the end;
    // merging into the accumulator on every iteration re-copies the whole
    // result each time.
    $ngramBatches = [];

    foreach (parent::tokenize($text) as $word) {
        $length = strlen($word);

        if (0 === $length) {
            // No n-gram sizes to generate for an empty word.
            continue;
        }

        // Loop-invariant: the character list does not depend on the size.
        $characters = str_split($word);

        for ($i = 1; $i <= $length; $i++) {
            // Each generated n-gram is presumably an array of characters
            // (it is passed to implode); join it back into a string.
            $ngramBatches[] = array_map(
                'implode',
                iterator_to_array(
                    $ngramsFactory->ngrams(
                        $characters,
                        $i
                    )
                )
            );
        }
    }

    // array_merge() without arguments is an error before PHP 7.4.
    if ([] === $ngramBatches) {
        return [];
    }

    return array_merge(...$ngramBatches);
}
||
51 | } |
||
52 |