| Conditions | 5 |
| Paths | 9 |
| Total Lines | 36 |
| Lines | 0 |
| Ratio | 0 % |
| Changes | 0 | ||
| 1 | <?php |
||
/**
 * Splits the text into words with the parent tokenizer, then expands every
 * word into its character n-grams for each size from 1 up to the word length.
 *
 * Words are split into UTF-8 characters rather than bytes, so multibyte
 * characters stay intact inside the generated n-grams.
 *
 * @param string $text Text to tokenize.
 *
 * @return array N-gram strings for all words, in word order and, within a
 *               word, by increasing n-gram size.
 */
public function tokenize(string $text): array
{
    $words = parent::tokenize($text);
    $ngramsFactory = new \drupol\phpngrams\NGrams();

    // One entry per (word, size) pair; merged once at the end so the result
    // array is not re-copied on every iteration.
    $ngram_batches = [];

    foreach ($words as $word) {
        // Split once per word; the character list does not depend on $i.
        $characters = preg_split('//u', $word, -1, PREG_SPLIT_NO_EMPTY);

        if ($characters === false) {
            // preg_split() fails on invalid UTF-8; fall back to a byte split.
            $characters = str_split($word);
        }

        $length = count($characters);

        for ($i = 1; $i <= $length; $i++) {
            $ngram_batches[] = array_map(
                'implode',
                iterator_to_array($ngramsFactory->ngrams($characters, $i))
            );
        }
    }

    // The leading [] keeps the call valid when there are no batches.
    return array_merge([], ...$ngram_batches);
}
||
| 51 | } |
||
| 52 |