| Conditions | 5 |
| Paths | 7 |
| Total Lines | 20 |
| Lines | 0 |
| Ratio | 0 % |
| Changes | 0 | ||
| 1 | <?php |
||
/**
 * Tokenizes the text with the parent tokenizer, then expands every word
 * into its character n-grams.
 *
 * When $this->sizes is null, every length from 1 up to the number of
 * characters in the word is generated; otherwise only the configured
 * sizes are used.
 *
 * @param string $text Text to tokenize.
 *
 * @return string[] Flat list of n-grams, ordered by word, then by length.
 */
public function tokenize(string $text): array
{
    $ngram_dataset = [];

    foreach (parent::tokenize($text) as $word) {
        // Split on UTF-8 characters rather than bytes so multibyte characters
        // are never cut in half. preg_split() returns false when the word is
        // not valid UTF-8; in that case fall back to the byte-wise split.
        $characters = preg_split('//u', $word, -1, PREG_SPLIT_NO_EMPTY);
        if ($characters === false) {
            $characters = $word === '' ? [] : str_split($word);
        }

        // An empty word has no n-grams. Without this guard range(1, 0)
        // would count downwards and yield [1, 0].
        if ($characters === []) {
            continue;
        }

        $lengths = $this->sizes ?? range(1, count($characters));

        foreach ($lengths as $length) {
            // Each item produced by the factory is joined back into a string.
            foreach ($this->ngramsFactory->ngrams($characters, $length) as $ngram) {
                $ngram_dataset[] = implode('', $ngram);
            }
        }
    }

    return $ngram_dataset;
}
||
| 60 | } |
||
| 61 |