Conditions | 5 |
Paths | 7 |
Total Lines | 20 |
Lines | 0 |
Ratio | 0 % |
Changes | 0 |
1 | <?php |
||
/**
 * Break the text into words and return the character n-grams of every word.
 *
 * Words are obtained from the parent tokenizer. For each word, n-grams are
 * requested for every length listed in $this->sizes when that property is
 * not null; otherwise for every length from 1 up to the number of characters
 * in the word. Each n-gram returned by the factory is joined into one string.
 *
 * @param string $text Text to tokenize.
 *
 * @return string[] N-gram strings, grouped by word, then by requested length.
 */
public function tokenize(string $text): array
{
    $ngrams = [];

    foreach (parent::tokenize($text) as $word) {
        // Split on UTF-8 character boundaries so a multi-byte character is never
        // cut in half. preg_split() returns false on invalid UTF-8; fall back to
        // the byte-wise split in that case instead of dropping the word.
        $chars = preg_split('//u', $word, -1, PREG_SPLIT_NO_EMPTY);
        if ($chars === false) {
            $chars = str_split($word);
        }

        // An empty word has no n-grams. Skipping it also avoids range(1, 0),
        // which counts down and would yield the lengths [1, 0].
        if ($chars === []) {
            continue;
        }

        // Only build the default 1..N range when no explicit sizes are configured.
        $lengths = $this->sizes ?? range(1, count($chars));

        foreach ($lengths as $length) {
            // NOTE(review): assumes ngrams() yields arrays of characters — confirm against the factory.
            foreach ($this->ngramsFactory->ngrams($chars, $length) as $ngram) {
                $ngrams[] = implode('', $ngram);
            }
        }
    }

    return $ngrams;
}
||
60 | } |
||
61 |