Test Setup Failed
Pull Request — master (#350)
by Pol
04:18 queued 01:53
created

NGramTokenizer::tokenize()   A

Complexity

Conditions 4
Paths 4

Size

Total Lines 19

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
dl 0
loc 19
rs 9.6333
c 0
b 0
f 0
cc 4
nc 4
nop 1
1
<?php
2
3
declare(strict_types=1);
4
5
namespace Phpml\Tokenization;
6
7
/**
 * Tokenizer that expands every word produced by WordTokenizer into
 * character n-grams.
 *
 * For each word, n-grams of every size from 1 up to the number of
 * characters in the word are requested from drupol/phpngrams and each
 * n-gram is joined back into a string.
 */
class NGramTokenizer extends WordTokenizer
{
    /**
     * Splits the text into words and returns the character n-grams of each word.
     *
     * Words are split into UTF-8 characters rather than bytes, so multi-byte
     * characters are kept whole inside the generated n-grams.
     *
     * @param string $text Text to tokenize.
     *
     * @return string[] Flat list of n-grams, ordered by word and, within a
     *                  word, by ascending n-gram size.
     */
    public function tokenize(string $text): array
    {
        $ngramsFactory = new \drupol\phpngrams\NGrams();

        $ngramDataset = [];
        foreach (parent::tokenize($text) as $word) {
            // Split on character boundaries (PCRE "u" modifier); str_split()
            // and strlen() are byte-wise and would cut multi-byte characters.
            $characters = preg_split('//u', $word, -1, PREG_SPLIT_NO_EMPTY);
            if ($characters === false) {
                // The word is not valid UTF-8: keep the byte-wise behaviour.
                $characters = str_split($word);
            }

            // The split is loop-invariant, so it is computed once per word.
            $length = count($characters);

            for ($size = 1; $size <= $length; $size++) {
                foreach ($ngramsFactory->ngrams($characters, $size) as $ngram) {
                    $ngramDataset[] = implode('', $ngram);
                }
            }
        }

        return $ngramDataset;
    }
}
35