Test Setup Failed
Pull Request — master (#350)
by Pol
04:02 queued 01:29
created

NGramTokenizer   A

Complexity

Total Complexity 4

Size/Duplication

Total Lines 25
Duplicated Lines 0 %

Coupling/Cohesion

Components 0
Dependencies 2

Importance

Changes 0
Metric Value
wmc 4
lcom 0
cbo 2
dl 0
loc 25
rs 10
c 0
b 0
f 0

1 Method

Rating   Name   Duplication   Size   Complexity  
A tokenize() 0 19 4
1
<?php
2
3
declare(strict_types=1);
4
5
namespace Phpml\Tokenization;
6
7
use drupol\phpngrams\NGrams;
8
9
/**
 * Tokenizer that expands every word returned by the parent WordTokenizer
 * into character n-grams.
 *
 * For each word, n-grams of every size from 1 up to the number of
 * characters in the word are generated through drupol\phpngrams\NGrams and
 * appended, in generation order, to a single flat list.
 */
class NGramTokenizer extends WordTokenizer
{
    /**
     * {@inheritdoc}
     *
     * @param string $text Text to tokenize.
     *
     * @return string[] Flat list of n-grams; each n-gram is the concatenation
     *                  of the characters yielded by NGrams::ngrams().
     */
    public function tokenize(string $text): array
    {
        $words = parent::tokenize($text);

        $ngramsFactory = new NGrams();

        $ngrams = [];
        foreach ($words as $word) {
            // Split on UTF-8 character boundaries rather than bytes, so that
            // multi-byte characters are never cut into invalid fragments.
            $characters = preg_split('//u', $word, -1, PREG_SPLIT_NO_EMPTY);

            if ($characters === false) {
                // The word is not valid UTF-8: fall back to byte-wise
                // splitting, which is the best that can be done here.
                $characters = str_split($word);
            }

            // The split is loop-invariant, so it is computed once per word.
            $length = count($characters);

            for ($size = 1; $size <= $length; $size++) {
                foreach ($ngramsFactory->ngrams($characters, $size) as $ngram) {
                    $ngrams[] = implode('', $ngram);
                }
            }
        }

        return $ngrams;
    }
}
37