Total Complexity | 10 |
Total Lines | 54 |
Duplicated Lines | 0 % |
Changes | 1 | ||
Bugs | 0 | Features | 0 |
1 | <?php |
||
9 | class NGramWordTokenizer extends WordTokenizer |
||
10 | { |
||
11 | /** |
||
12 | * @var int Lower gram bound; the constructor requires 1 <= $minGram <= $maxGram. |
||
13 | */ |
||
14 | private $minGram; |
||
15 | |||
16 | /** |
||
17 | * @var int Upper gram bound; the constructor requires it to be at least 1 and not below $minGram. |
||
18 | */ |
||
19 | private $maxGram; |
||
20 | |||
/**
 * Stores the gram bounds after checking that they form a usable range.
 *
 * @param int $minGram lower bound; must be at least 1 and not exceed $maxGram
 * @param int $maxGram upper bound; must be at least 1
 *
 * @throws InvalidArgumentException when either bound is below 1 or $minGram is greater than $maxGram
 */
public function __construct(int $minGram = 1, int $maxGram = 2)
{
    // Both bounds are positive and correctly ordered (exact negation of the rejection condition).
    $isUsableRange = $minGram >= 1 && $maxGram >= 1 && $minGram <= $maxGram;

    if (!$isUsableRange) {
        $message = sprintf('Invalid (%s, %s) minGram and maxGram value combination', $minGram, $maxGram);

        throw new InvalidArgumentException($message);
    }

    $this->maxGram = $maxGram;
    $this->minGram = $minGram;
}
||
30 | |||
31 | /** |
||
32 | * {@inheritdoc} |
||
33 | */ |
||
34 | public function tokenize(string $text): array |
||
46 | } |
||
47 | |||
48 | private function getNgrams(array $match, int $n = 2): array |
||
63 | } |
||
64 | } |
||
65 |