for testing and deploying your application
for finding and fixing issues
for empowering human code reviews
<?php
declare(strict_types=1);
namespace Phpml\Tokenization;
use drupol\phpngrams\NGrams;
/**
 * Tokenizer that turns a text into character n-grams.
 *
 * The text is first split into words by the parent WordTokenizer; every word
 * is then expanded into its character n-grams for each size from 1 up to the
 * number of characters in the word.
 */
class NGramTokenizer extends WordTokenizer
{
    /**
     * {@inheritdoc}
     *
     * N-grams are generated by drupol\phpngrams\NGrams::ngrams() using that
     * library's default options, so details such as whether n-grams wrap
     * around the end of a word are decided there, not here.
     *
     * @param string $text Text to tokenize.
     *
     * @return string[] N-grams in word order, then ascending n-gram size,
     *                  then in the order yielded by the n-gram generator.
     */
    public function tokenize(string $text): array
    {
        $words = parent::tokenize($text);
        $ngramsFactory = new NGrams();
        $ngramDataset = [];

        foreach ($words as $word) {
            // Split once per word instead of once per n-gram size.
            $characters = $this->splitIntoCharacters($word);
            $length = count($characters);

            for ($i = 1; $i <= $length; $i++) {
                foreach ($ngramsFactory->ngrams($characters, $i) as $ngram) {
                    $ngramDataset[] = implode('', $ngram);
                }
            }
        }

        return $ngramDataset;
    }

    /**
     * Splits a word into its individual characters.
     *
     * Uses a UTF-8 aware split so multi-byte characters are kept intact; if
     * the word is not valid UTF-8 the split falls back to single bytes.
     *
     * @param string $word Word to split.
     *
     * @return string[] Characters of the word, or an empty array for an empty word.
     */
    private function splitIntoCharacters(string $word): array
    {
        if ($word === '') {
            return [];
        }

        $characters = preg_split('//u', $word, -1, PREG_SPLIT_NO_EMPTY);

        // preg_split() returns false when the subject is not valid UTF-8.
        return $characters === false ? str_split($word) : $characters;
    }
}