for testing and deploying your application
for finding and fixing issues
for empowering human code reviews
<?php
declare(strict_types=1);
namespace Phpml\Tokenization;
/**
 * Tokenizer that expands each token produced by WordTokenizer into its
 * character n-grams.
 *
 * For every token, n-grams of every size from 1 up to the token's length
 * (counted in characters) are generated and joined back into strings.
 */
class NGramTokenizer extends WordTokenizer
{
    /**
     * {@inheritdoc}
     *
     * @return string[] the n-grams of every token, in token order and, within
     *                  one token, by increasing n-gram size
     */
    public function tokenize(string $text): array
    {
        $ngramsFactory = new \drupol\phpngrams\NGrams();
        $ngrams = [];

        foreach (parent::tokenize($text) as $word) {
            $characters = $this->splitCharacters($word);
            $wordLength = count($characters);

            for ($size = 1; $size <= $wordLength; ++$size) {
                // NOTE(review): ngrams() is called with only the data and the
                // size, so the library's defaults for any further options
                // apply - confirm they are the intended ones.
                $generated = iterator_to_array($ngramsFactory->ngrams($characters, $size));

                // Append directly instead of array_merge() on every pass,
                // which copied the whole accumulated list each time.
                foreach ($generated as $ngram) {
                    $ngrams[] = implode('', $ngram);
                }
            }
        }

        return $ngrams;
    }

    /**
     * Splits a token into its individual characters.
     *
     * The split is done on UTF-8 code points so multi-byte characters stay
     * intact; when the token is not valid UTF-8 the split falls back to
     * single bytes.
     *
     * @return string[]
     */
    private function splitCharacters(string $word): array
    {
        $characters = preg_split('//u', $word, -1, PREG_SPLIT_NO_EMPTY);

        // preg_split() returns false when the subject is not valid UTF-8.
        return $characters === false ? str_split($word) : $characters;
    }
}