1 | <?php |
||
11 | class NGramTokenizer implements Tokenizer { |
||
12 | |||
13 | /** |
||
14 | * @var Tokenizer |
||
15 | */ |
||
16 | private $tokenizer; |
||
17 | |||
18 | /** |
||
19 | * @var integer n-gram size; initialised to 2 in the declaration |
||
20 | */ |
||
21 | private $ngramSize = 2; |
||
22 | |||
23 | /** |
||
24 | * @var boolean marker flag; initialised to false in the declaration |
||
25 | */ |
||
26 | private $withMarker = false; |
||
27 | |||
28 | /** |
||
29 | * @since 0.1 |
||
30 | * |
||
31 | * @param Tokenizer|null $tokenizer optional; the signature defaults it to null |
||
32 | * @param integer $ngramSize optional; the signature defaults it to 2 |
||
33 | */ |
||
34 | 13 | public function __construct( Tokenizer $tokenizer = null, $ngramSize = 2 ) { |
|
38 | |||
39 | /** |
||
40 | * @since 0.1 |
||
41 | * |
||
42 | * @param boolean $withMarker |
||
43 | */ |
||
44 | 3 | public function withMarker( $withMarker ) { |
|
47 | |||
48 | /** |
||
49 | * @since 0.1 |
||
50 | * |
||
51 | * @param integer $ngramSize |
||
52 | */ |
||
53 | 1 | public function setNgramSize( $ngramSize ) { |
|
56 | |||
57 | /** |
||
58 | * @since 0.1 |
||
59 | * |
||
60 | * {@inheritDoc} |
||
61 | */ |
||
62 | 1 | public function setOption( $name, $value ) { |
|
67 | |||
68 | /** |
||
69 | * @since 0.1 |
||
70 | * |
||
71 | * {@inheritDoc} |
||
72 | */ |
||
73 | 8 | public function isWordTokenizer() { |
|
76 | |||
77 | /** |
||
78 | * @since 0.1 |
||
79 | * |
||
80 | * {@inheritDoc} |
||
81 | */ |
||
82 | 12 | public function tokenize( $string ) { |
|
96 | |||
/**
 * NOTE(review): this method had no docblock and its body is not visible in this view.
 * The last two parameter names match the $ngramSize and $withMarker properties;
 * presumably tokenize() forwards those values here - confirm against the full source.
 *
 * @param $text TODO confirm type
 * @param $ngramSize presumably integer, like the property of the same name - confirm
 * @param $withMarker presumably boolean, like the property of the same name - confirm
 */
97 | 12 | private function createNGrams( $text, $ngramSize, $withMarker ) { |
|
128 | |||
129 | } |
||
130 |