SanitizerFactory   A
last analyzed

Complexity

Total Complexity 25

Size/Duplication

Total Lines 230
Duplicated Lines 0 %

Coupling/Cohesion

Components 1
Dependencies 15

Test Coverage

Coverage 94.83%

Importance

Changes 0
Metric Value
wmc 25
lcom 1
cbo 15
dl 0
loc 230
ccs 55
cts 58
cp 0.9483
rs 9.1666
c 0
b 0
f 0

18 Methods

Rating   Name   Duplication   Size   Complexity  
A newSanitizer() 0 3 1
A newStopwordAnalyzerByLanguage() 0 12 3
A newCdbStopwordAnalyzer() 0 3 1
A newArrayStopwordAnalyzer() 0 3 1
A newNullStopwordAnalyzer() 0 3 1
A newSynonymizerByLanguage() 0 8 2
A newNullSynonymizer() 0 3 1
A newNullLanguageDetector() 0 3 1
A newTextCatLanguageDetector() 0 3 1
A newPreferredTokenizerByLanguage() 0 18 4
A newCJKMatchableTokenizer() 0 14 2
A newIcuWordBoundaryTokenizer() 0 3 1
A newGenericRegExTokenizer() 0 3 1
A newPunctuationRegExTokenizer() 0 3 1
A newJaCompoundGroupTokenizer() 0 3 1
A newJaTinySegmenterTokenizer() 0 3 1
A newCJKSimpleCharacterRegExTokenizer() 0 3 1
A newNGramTokenizer() 0 3 1
1
<?php
2
3
namespace Onoi\Tesa;
4
5
use Onoi\Tesa\StopwordAnalyzer\StopwordAnalyzer;
6
use Onoi\Tesa\StopwordAnalyzer\NullStopwordAnalyzer;
7
use Onoi\Tesa\StopwordAnalyzer\CdbStopwordAnalyzer;
8
use Onoi\Tesa\StopwordAnalyzer\ArrayStopwordAnalyzer;
9
use Onoi\Tesa\Synonymizer\Synonymizer;
10
use Onoi\Tesa\Synonymizer\NullSynonymizer;
11
use Onoi\Tesa\LanguageDetector\NullLanguageDetector;
12
use Onoi\Tesa\LanguageDetector\TextCatLanguageDetector;
13
use Onoi\Tesa\Tokenizer\CJKSimpleCharacterRegExTokenizer;
14
use Onoi\Tesa\Tokenizer\Tokenizer;
15
use Onoi\Tesa\Tokenizer\GenericRegExTokenizer;
16
use Onoi\Tesa\Tokenizer\JaCompoundGroupTokenizer;
17
use Onoi\Tesa\Tokenizer\IcuWordBoundaryTokenizer;
18
use Onoi\Tesa\Tokenizer\NGramTokenizer;
19
use Onoi\Tesa\Tokenizer\JaTinySegmenterTokenizer;
20
use Onoi\Tesa\Tokenizer\PunctuationRegExTokenizer;
21
22
/**
 * Factory for Sanitizer, StopwordAnalyzer, Synonymizer, LanguageDetector,
 * and Tokenizer instances.
 *
 * @license GNU GPL v2+
 * @since 0.1
 *
 * @author mwjames
 */
class SanitizerFactory {

	/**
	 * Creates a Sanitizer for the given text.
	 *
	 * @since 0.1
	 *
	 * @param string $text
	 *
	 * @return Sanitizer
	 */
	public function newSanitizer( $text = '' ) {
		return new Sanitizer( $text );
	}

	/* StopwordAnalyzer */

	/**
	 * Returns the Cdb based stopword analyzer for a language when it reports
	 * itself as available, otherwise a NullStopwordAnalyzer. A `null` language
	 * code always yields the NullStopwordAnalyzer.
	 *
	 * @since 0.1
	 *
	 * @param string|null $languageCode
	 *
	 * @return StopwordAnalyzer
	 */
	public function newStopwordAnalyzerByLanguage( $languageCode = null ) {

		if ( $languageCode === null ) {
			return $this->newNullStopwordAnalyzer();
		}

		$cdbStopwordAnalyzer = $this->newCdbStopwordAnalyzer( $languageCode );

		if ( $cdbStopwordAnalyzer->isAvailable() ) {
			return $cdbStopwordAnalyzer;
		}

		return $this->newNullStopwordAnalyzer();
	}

	/**
	 * Creates a CdbStopwordAnalyzer on the target that
	 * CdbStopwordAnalyzer::getTargetByLanguage resolves for the language code.
	 *
	 * @since 0.1
	 *
	 * @param string|null $languageCode
	 *
	 * @return StopwordAnalyzer
	 */
	public function newCdbStopwordAnalyzer( $languageCode = null ) {
		return new CdbStopwordAnalyzer( CdbStopwordAnalyzer::getTargetByLanguage( $languageCode ) );
	}

	/**
	 * Creates an ArrayStopwordAnalyzer from the given stopword list.
	 *
	 * @since 0.1
	 *
	 * @param array $stopwords
	 *
	 * @return StopwordAnalyzer
	 */
	public function newArrayStopwordAnalyzer( array $stopwords = array() ) {
		return new ArrayStopwordAnalyzer( $stopwords );
	}

	/**
	 * @since 0.1
	 *
	 * @return StopwordAnalyzer
	 */
	public function newNullStopwordAnalyzer() {
		return new NullStopwordAnalyzer();
	}

	/* Synonymizer */

	/**
	 * Returns the synonymizer for a language.
	 *
	 * @since 0.1
	 *
	 * @param string|null $languageCode
	 *
	 * @return Synonymizer
	 */
	public function newSynonymizerByLanguage( $languageCode = null ) {
		// Every language code, `null` included, currently resolves to the
		// NullSynonymizer; the parameter is kept for interface stability.
		return $this->newNullSynonymizer();
	}

	/**
	 * @since 0.1
	 *
	 * @return Synonymizer
	 */
	public function newNullSynonymizer() {
		return new NullSynonymizer();
	}

	/* LanguageDetector */

	/**
	 * @since 0.1
	 *
	 * @return NullLanguageDetector
	 */
	public function newNullLanguageDetector() {
		return new NullLanguageDetector();
	}

	/**
	 * @since 0.1
	 *
	 * @return TextCatLanguageDetector
	 */
	public function newTextCatLanguageDetector() {
		return new TextCatLanguageDetector();
	}

	/* Tokenizer */

	/**
	 * Selects a tokenizer chain for the text, based on whether the ICU word
	 * boundary tokenizer reports itself as available and whether the text
	 * contains CJK unified characters.
	 *
	 * @since 0.1
	 *
	 * @param string $text
	 * @param string|null $languageCode
	 *
	 * @return Tokenizer
	 */
	public function newPreferredTokenizerByLanguage( $text, $languageCode = null ) {

		$tokenizer = $this->newIcuWordBoundaryTokenizer();

		if ( !$tokenizer->isAvailable() ) {

			if ( CharacterExaminer::contains( CharacterExaminer::CJK_UNIFIED, $text ) ) {
				return $this->newCJKMatchableTokenizer( $text );
			}

			// NOTE(review): the unavailable ICU tokenizer is still handed to the
			// generic tokenizer here; confirm that it degrades gracefully.
			return $this->newGenericRegExTokenizer( $tokenizer );
		}

		$tokenizer->setLocale( $languageCode );

		// The word tokenizer attribute is switched off for text that contains
		// CJK unified characters.
		$tokenizer->setWordTokenizerAttribute(
			!CharacterExaminer::contains( CharacterExaminer::CJK_UNIFIED, $text )
		);

		return $this->newGenericRegExTokenizer( $tokenizer );
	}

	/**
	 * Builds a tokenizer chain for CJK text: a JaTinySegmenterTokenizer when
	 * the text contains Hiragana/Katakana, otherwise an NGramTokenizer, wrapped
	 * by the CJK simple character and generic regex tokenizers.
	 *
	 * @since 0.1
	 *
	 * @param string $text
	 *
	 * @return Tokenizer
	 */
	public function newCJKMatchableTokenizer( $text ) {

		if ( CharacterExaminer::contains( CharacterExaminer::HIRAGANA_KATAKANA, $text ) ) {
			$tokenizer = $this->newJaTinySegmenterTokenizer();
		} else {
			$tokenizer = $this->newNGramTokenizer();
		}

		$tokenizer = $this->newCJKSimpleCharacterRegExTokenizer( $tokenizer );

		return $this->newGenericRegExTokenizer( $tokenizer );
	}

	/**
	 * @since 0.1
	 *
	 * @param Tokenizer|null $tokenizer
	 *
	 * @return IcuWordBoundaryTokenizer
	 */
	public function newIcuWordBoundaryTokenizer( Tokenizer $tokenizer = null ) {
		return new IcuWordBoundaryTokenizer( $tokenizer );
	}

	/**
	 * @since 0.1
	 *
	 * @param Tokenizer|null $tokenizer
	 *
	 * @return Tokenizer
	 */
	public function newGenericRegExTokenizer( Tokenizer $tokenizer = null ) {
		return new GenericRegExTokenizer( $tokenizer );
	}

	/**
	 * @since 0.1
	 *
	 * @param Tokenizer|null $tokenizer
	 *
	 * @return Tokenizer
	 */
	public function newPunctuationRegExTokenizer( Tokenizer $tokenizer = null ) {
		return new PunctuationRegExTokenizer( $tokenizer );
	}

	/**
	 * @since 0.1
	 *
	 * @param Tokenizer|null $tokinizer
	 *
	 * @return Tokenizer
	 */
	public function newJaCompoundGroupTokenizer( Tokenizer $tokinizer = null ) {
		return new JaCompoundGroupTokenizer( $tokinizer );
	}

	/**
	 * @since 0.1
	 *
	 * @param Tokenizer|null $tokinizer
	 *
	 * @return Tokenizer
	 */
	public function newJaTinySegmenterTokenizer( Tokenizer $tokinizer = null ) {
		return new JaTinySegmenterTokenizer( $tokinizer );
	}

	/**
	 * @since 0.1
	 *
	 * @param Tokenizer|null $tokinizer
	 *
	 * @return Tokenizer
	 */
	public function newCJKSimpleCharacterRegExTokenizer( Tokenizer $tokinizer = null ) {
		return new CJKSimpleCharacterRegExTokenizer( $tokinizer );
	}

	/**
	 * @since 0.1
	 *
	 * @param Tokenizer|null $tokinizer
	 * @param integer $ngram
	 *
	 * @return Tokenizer
	 */
	public function newNGramTokenizer( Tokenizer $tokinizer = null, $ngram = 2 ) {
		return new NGramTokenizer( $tokinizer, $ngram );
	}

}
258