Completed
Push — master ( 9401a4...2cc7a2 )
by mw
13:59
created

SanitizerFactory::newStopwordAnalyzerByLanguage()   A

Complexity

Conditions 3
Paths 3

Size

Total Lines 12
Code Lines 6

Duplication

Lines 0
Ratio 0 %

Importance

Changes 1
Bugs 0 Features 1
Metric Value
c 1
b 0
f 1
dl 0
loc 12
rs 9.4285
cc 3
eloc 6
nc 3
nop 1
1
<?php
2
3
namespace Onoi\Tesa;
4
5
use Onoi\Tesa\StopwordAnalyzer\StopwordAnalyzer;
6
use Onoi\Tesa\StopwordAnalyzer\NullStopwordAnalyzer;
7
use Onoi\Tesa\StopwordAnalyzer\CdbStopwordAnalyzer;
8
use Onoi\Tesa\StopwordAnalyzer\ArrayStopwordAnalyzer;
9
use Onoi\Tesa\Synonymizer\Synonymizer;
10
use Onoi\Tesa\Synonymizer\NullSynonymizer;
11
use Onoi\Tesa\LanguageDetector\NullLanguageDetector;
12
use Onoi\Tesa\LanguageDetector\TextCatLanguageDetector;
13
use Onoi\Tesa\Tokenizer\CJKSimpleCharacterRegExTokenizer;
14
use Onoi\Tesa\Tokenizer\Tokenizer;
15
use Onoi\Tesa\Tokenizer\GenericRegExTokenizer;
16
use Onoi\Tesa\Tokenizer\JaCompoundGroupTokenizer;
17
use Onoi\Tesa\Tokenizer\IcuWordBoundaryTokenizer;
18
use Onoi\Tesa\Tokenizer\NGramTokenizer;
19
use Onoi\Tesa\Tokenizer\JaTinySegmenterTokenizer;
20
use Onoi\Tesa\Tokenizer\PunctuationRegExTokenizer;
21
22
/**
23
 * @license GNU GPL v2+
24
 * @since 0.1
25
 *
26
 * @author mwjames
27
 */
28
class SanitizerFactory {
29
30
	/**
31
	 * @since 0.1
32
	 *
33
	 * @return Sanitizer
34
	 */
35
	public function newSanitizer( $text = '' ) {
		// Wrap the given text (an empty string by default) in a fresh Sanitizer.
		$sanitizer = new Sanitizer( $text );

		return $sanitizer;
	}
38
39
	/* StopwordAnalyzer */
40
41
	/**
42
	 * @since 0.1
43
	 *
44
	 * @param string|null $languageCode
45
	 *
46
	 * @return StopwordAnalyzer
47
	 */
48
	public function newStopwordAnalyzerByLanguage( $languageCode = null ) {

		// Without a language code there is nothing to look up, so hand back
		// the null analyzer straight away.
		if ( $languageCode === null ) {
			return $this->newNullStopwordAnalyzer();
		}

		$cdbStopwordAnalyzer = $this->newCdbStopwordAnalyzer(
			$languageCode
		);

		// Use the CdbStopwordAnalyzer only when isAvailable() reports true;
		// otherwise fall back to the null analyzer.
		return $cdbStopwordAnalyzer->isAvailable() ? $cdbStopwordAnalyzer : $this->newNullStopwordAnalyzer();
	}
60
61
	/**
62
	 * @since 0.1
63
	 *
64
	 * @return StopwordAnalyzer
65
	 */
66
	public function newCdbStopwordAnalyzer( $languageCode = null ) {
		// Resolve the target for the language first, then build the analyzer from it.
		$target = CdbStopwordAnalyzer::getTargetByLanguage( $languageCode );

		return new CdbStopwordAnalyzer( $target );
	}
69
70
	/**
71
	 * @since 0.1
72
	 *
73
	 * @param array $stopwords
0 ignored issues
show
Documentation introduced by
There is no parameter named $stopwords;. Did you maybe mean $stopwords?

This check looks for PHPDoc comments describing methods or function parameters that do not exist on the corresponding method or function. It has, however, found a similar but not annotated parameter which might be a good fit.

Consider the following example. The parameter $ireland is not defined by the method finale(...).

/**
 * @param array $germany
 * @param array $ireland
 */
function finale($germany, $island) {
    return "2:1";
}

The most likely cause is that the parameter was changed, but the annotation was not.

Loading history...
74
	 *
75
	 * @return StopwordAnalyzer
76
	 */
77
	public function newArrayStopwordAnalyzer( array $stopwords = array() ) {
		// The stopword list (empty by default) is passed to the analyzer unchanged.
		$arrayStopwordAnalyzer = new ArrayStopwordAnalyzer( $stopwords );

		return $arrayStopwordAnalyzer;
	}
80
81
	/**
82
	 * @since 0.1
83
	 *
84
	 * @return StopwordAnalyzer
85
	 */
86
	public function newNullStopwordAnalyzer() {
		// Takes no configuration: every call yields a new NullStopwordAnalyzer.
		$nullStopwordAnalyzer = new NullStopwordAnalyzer();

		return $nullStopwordAnalyzer;
	}
89
90
	/**
91
	 * @since 0.1
92
	 *
93
	 * @param string|null $languageCode
94
	 *
95
	 * @return Synonymizer
96
	 */
97
	public function newSynonymizerByLanguage( $languageCode = null ) {

		// No language code given: nothing to select, return the null synonymizer.
		if ( $languageCode === null ) {
			return $this->newNullSynonymizer();
		}

		// No language-specific synonymizer is created in this method, so a
		// non-null language code currently yields the null synonymizer as well.
		return $this->newNullSynonymizer();
	}
105
106
	/* Synonymizer */
107
108
	/**
109
	 * @since 0.1
110
	 *
111
	 * @return Synonymizer
112
	 */
113
	public function newNullSynonymizer() {
		// Takes no configuration: every call yields a new NullSynonymizer.
		$nullSynonymizer = new NullSynonymizer();

		return $nullSynonymizer;
	}
116
117
	/* LanguageDetector */
118
119
	/**
120
	 * @since 0.1
121
	 *
122
	 * @return NullLanguageDetector
123
	 */
124
	public function newNullLanguageDetector() {
		// Takes no configuration: every call yields a new NullLanguageDetector.
		$nullLanguageDetector = new NullLanguageDetector();

		return $nullLanguageDetector;
	}
127
128
	/**
129
	 * @since 0.1
130
	 *
131
	 * @return TextCatLanguageDetector
132
	 */
133
	public function newTextCatLanguageDetector() {
		// Constructed without arguments; each call yields a new detector instance.
		$textCatLanguageDetector = new TextCatLanguageDetector();

		return $textCatLanguageDetector;
	}
136
137
	/* Tokenizer */
138
139
	/**
140
	 * @since 0.1
141
	 *
142
	 * @param string $text
143
	 * @param string|null $languageCode
144
	 *
145
	 * @return Tokenizer
146
	 */
147
	public function newPreferredTokenizerByLanguage( $text, $languageCode = null ) {

		$icuTokenizer = $this->newIcuWordBoundaryTokenizer();

		// ICU tokenizer reports itself unavailable and CharacterExaminer finds
		// CJK_UNIFIED characters in the text: delegate to the CJK tokenizer chain.
		if ( !$icuTokenizer->isAvailable() && CharacterExaminer::contains( CharacterExaminer::CJK_UNIFIED, $text ) ) {
			return $this->newCJKMatchableTokenizer( $text );
		}

		// Remaining unavailable case: wrap the ICU tokenizer, unconfigured, in
		// the generic regex tokenizer.
		if ( !$icuTokenizer->isAvailable() ) {
			return $this->newGenericRegExTokenizer( $icuTokenizer );
		}

		$icuTokenizer->setLocale( $languageCode );

		// NOTE(review): static analysis reports that setWordTokenizerAttribute()
		// does not exist on Onoi\Tesa\Tokenizer\Tokenizer, which is the declared
		// return type of newIcuWordBoundaryTokenizer(); the object created there
		// is an IcuWordBoundaryTokenizer — confirm the method is declared on it.
		$withoutCjkCharacters = !CharacterExaminer::contains( CharacterExaminer::CJK_UNIFIED, $text );
		$icuTokenizer->setWordTokenizerAttribute( $withoutCjkCharacters );

		return $this->newGenericRegExTokenizer( $icuTokenizer );
	}
165
166
	/**
167
	 * @since 0.1
168
	 *
169
	 * @param string $text
170
	 *
171
	 * @return Tokenizer
172
	 */
173
	public function newCJKMatchableTokenizer( $text ) {

		// Pick the innermost tokenizer: the JaTinySegmenter one when the text
		// contains HIRAGANA_KATAKANA characters, otherwise an NGram tokenizer
		// created with a null inner tokenizer.
		$innerTokenizer = CharacterExaminer::contains( CharacterExaminer::HIRAGANA_KATAKANA, $text )
			? $this->newJaTinySegmenterTokenizer()
			: $this->newNGramTokenizer( null );

		// Wrap it from the inside out: CJKSimpleCharacterRegEx, then GenericRegEx.
		return $this->newGenericRegExTokenizer(
			$this->newCJKSimpleCharacterRegExTokenizer( $innerTokenizer )
		);
	}
187
188
	/**
189
	 * @since 0.1
190
	 *
191
	 * @param Tokenizer|null $tokenizer
192
	 *
193
	 * @return IcuWordBoundaryTokenizer
194
	 */
195
	public function newIcuWordBoundaryTokenizer( Tokenizer $tokenizer = null ) {
		// The optional tokenizer (null when omitted) is handed to the constructor as-is.
		$icuWordBoundaryTokenizer = new IcuWordBoundaryTokenizer( $tokenizer );

		return $icuWordBoundaryTokenizer;
	}
198
199
	/**
200
	 * @since 0.1
201
	 *
202
	 * @param Tokenizer|null $tokenizer
203
	 *
204
	 * @return Tokenizer
205
	 */
206
	public function newGenericRegExTokenizer( Tokenizer $tokenizer = null ) {
		// The optional tokenizer (null when omitted) is handed to the constructor as-is.
		$genericRegExTokenizer = new GenericRegExTokenizer( $tokenizer );

		return $genericRegExTokenizer;
	}
209
210
	/**
211
	 * @since 0.1
212
	 *
213
	 * @param Tokenizer|null $tokenizer
214
	 *
215
	 * @return Tokenizer
216
	 */
217
	public function newPunctuationRegExTokenizer( Tokenizer $tokenizer = null ) {
		// The optional tokenizer (null when omitted) is handed to the constructor as-is.
		$punctuationRegExTokenizer = new PunctuationRegExTokenizer( $tokenizer );

		return $punctuationRegExTokenizer;
	}
220
221
	/**
222
	 * @since 0.1
223
	 *
224
	 * @return Tokenizer
225
	 */
226
	public function newJaCompoundGroupTokenizer( Tokenizer $tokinizer = null ) {
		// The optional tokenizer (null when omitted) is handed to the constructor as-is.
		$jaCompoundGroupTokenizer = new JaCompoundGroupTokenizer( $tokinizer );

		return $jaCompoundGroupTokenizer;
	}
229
230
	/**
231
	 * @since 0.1
232
	 *
233
	 * @return Tokenizer
234
	 */
235
	public function newJaTinySegmenterTokenizer( Tokenizer $tokinizer = null ) {
		// The optional tokenizer (null when omitted) is handed to the constructor as-is.
		$jaTinySegmenterTokenizer = new JaTinySegmenterTokenizer( $tokinizer );

		return $jaTinySegmenterTokenizer;
	}
238
239
	/**
240
	 * @since 0.1
241
	 *
242
	 * @return Tokenizer
243
	 */
244
	public function newCJKSimpleCharacterRegExTokenizer( Tokenizer $tokinizer = null ) {
		// The optional tokenizer (null when omitted) is handed to the constructor as-is.
		$cjkSimpleCharacterRegExTokenizer = new CJKSimpleCharacterRegExTokenizer( $tokinizer );

		return $cjkSimpleCharacterRegExTokenizer;
	}
247
248
	/**
249
	 * @since 0.1
250
	 *
251
	 * @return Tokenizer
252
	 */
253
	public function newNGramTokenizer( Tokenizer $tokinizer = null, $ngram = 2 ) {
		// Both the optional tokenizer and the $ngram argument (2 unless the
		// caller overrides it) are forwarded to the constructor unchanged.
		$ngramTokenizer = new NGramTokenizer( $tokinizer, $ngram );

		return $ngramTokenizer;
	}
256
257
}
258