onoi /
tesa
This project does not seem to handle request data directly; as such, no vulnerable execution paths were found.
include, or for example
via PHP's auto-loading mechanism.
These results are based on our legacy PHP analysis; consider migrating to our new PHP analysis engine instead. Learn more
| 1 | <?php |
||
| 2 | |||
| 3 | namespace Onoi\Tesa; |
||
| 4 | |||
| 5 | use Onoi\Tesa\StopwordAnalyzer\StopwordAnalyzer; |
||
| 6 | use Onoi\Tesa\StopwordAnalyzer\NullStopwordAnalyzer; |
||
| 7 | use Onoi\Tesa\StopwordAnalyzer\CdbStopwordAnalyzer; |
||
| 8 | use Onoi\Tesa\StopwordAnalyzer\ArrayStopwordAnalyzer; |
||
| 9 | use Onoi\Tesa\Synonymizer\Synonymizer; |
||
| 10 | use Onoi\Tesa\Synonymizer\NullSynonymizer; |
||
| 11 | use Onoi\Tesa\LanguageDetector\NullLanguageDetector; |
||
| 12 | use Onoi\Tesa\LanguageDetector\TextCatLanguageDetector; |
||
| 13 | use Onoi\Tesa\Tokenizer\CJKSimpleCharacterRegExTokenizer; |
||
| 14 | use Onoi\Tesa\Tokenizer\Tokenizer; |
||
| 15 | use Onoi\Tesa\Tokenizer\GenericRegExTokenizer; |
||
| 16 | use Onoi\Tesa\Tokenizer\JaCompoundGroupTokenizer; |
||
| 17 | use Onoi\Tesa\Tokenizer\IcuWordBoundaryTokenizer; |
||
| 18 | use Onoi\Tesa\Tokenizer\NGramTokenizer; |
||
| 19 | use Onoi\Tesa\Tokenizer\JaTinySegmenterTokenizer; |
||
| 20 | use Onoi\Tesa\Tokenizer\PunctuationRegExTokenizer; |
||
| 21 | |||
| 22 | /** |
||
| 23 | * @license GNU GPL v2+ |
||
| 24 | * @since 0.1 |
||
| 25 | * |
||
| 26 | * @author mwjames |
||
| 27 | */ |
||
| 28 | class SanitizerFactory { |
||
| 29 | |||
| 30 | /** |
||
| 31 | * @since 0.1 |
||
| 32 | * |
||
| 33 | * @return Sanitizer |
||
| 34 | */ |
||
| 35 | 5 | public function newSanitizer( $text = '' ) { |
|
| 36 | 5 | return new Sanitizer( $text ); |
|
| 37 | 1 | } |
|
| 38 | |||
| 39 | /* StopwordAnalyzer */ |
||
| 40 | |||
| 41 | /** |
||
| 42 | * @since 0.1 |
||
| 43 | * |
||
| 44 | * @param string|null $languageCode |
||
| 45 | * |
||
| 46 | * @return StopwordAnalyzer |
||
| 47 | */ |
||
| 48 | 2 | public function newStopwordAnalyzerByLanguage( $languageCode = null ) { |
|
| 49 | |||
| 50 | 2 | if ( $languageCode === null ) { |
|
| 51 | 1 | return $this->newNullStopwordAnalyzer(); |
|
| 52 | } |
||
| 53 | |||
| 54 | 1 | $cdbStopwordAnalyzer = $this->newCdbStopwordAnalyzer( |
|
| 55 | $languageCode |
||
| 56 | 1 | ); |
|
| 57 | |||
| 58 | 1 | return $cdbStopwordAnalyzer->isAvailable() ? $cdbStopwordAnalyzer : $this->newNullStopwordAnalyzer();; |
|
| 59 | } |
||
| 60 | |||
| 61 | /** |
||
| 62 | * @since 0.1 |
||
| 63 | * |
||
| 64 | * @return StopwordAnalyzer |
||
| 65 | */ |
||
| 66 | 6 | public function newCdbStopwordAnalyzer( $languageCode = null ) { |
|
| 67 | 6 | return new CdbStopwordAnalyzer( CdbStopwordAnalyzer::getTargetByLanguage( $languageCode ) ); |
|
| 68 | } |
||
| 69 | |||
| 70 | /** |
||
| 71 | * @since 0.1 |
||
| 72 | * |
||
| 73 | * @param array $stopwords; |
||
|
0 ignored issues
–
show
|
|||
| 74 | * |
||
| 75 | * @return StopwordAnalyzer |
||
| 76 | */ |
||
| 77 | 1 | public function newArrayStopwordAnalyzer( array $stopwords = array() ) { |
|
| 78 | 1 | return new ArrayStopwordAnalyzer( $stopwords ); |
|
| 79 | } |
||
| 80 | |||
| 81 | /** |
||
| 82 | * @since 0.1 |
||
| 83 | * |
||
| 84 | * @return StopwordAnalyzer |
||
| 85 | */ |
||
| 86 | 2 | public function newNullStopwordAnalyzer() { |
|
| 87 | 2 | return new NullStopwordAnalyzer(); |
|
| 88 | } |
||
| 89 | |||
| 90 | /** |
||
| 91 | * @since 0.1 |
||
| 92 | * |
||
| 93 | * @param string|null $languageCode |
||
| 94 | * |
||
| 95 | * @return Synonymizer |
||
| 96 | */ |
||
| 97 | 1 | public function newSynonymizerByLanguage( $languageCode = null ) { |
|
| 98 | |||
| 99 | 1 | if ( $languageCode === null ) { |
|
| 100 | 1 | return $this->newNullSynonymizer(); |
|
| 101 | } |
||
| 102 | |||
| 103 | return $this->newNullSynonymizer();; |
||
| 104 | } |
||
| 105 | |||
| 106 | /* Synonymizer */ |
||
| 107 | |||
| 108 | /** |
||
| 109 | * @since 0.1 |
||
| 110 | * |
||
| 111 | * @return Synonymizer |
||
| 112 | */ |
||
| 113 | 6 | public function newNullSynonymizer() { |
|
| 114 | 6 | return new NullSynonymizer(); |
|
| 115 | } |
||
| 116 | |||
| 117 | /* LanguageDetector */ |
||
| 118 | |||
| 119 | /** |
||
| 120 | * @since 0.1 |
||
| 121 | * |
||
| 122 | * @return NullLanguageDetector |
||
| 123 | */ |
||
| 124 | 1 | public function newNullLanguageDetector() { |
|
| 125 | 1 | return new NullLanguageDetector(); |
|
| 126 | } |
||
| 127 | |||
| 128 | /** |
||
| 129 | * @since 0.1 |
||
| 130 | * |
||
| 131 | * @return TextCatLanguageDetector |
||
| 132 | */ |
||
| 133 | 1 | public function newTextCatLanguageDetector() { |
|
| 134 | 1 | return new TextCatLanguageDetector(); |
|
| 135 | } |
||
| 136 | |||
| 137 | /* Tokenizer */ |
||
| 138 | |||
| 139 | /** |
||
| 140 | * @since 0.1 |
||
| 141 | * |
||
| 142 | * @param string $text |
||
| 143 | * @param string|null $languageCode |
||
| 144 | * |
||
| 145 | * @return Tokenizer |
||
| 146 | */ |
||
| 147 | 1 | public function newPreferredTokenizerByLanguage( $text, $languageCode = null ) { |
|
| 148 | |||
| 149 | 1 | $tokenizer = $this->newIcuWordBoundaryTokenizer(); |
|
| 150 | |||
| 151 | 1 | if ( !$tokenizer->isAvailable() && CharacterExaminer::contains( CharacterExaminer::CJK_UNIFIED, $text ) ) { |
|
| 152 | return $this->newCJKMatchableTokenizer( $text ); |
||
| 153 | 1 | } elseif( !$tokenizer->isAvailable() ) { |
|
| 154 | return $this->newGenericRegExTokenizer( $tokenizer ); |
||
| 155 | } |
||
| 156 | |||
| 157 | 1 | $tokenizer->setLocale( $languageCode ); |
|
| 158 | |||
| 159 | 1 | $tokenizer->setWordTokenizerAttribute( |
|
|
0 ignored issues
–
show
The method setWordTokenizerAttribute() does not exist on Onoi\Tesa\Tokenizer\Tokenizer. Did you maybe mean tokenize()?
This check marks calls to methods that do not seem to exist on an object. This is most likely the result of a method being renamed without all references to it being renamed likewise. Loading history...
|
|||
| 160 | 1 | !CharacterExaminer::contains( CharacterExaminer::CJK_UNIFIED, $text ) |
|
| 161 | 1 | ); |
|
| 162 | |||
| 163 | 1 | return $this->newGenericRegExTokenizer( $tokenizer ); |
|
| 164 | } |
||
| 165 | |||
| 166 | /** |
||
| 167 | * @since 0.1 |
||
| 168 | * |
||
| 169 | * @param string $text |
||
| 170 | * |
||
| 171 | * @return Tokenizer |
||
| 172 | */ |
||
| 173 | 1 | public function newCJKMatchableTokenizer( $text ) { |
|
| 174 | |||
| 175 | 1 | $tokenizer = null; |
|
| 176 | |||
| 177 | 1 | if ( CharacterExaminer::contains( CharacterExaminer::HIRAGANA_KATAKANA, $text ) ) { |
|
| 178 | 1 | $tokenizer = $this->newJaTinySegmenterTokenizer(); |
|
| 179 | 1 | } else { |
|
| 180 | 1 | $tokenizer = $this->newNGramTokenizer( $tokenizer ); |
|
| 181 | } |
||
| 182 | |||
| 183 | 1 | $tokenizer = $this->newCJKSimpleCharacterRegExTokenizer( $tokenizer ); |
|
| 184 | |||
| 185 | 1 | return $this->newGenericRegExTokenizer( $tokenizer ); |
|
| 186 | } |
||
| 187 | |||
| 188 | /** |
||
| 189 | * @since 0.1 |
||
| 190 | * |
||
| 191 | * @param Tokenizer|null $tokenizer |
||
| 192 | * |
||
| 193 | * @return Tokenizer |
||
| 194 | */ |
||
| 195 | 2 | public function newIcuWordBoundaryTokenizer( Tokenizer $tokenizer = null ) { |
|
| 196 | 2 | return new IcuWordBoundaryTokenizer( $tokenizer ); |
|
| 197 | } |
||
| 198 | |||
| 199 | /** |
||
| 200 | * @since 0.1 |
||
| 201 | * |
||
| 202 | * @param Tokenizer|null $tokenizer |
||
| 203 | * |
||
| 204 | * @return Tokenizer |
||
| 205 | */ |
||
| 206 | 8 | public function newGenericRegExTokenizer( Tokenizer $tokenizer = null ) { |
|
| 207 | 8 | return new GenericRegExTokenizer( $tokenizer ); |
|
| 208 | } |
||
| 209 | |||
| 210 | /** |
||
| 211 | * @since 0.1 |
||
| 212 | * |
||
| 213 | * @param Tokenizer|null $tokenizer |
||
| 214 | * |
||
| 215 | * @return Tokenizer |
||
| 216 | */ |
||
| 217 | 2 | public function newPunctuationRegExTokenizer( Tokenizer $tokenizer = null ) { |
|
| 218 | 2 | return new PunctuationRegExTokenizer( $tokenizer ); |
|
| 219 | } |
||
| 220 | |||
| 221 | /** |
||
| 222 | * @since 0.1 |
||
| 223 | * |
||
| 224 | * @return Tokenizer |
||
| 225 | */ |
||
| 226 | 1 | public function newJaCompoundGroupTokenizer( Tokenizer $tokinizer = null ) { |
|
| 227 | 1 | return new JaCompoundGroupTokenizer( $tokinizer ); |
|
| 228 | } |
||
| 229 | |||
| 230 | /** |
||
| 231 | * @since 0.1 |
||
| 232 | * |
||
| 233 | * @return Tokenizer |
||
| 234 | */ |
||
| 235 | 3 | public function newJaTinySegmenterTokenizer( Tokenizer $tokinizer = null ) { |
|
| 236 | 3 | return new JaTinySegmenterTokenizer( $tokinizer ); |
|
| 237 | } |
||
| 238 | |||
| 239 | /** |
||
| 240 | * @since 0.1 |
||
| 241 | * |
||
| 242 | * @return Tokenizer |
||
| 243 | */ |
||
| 244 | 2 | public function newCJKSimpleCharacterRegExTokenizer( Tokenizer $tokinizer = null ) { |
|
| 245 | 2 | return new CJKSimpleCharacterRegExTokenizer( $tokinizer ); |
|
| 246 | } |
||
| 247 | |||
| 248 | /** |
||
| 249 | * @since 0.1 |
||
| 250 | * |
||
| 251 | * @return Tokenizer |
||
| 252 | */ |
||
| 253 | 3 | public function newNGramTokenizer( Tokenizer $tokinizer = null, $ngram = 2 ) { |
|
| 254 | 3 | return new NGramTokenizer( $tokinizer, $ngram ); |
|
| 255 | } |
||
| 256 | |||
| 257 | } |
||
| 258 |
This check looks for PHPDoc comments describing methods or function parameters that do not exist on the corresponding method or function. It has, however, found a similar but not annotated parameter which might be a good fit.
Consider the following example. The parameter
$ireland is not defined by the method finale(...). The most likely cause is that the parameter was changed, but the annotation was not.