Sanitizer::setOption()   B
last analyzed

Complexity

Conditions 5
Paths 4

Size

Total Lines 10
Code Lines 5

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 8
CRAP Score 5

Importance

Changes 0
Metric Value
dl 0
loc 10
ccs 8
cts 8
cp 1
rs 8.8571
c 0
b 0
f 0
cc 5
eloc 5
nc 4
nop 2
crap 5
1
<?php
2
3
namespace Onoi\Tesa;
4
5
use Onoi\Tesa\Tokenizer\Tokenizer;
6
use Onoi\Tesa\Synonymizer\Synonymizer;
7
use Onoi\Tesa\StopwordAnalyzer\StopwordAnalyzer;
8
use RuntimeException;
9
10
/**
11
 * @license GNU GPL v2+
12
 * @since 0.1
13
 *
14
 * @author mwjames
15
 */
16
class Sanitizer {

	/**
	 * Option name for setOption(): a list of words that sanitizeWith() keeps
	 * even when they are shorter than the minimum length or are stop words
	 */
	const WHITELIST = 'WHITELIST';

	/**
	 * Option name for setOption(): the minimum length a word needs to have
	 * to be kept by sanitizeWith() when a word tokenizer is used
	 */
	const MIN_LENGTH = 'MIN_LENGTH';

	/**
	 * Any change to the content of its data files should be reflected in a
	 * version change (the version number does not necessarily correlate with
	 * the library version)
	 */
	const VERSION = '0.2';

	/**
	 * @var string
	 */
	private $string = '';

	/**
	 * Whitelisted words are stored as array keys (word => true) so that
	 * membership can be tested with isset
	 *
	 * @var array
	 */
	private $whiteList = array();

	/**
	 * @var integer
	 */
	private $minLength = 3;

	/**
	 * @since 0.1
	 *
	 * @param string $string
	 */
	public function __construct( $string = '' ) {
		$this->setText( $string );
	}

	/**
	 * Sets one of the options identified by the WHITELIST or MIN_LENGTH
	 * constant, any other name is ignored.
	 *
	 * A WHITELIST value is only applied when it is a non-empty array, a
	 * MIN_LENGTH value is cast to an integer.
	 *
	 * @since 0.1
	 *
	 * @param string $name
	 * @param mixed $value
	 */
	public function setOption( $name, $value ) {

		if ( $name === self::WHITELIST && is_array( $value ) && $value !== array() ) {
			$this->whiteList = array_fill_keys( $value, true );
		}

		if ( $name === self::MIN_LENGTH ) {
			$this->minLength = (int)$value;
		}
	}

	/**
	 * @since 0.1
	 *
	 * @param string $string
	 */
	public function setText( $string ) {
		$this->string = $string;
	}

	/**
	 * @see Normalizer::applyTransliteration
	 *
	 * @since 0.1
	 *
	 * @param integer $flag
	 */
	public function applyTransliteration( $flag = Transliterator::DIACRITICS ) {
		$this->string = Normalizer::applyTransliteration( $this->string, $flag );
	}

	/**
	 * @see Normalizer::convertDoubleWidth
	 *
	 * @since 0.1
	 */
	public function convertDoubleWidth() {
		$this->string = Normalizer::convertDoubleWidth( $this->string );
	}

	/**
	 * Tokenizes the current text and returns the retained words joined by a
	 * single space. The text held by the instance is not modified.
	 *
	 * @since 0.1
	 *
	 * @param Tokenizer $tokenizer
	 * @param StopwordAnalyzer $stopwordAnalyzer
	 * @param Synonymizer $synonymizer
	 *
	 * @return string
	 */
	public function sanitizeWith( Tokenizer $tokenizer, StopwordAnalyzer $stopwordAnalyzer, Synonymizer $synonymizer ) {

		// Treat non-words tokenizers (Ja,Zh*) differently
		$minLength = $tokenizer->isWordTokenizer() ? $this->minLength : 1;

		$words = $tokenizer->tokenize( $this->string );

		// Nothing to work with, hand back the text as it is
		if ( !$words || !is_array( $words ) ) {
			return $this->string;
		}

		$index = array();

		// Last word that was added to the index, null as long as it is empty
		$previous = null;

		foreach ( $words as $word ) {

			// Trim before any of the checks so that the whitelist, length,
			// stop word, and proximity comparison all see the same form of
			// the word that is going to be stored
			$word = trim( $synonymizer->synonymize( $word ) );

			// If it is not an exemption and less than the required minimum length
			// or identified as stop word it is removed
			if ( !isset( $this->whiteList[$word] ) && ( mb_strlen( $word ) < $minLength || $stopwordAnalyzer->isStopWord( $word ) ) ) {
				continue;
			}

			// Simple proximity, check for same words appearing next to each other
			if ( $previous === $word ) {
				continue;
			}

			$index[] = $word;
			$previous = $word;
		}

		return implode( ' ', $index );
	}

	/**
	 * @see Normalizer::toLowercase
	 *
	 * @since 0.1
	 */
	public function toLowercase() {
		$this->string = Normalizer::toLowercase( $this->string );
	}

	/**
	 * @see Normalizer::reduceLengthTo
	 *
	 * @since 0.1
	 *
	 * @param integer $length
	 */
	public function reduceLengthTo( $length ) {
		$this->string = Normalizer::reduceLengthTo( $this->string, $length );
	}

	/**
	 * @see http://www.phpwact.org/php/i18n/utf-8#str_replace
	 * @since 0.1
	 *
	 * @param string $search
	 * @param string $replace
	 */
	public function replace( $search, $replace ) {
		$this->string = str_replace( $search, $replace, $this->string );
	}

	/**
	 * @since 0.1
	 *
	 * @return string
	 */
	public function __toString() {
		// setText() accepts any value while __toString is required to return
		// a string, therefore cast explicitly
		return (string)$this->string;
	}

}
179