Passed
Push — develop ( 169afe...f2bd80 )
by Jens
02:39
created

CharacterFilter   A

Complexity

Total Complexity 6

Size/Duplication

Total Lines 73
Duplicated Lines 0 %

Coupling/Cohesion

Components 0
Dependencies 0

Importance

Changes 0
Metric Value
dl 0
loc 73
rs 10
c 0
b 0
f 0
wmc 6
lcom 0
cbo 0

5 Methods

Rating   Name   Duplication   Size   Complexity  
A __construct() 0 8 1
A __toString() 0 4 1
A filterSpecialCharacters() 0 11 1
A convertToUTF8() 0 11 2
A getFilteredString() 0 4 1
1
<?php
2
/**
3
 * User: jensk
4
 * Date: 21-2-2017
5
 * Time: 10:44
6
 */
7
8
namespace library\search;
9
10
/**
 * Normalises an arbitrary string into a lowercase, ASCII-only,
 * single-space separated form.
 *
 * The normalisation happens once, in the constructor; the result can be
 * read through getFilteredString() or by casting the object to a string.
 */
class CharacterFilter
{
	/**
	 * The value exactly as it was passed to the constructor.
	 *
	 * @var mixed
	 */
	protected $originalString;

	/**
	 * The normalised result produced by the constructor.
	 *
	 * @var string
	 */
	protected $filteredString = '';

	/**
	 * CharacterFilter constructor.
	 *
	 * Converts the input to UTF-8, lowercases it and strips everything that
	 * is not an ASCII letter, digit or space.
	 *
	 * @param string $string The raw text to filter
	 */
	public function __construct($string)
	{
		$this->originalString = $string;

		$string = $this->convertToUTF8($string);
		// The string is UTF-8 at this point, so do not depend on the
		// configured mbstring internal encoding.
		$string = mb_strtolower($string, 'UTF-8');

		$this->filteredString = $this->filterSpecialCharacters($string);
	}

	/**
	 * Returns the filtered string
	 *
	 * @return string
	 */
	public function __toString()
	{
		// __toString() must always return a string.
		return (string) $this->filteredString;
	}

	/**
	 * Filter out all special characters, like punctuation and characters with accents
	 *
	 * @param string $string UTF-8 encoded text
	 *
	 * @return string Text containing only a-z, A-Z, 0-9 and single spaces
	 */
	private function filterSpecialCharacters($string)
	{
		$string = strip_tags($string);

		// Replace special alphanumeric characters with their ASCII counterparts.
		// iconv() returns false on failure; in that case keep the
		// untransliterated text so the plain ASCII part of the input survives
		// (remaining non-ASCII bytes are removed by the whitelist below)
		// instead of the whole string collapsing to ''.
		$transliterated = iconv('UTF-8', 'ASCII//TRANSLIT//IGNORE', $string);
		if ($transliterated !== false) {
			$string = $transliterated;
		}

		// Replace sentence breaking characters with spaces
		$string = str_replace(array('+', '=', '!', ',', '.', ';', ':', '?'), ' ', $string);
		// Replace runs of newlines and tabs with a space, so words separated
		// only by them are not glued together by the whitelist below.
		$string = preg_replace('/[\r\n\t]+/', ' ', $string);
		// Filter out everything that is not alphanumeric or a space
		$string = preg_replace('/[^a-zA-Z0-9 ]/', '', $string);
		// Replace multiple spaces with a single space
		$string = preg_replace('/\s+/', ' ', $string);

		// preg_replace() returns null on error; always hand back a string.
		return (string) $string;
	}

	/**
	 * Convert the string to UTF-8 encoding
	 *
	 * @param string $string Text in any encoding listed in mb_detect_order()
	 *
	 * @return string
	 */
	private function convertToUTF8($string)
	{
		$string = (string) $string;
		$encoding = mb_detect_encoding($string, mb_detect_order(), false);

		if ($encoding === false) {
			// No encoding detected: treat the input as UTF-8 and let mbstring
			// substitute any invalid byte sequences, rather than passing
			// false to iconv() as the source charset.
			return mb_convert_encoding($string, 'UTF-8', 'UTF-8');
		}

		if ($encoding === 'UTF-8') {
			// Round-trip through mbstring so malformed sequences are replaced.
			$string = mb_convert_encoding($string, 'UTF-8', 'UTF-8');
		}

		$converted = iconv($encoding, 'UTF-8//IGNORE', $string);
		if ($converted === false) {
			// iconv() failed; fall back to mbstring for the same conversion.
			return mb_convert_encoding($string, 'UTF-8', $encoding);
		}

		return $converted;
	}

	/**
	 * Returns the filtered string
	 *
	 * @return string
	 */
	public function getFilteredString()
	{
		return $this->filteredString;
	}
}