Passed
Push — develop ( 9c6499...a5e1bf )
by Jens
02:46
created

CharacterFilter::convertToUTF8()   A

Complexity

Conditions 2
Paths 2

Size

Total Lines 11
Code Lines 6

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 2
eloc 6
nc 2
nop 1
dl 0
loc 11
rs 9.4285
c 0
b 0
f 0
1
<?php
2
/**
3
 * User: jensk
4
 * Date: 21-2-2017
5
 * Time: 10:44
6
 */
7
8
namespace CloudControl\Cms\search;
9
10
/**
 * Normalizes an arbitrary piece of text (possibly containing HTML) into a
 * lowercase string made up only of ASCII letters, digits and single spaces.
 *
 * The normalization happens once, in the constructor; the result can be read
 * through getFilteredString() or by casting the object to a string.
 */
class CharacterFilter
{
	/**
	 * The value exactly as it was handed to the constructor.
	 *
	 * @var mixed
	 */
	protected $originalString;

	/**
	 * The normalized text produced by the constructor.
	 *
	 * @var string
	 */
	protected $filteredString = '';

	/**
	 * CharacterFilter constructor.
	 *
	 * Converts the input to UTF-8, lowercases it and strips markup,
	 * punctuation and any non-alphanumeric characters.
	 *
	 * @param mixed $string Text to normalize; it is cast to string before processing
	 */
	public function __construct($string)
	{
		$this->originalString = $string;

		// Cast once so null / scalar input cannot reach the string functions below.
		$string = $this->convertToUTF8((string) $string);
		// The string is UTF-8 at this point, so do not depend on mb_internal_encoding().
		$string = mb_strtolower($string, 'UTF-8');

		$this->filteredString = $this->filterSpecialCharacters($string);
	}

	/**
	 * Returns the filtered string
	 *
	 * @return string
	 */
	public function __toString()
	{
		return $this->getFilteredString();
	}

	/**
	 * Filter out all special characters, like punctuation and characters with accents
	 *
	 * @param string $string UTF-8 encoded text
	 *
	 * @return string Text containing only a-z, A-Z, 0-9 and single spaces, without
	 *                leading or trailing whitespace
	 */
	private function filterSpecialCharacters($string)
	{
		// This is needed, otherwise <h1>something</h1><h2>something</h2> would result in "somethingsomething"
		$string = str_replace('<', ' <', $string);
		$string = strip_tags($string);

		// Transliterate accented characters to their closest ASCII form.
		// iconv() returns false when it cannot convert the input; in that case keep
		// the untransliterated text instead of silently discarding everything - the
		// alphanumeric filter below drops whatever non-ASCII bytes remain.
		$ascii = iconv('UTF-8', 'ASCII//TRANSLIT//IGNORE', $string);
		if ($ascii !== false) {
			$string = $ascii;
		}

		// Replace sentence breaking characters with spaces
		$string = str_replace(array('+', '=', '!', ',', '.', ';', ':', '?'), ' ', $string);
		// Replace runs of newlines with a single space
		$string = preg_replace("/[\r\n]+/", " ", $string);
		// Replace runs of tabs with a single space
		$string = preg_replace("/[\t]+/", " ", $string);
		// Filter out everything that is not alphanumeric or a space
		$string = preg_replace("/[^a-zA-Z0-9 ]/", '', $string);
		// Replace multiple spaces with a single space
		$string = preg_replace('!\s+!', ' ', $string);

		// Trim last: the replacements above can introduce spaces at either end
		// (e.g. a trailing "!" becomes a trailing space).
		return trim((string) $string);
	}

	/**
	 * Convert the string to UTF-8 encoding
	 *
	 * @param string $string Text in an encoding that is part of mb_detect_order()
	 *
	 * @return string UTF-8 encoded text
	 */
	private function convertToUTF8($string)
	{
		$encoding = mb_detect_encoding($string, mb_detect_order(), false);

		// Detection failed, or the text already claims to be UTF-8: re-encode
		// UTF-8 to UTF-8 so that invalid byte sequences are replaced.
		if ($encoding === false || $encoding === 'UTF-8') {
			return mb_convert_encoding($string, 'UTF-8', 'UTF-8');
		}

		$converted = iconv($encoding, 'UTF-8//IGNORE', $string);
		if ($converted === false) {
			// iconv() could not handle the input; let mbstring do the conversion.
			$converted = mb_convert_encoding($string, 'UTF-8', $encoding);
		}

		return $converted;
	}

	/**
	 * Returns the normalized text computed by the constructor.
	 *
	 * @return string
	 */
	public function getFilteredString()
	{
		return (string) $this->filteredString;
	}
}