CharacterFilter::getFilteredString()   A
last analyzed

Complexity

Conditions 1
Paths 1

Size

Total Lines 3
Code Lines 1

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 1
eloc 1
nc 1
nop 0
dl 0
loc 3
rs 10
c 0
b 0
f 0
1
<?php
2
/**
3
 * User: jensk
4
 * Date: 21-2-2017
5
 * Time: 10:44
6
 */
7
8
namespace CloudControl\Cms\search;
9
10
class CharacterFilter
{
    /**
     * The value exactly as it was handed to the constructor.
     * It is stored but not read anywhere inside this class.
     *
     * @var string
     */
    protected $originalString;

    /**
     * The normalised text: lowercased, stripped of markup, reduced to
     * the characters a-z, 0-9 and single spaces.
     *
     * @var string
     */
    protected $filteredString = '';

    /**
     * CharacterFilter constructor.
     *
     * Runs the complete normalisation pipeline once (UTF-8 conversion,
     * lowercasing, special character filtering) and stores the result.
     *
     * @param string $string The raw text to normalise
     */
    public function __construct($string)
    {
        $this->originalString = $string;

        $normalised = $this->convertToUTF8((string)$string);
        // Name the encoding explicitly: the text is UTF-8 at this point, so the
        // result must not depend on mbstring's configured internal encoding.
        $normalised = mb_strtolower($normalised, 'UTF-8');

        $this->filteredString = $this->filterSpecialCharacters($normalised);
    }

    /**
     * Returns the filtered string
     *
     * @return string
     */
    public function __toString()
    {
        return (string)$this->filteredString;
    }

    /**
     * Filter out all special characters, like punctuation and characters with accents
     *
     * Note that surrounding whitespace is trimmed before punctuation is turned
     * into spaces, so input ending in punctuation yields a trailing space.
     *
     * @param string $string UTF-8 encoded text
     *
     * @return string Text consisting solely of a-z, A-Z, 0-9 and single spaces
     */
    private function filterSpecialCharacters($string)
    {
        // Put a space in front of every tag, otherwise
        // <h1>something</h1><h2>something</h2> would end up as "somethingsomething".
        $string = str_replace('<', ' <', $string);
        $string = strip_tags($string);
        $string = trim($string);

        // Transliterate accented characters to ASCII. iconv() returns false on
        // failure; in that case keep the untransliterated text instead of
        // discarding everything. Remaining non-ASCII bytes are removed by the
        // alphanumeric filter below.
        $transliterated = iconv('UTF-8', 'ASCII//TRANSLIT//IGNORE', $string);
        if ($transliterated !== false) {
            $string = $transliterated;
        }

        // Replace sentence breaking characters with spaces
        $string = str_replace(array('+', '=', '!', ',', '.', ';', ':', '?'), ' ', $string);
        // Turn newlines and tabs into spaces before the strict filter runs,
        // so the words they separate are not glued together.
        $string = preg_replace("/[\r\n\t]+/", ' ', $string);
        // Filter out everything that is not alphanumeric or a space
        $string = preg_replace('/[^a-zA-Z0-9 ]/', '', $string);
        // Replace multiple spaces with a single space
        $string = preg_replace('/\s+/', ' ', $string);

        // preg_replace() yields null on a PCRE error; always hand back a string.
        return (string)$string;
    }

    /**
     * Convert the string to UTF-8 encoding
     *
     * The source encoding is detected using the configured mbstring detect
     * order. When detection fails the input is treated as UTF-8. The conversion
     * is done by mbstring, which substitutes byte sequences that are invalid in
     * the source encoding rather than failing, so this method always returns a
     * string.
     *
     * @param string $string Text in an unknown encoding
     *
     * @return string The text encoded as UTF-8
     */
    private function convertToUTF8($string)
    {
        $encoding = mb_detect_encoding($string, mb_detect_order(), false);

        if ($encoding === false) {
            // Detection failed: assume UTF-8 so the call below acts as a scrub
            // of invalid byte sequences.
            $encoding = 'UTF-8';
        }

        return mb_convert_encoding($string, 'UTF-8', $encoding);
    }

    /**
     * Returns the filtered string
     *
     * @return string
     */
    public function getFilteredString()
    {
        return $this->filteredString;
    }
}