Passed
Branch develop (96efe9)
by Jens
02:38
created
src/search/Tokenizer.php 1 patch
Indentation   +41 added lines, -41 removed lines patch added patch discarded remove patch
@@ -13,47 +13,47 @@
 block discarded – undo
13 13
  */
14 14
 class Tokenizer
15 15
 {
16
-	protected $inputString;
17
-	protected $tokenVector = array();
18
-
19
-	/**
20
-	 * Tokenizer constructor.
21
-	 *
22
-	 * @param string $string Should preferably be parsed wit \CloudControl\Cms\search\CharacterFilter
23
-	 * @see \CloudControl\Cms\search\CharacterFilter
24
-	 */
25
-	public function __construct($string)
26
-	{
27
-		$this->inputString = $string;
28
-		$this->tokenize();
29
-	}
30
-
31
-	protected function tokenize()
32
-	{
33
-		$tokens = explode(' ', $this->inputString);
34
-		foreach ($tokens as $token) {
35
-			$this->addTokenToVector($token);
36
-		}
37
-	}
38
-
39
-	protected function addTokenToVector($token)
40
-	{
41
-		if (!empty($token)) {
42
-			if (isset($this->tokenVector[$token])) {
43
-				$this->tokenVector[$token] += 1;
44
-			} else {
45
-				$this->tokenVector[$token] = 1;
46
-			}
47
-		}
48
-	}
49
-
50
-	/**
51
-	 * @return array
52
-	 */
53
-	public function getTokenVector()
54
-	{
55
-		return $this->tokenVector;
56
-	}
16
+    protected $inputString;
17
+    protected $tokenVector = array();
18
+
19
+    /**
20
+     * Tokenizer constructor.
21
+     *
22
+     * @param string $string Should preferably be parsed wit \CloudControl\Cms\search\CharacterFilter
23
+     * @see \CloudControl\Cms\search\CharacterFilter
24
+     */
25
+    public function __construct($string)
26
+    {
27
+        $this->inputString = $string;
28
+        $this->tokenize();
29
+    }
30
+
31
+    protected function tokenize()
32
+    {
33
+        $tokens = explode(' ', $this->inputString);
34
+        foreach ($tokens as $token) {
35
+            $this->addTokenToVector($token);
36
+        }
37
+    }
38
+
39
+    protected function addTokenToVector($token)
40
+    {
41
+        if (!empty($token)) {
42
+            if (isset($this->tokenVector[$token])) {
43
+                $this->tokenVector[$token] += 1;
44
+            } else {
45
+                $this->tokenVector[$token] = 1;
46
+            }
47
+        }
48
+    }
49
+
50
+    /**
51
+     * @return array
52
+     */
53
+    public function getTokenVector()
54
+    {
55
+        return $this->tokenVector;
56
+    }
57 57
 
58 58
 
59 59
 }
60 60
\ No newline at end of file
Please login to merge, or discard this patch.
src/search/Filter.php 1 patch
Indentation   +10 added lines, -10 removed lines patch added patch discarded remove patch
@@ -11,15 +11,15 @@
 block discarded – undo
11 11
 
12 12
 interface Filter
13 13
 {
14
-	/**
15
-	 * Filter constructor.
16
-	 *
17
-	 * @param array $tokens
18
-	 */
19
-	public function __construct($tokens);
14
+    /**
15
+     * Filter constructor.
16
+     *
17
+     * @param array $tokens
18
+     */
19
+    public function __construct($tokens);
20 20
 
21
-	/**
22
-	 * @return array
23
-	 */
24
-	public function getFilterResults();
21
+    /**
22
+     * @return array
23
+     */
24
+    public function getFilterResults();
25 25
 }
26 26
\ No newline at end of file
Please login to merge, or discard this patch.
src/search/DocumentTokenizer.php 1 patch
Indentation   +145 added lines, -145 removed lines patch added patch discarded remove patch
@@ -13,149 +13,149 @@
 block discarded – undo
13 13
 
14 14
 class DocumentTokenizer
15 15
 {
16
-	/**
17
-	 * @var Document
18
-	 */
19
-	protected $document;
20
-
21
-	/**
22
-	 * @var array
23
-	 */
24
-	protected $tokenVector = array();
25
-	protected $storage;
26
-
27
-	/**
28
-	 * Tokenizer constructor.
29
-	 *
30
-	 * @param \CloudControl\Cms\storage\Document $document
31
-	 * @param Storage                   $storage
32
-	 */
33
-	public function __construct(Document $document, Storage $storage)
34
-	{
35
-		$this->document = $document;
36
-		$this->storage = $storage;
37
-		$this->tokenize();
38
-	}
39
-
40
-	/**
41
-	 * Execute tokenization of all document fields
42
-	 */
43
-	private function tokenize()
44
-	{
45
-		$this->tokenizeTitle();
46
-		$this->tokenizeFields();
47
-		$this->tokenizeBricks();
48
-		$this->tokenizeDynamicBricks();
49
-		$this->tokenVector = array_filter($this->tokenVector);
50
-		arsort($this->tokenVector);
51
-	}
52
-
53
-	private function tokenizeTitle()
54
-	{
55
-		$filteredString = new CharacterFilter($this->document->title);
56
-		$tokenizer = new Tokenizer($filteredString);
57
-		$this->addTokenVectorToVector($tokenizer->getTokenVector(), 'title');
58
-	}
59
-
60
-	private function tokenizeFields()
61
-	{
62
-		$fields = $this->document->fields;
63
-		$documentDefinition = $this->storage->getDocumentTypes()->getDocumentTypeBySlug($this->document->documentTypeSlug);
64
-		foreach ($fields as $fieldName => $field) {
65
-			$fieldType = $this->getFieldType($fieldName, $documentDefinition);
66
-			$this->tokenizeField($field, $fieldName, $fieldType);
67
-		}
68
-	}
69
-
70
-	private function tokenizeField($field, $fieldName, $fieldType)
71
-	{
72
-		foreach ($field as $value) {
73
-			// Only index fields that contain text
74
-			if (in_array($fieldType, array('String', 'Text', 'Rich Text'))) {
75
-				$filteredString = new CharacterFilter($value);
76
-				$tokenizer = new Tokenizer($filteredString);
77
-				$this->addTokenVectorToVector($tokenizer->getTokenVector(), $fieldName);
78
-			}
79
-		}
80
-	}
81
-
82
-	private function tokenizeBricks()
83
-	{
84
-		$bricks = $this->document->bricks;
85
-		foreach ($bricks as $brickSlug => $bricks) {
86
-			foreach ($bricks as $brick) {
87
-				$this->tokenizeBrick($brick, $brickSlug);
88
-			}
89
-		}
90
-	}
91
-
92
-	private function tokenizeBrick($brick, $brickSlug)
93
-	{
94
-		$fields  = $brick->fields;
95
-		$brickDefinition = $this->storage->getBricks()->getBrickBySlug($brick->type);
96
-		foreach ($fields as $fieldName => $field) {
97
-			$fieldType = $this->getFieldType($fieldName, $brickDefinition);
98
-			$this->tokenizeField($field, $brickSlug . '__' . $fieldName, $fieldType);
99
-		}
100
-	}
101
-
102
-	private function tokenizeDynamicBricks()
103
-	{
104
-		$dynamicBricks = $this->document->dynamicBricks;
105
-		foreach ($dynamicBricks as $key => $brick) {
106
-			$this->tokenizeBrick($brick, 'dynamicBricks__' . $brick->type . $key);
107
-		}
108
-	}
109
-
110
-	public function getTokens()
111
-	{
112
-		return $this->tokenVector;
113
-	}
114
-
115
-	/**
116
-	 * Add a token to the existing tokenvector
117
-	 * @param     		$token
118
-	 * @param string    $field
119
-	 * @param int 		$count
120
-	 */
121
-	private function addTokenToVector($token, $field, $count = 1)
122
-	{
123
-		if (!empty($token)) {
124
-			if (isset($this->tokenVector[$field][$token])) {
125
-				$this->tokenVector[$field][$token] += $count;
126
-			} else {
127
-				$this->tokenVector[$field][$token] = $count;
128
-			}
129
-		}
130
-	}
131
-
132
-	/**
133
-	 * Add a complete token vector to the existing one.
134
-	 * @param $tokenVector
135
-	 * @param $field
136
-	 */
137
-	private function addTokenVectorToVector($tokenVector, $field)
138
-	{
139
-		foreach ($tokenVector as $token => $count) {
140
-			$this->addTokenToVector($token, $field, $count);
141
-		}
142
-	}
143
-
144
-	/**
145
-	 * Get the type for a field
146
-	 * @param $fieldName
147
-	 * @param $documentDefinition
148
-	 * @return mixed
149
-	 * @throws \Exception
150
-	 */
151
-	private function getFieldType($fieldName, $documentDefinition)
152
-	{
153
-		foreach ($documentDefinition->fields as $fieldTypeDefinition) {
154
-			if ($fieldTypeDefinition->slug === $fieldName) {
155
-				return $fieldTypeDefinition->type;
156
-			}
157
-		}
158
-
159
-		throw new \Exception('Unknown field type for field' . $fieldName . ' in document ' . $this->document->path);
160
-	}
16
+    /**
17
+     * @var Document
18
+     */
19
+    protected $document;
20
+
21
+    /**
22
+     * @var array
23
+     */
24
+    protected $tokenVector = array();
25
+    protected $storage;
26
+
27
+    /**
28
+     * Tokenizer constructor.
29
+     *
30
+     * @param \CloudControl\Cms\storage\Document $document
31
+     * @param Storage                   $storage
32
+     */
33
+    public function __construct(Document $document, Storage $storage)
34
+    {
35
+        $this->document = $document;
36
+        $this->storage = $storage;
37
+        $this->tokenize();
38
+    }
39
+
40
+    /**
41
+     * Execute tokenization of all document fields
42
+     */
43
+    private function tokenize()
44
+    {
45
+        $this->tokenizeTitle();
46
+        $this->tokenizeFields();
47
+        $this->tokenizeBricks();
48
+        $this->tokenizeDynamicBricks();
49
+        $this->tokenVector = array_filter($this->tokenVector);
50
+        arsort($this->tokenVector);
51
+    }
52
+
53
+    private function tokenizeTitle()
54
+    {
55
+        $filteredString = new CharacterFilter($this->document->title);
56
+        $tokenizer = new Tokenizer($filteredString);
57
+        $this->addTokenVectorToVector($tokenizer->getTokenVector(), 'title');
58
+    }
59
+
60
+    private function tokenizeFields()
61
+    {
62
+        $fields = $this->document->fields;
63
+        $documentDefinition = $this->storage->getDocumentTypes()->getDocumentTypeBySlug($this->document->documentTypeSlug);
64
+        foreach ($fields as $fieldName => $field) {
65
+            $fieldType = $this->getFieldType($fieldName, $documentDefinition);
66
+            $this->tokenizeField($field, $fieldName, $fieldType);
67
+        }
68
+    }
69
+
70
+    private function tokenizeField($field, $fieldName, $fieldType)
71
+    {
72
+        foreach ($field as $value) {
73
+            // Only index fields that contain text
74
+            if (in_array($fieldType, array('String', 'Text', 'Rich Text'))) {
75
+                $filteredString = new CharacterFilter($value);
76
+                $tokenizer = new Tokenizer($filteredString);
77
+                $this->addTokenVectorToVector($tokenizer->getTokenVector(), $fieldName);
78
+            }
79
+        }
80
+    }
81
+
82
+    private function tokenizeBricks()
83
+    {
84
+        $bricks = $this->document->bricks;
85
+        foreach ($bricks as $brickSlug => $bricks) {
86
+            foreach ($bricks as $brick) {
87
+                $this->tokenizeBrick($brick, $brickSlug);
88
+            }
89
+        }
90
+    }
91
+
92
+    private function tokenizeBrick($brick, $brickSlug)
93
+    {
94
+        $fields  = $brick->fields;
95
+        $brickDefinition = $this->storage->getBricks()->getBrickBySlug($brick->type);
96
+        foreach ($fields as $fieldName => $field) {
97
+            $fieldType = $this->getFieldType($fieldName, $brickDefinition);
98
+            $this->tokenizeField($field, $brickSlug . '__' . $fieldName, $fieldType);
99
+        }
100
+    }
101
+
102
+    private function tokenizeDynamicBricks()
103
+    {
104
+        $dynamicBricks = $this->document->dynamicBricks;
105
+        foreach ($dynamicBricks as $key => $brick) {
106
+            $this->tokenizeBrick($brick, 'dynamicBricks__' . $brick->type . $key);
107
+        }
108
+    }
109
+
110
+    public function getTokens()
111
+    {
112
+        return $this->tokenVector;
113
+    }
114
+
115
+    /**
116
+     * Add a token to the existing tokenvector
117
+     * @param     		$token
118
+     * @param string    $field
119
+     * @param int 		$count
120
+     */
121
+    private function addTokenToVector($token, $field, $count = 1)
122
+    {
123
+        if (!empty($token)) {
124
+            if (isset($this->tokenVector[$field][$token])) {
125
+                $this->tokenVector[$field][$token] += $count;
126
+            } else {
127
+                $this->tokenVector[$field][$token] = $count;
128
+            }
129
+        }
130
+    }
131
+
132
+    /**
133
+     * Add a complete token vector to the existing one.
134
+     * @param $tokenVector
135
+     * @param $field
136
+     */
137
+    private function addTokenVectorToVector($tokenVector, $field)
138
+    {
139
+        foreach ($tokenVector as $token => $count) {
140
+            $this->addTokenToVector($token, $field, $count);
141
+        }
142
+    }
143
+
144
+    /**
145
+     * Get the type for a field
146
+     * @param $fieldName
147
+     * @param $documentDefinition
148
+     * @return mixed
149
+     * @throws \Exception
150
+     */
151
+    private function getFieldType($fieldName, $documentDefinition)
152
+    {
153
+        foreach ($documentDefinition->fields as $fieldTypeDefinition) {
154
+            if ($fieldTypeDefinition->slug === $fieldName) {
155
+                return $fieldTypeDefinition->type;
156
+            }
157
+        }
158
+
159
+        throw new \Exception('Unknown field type for field' . $fieldName . ' in document ' . $this->document->path);
160
+    }
161 161
 }
162 162
\ No newline at end of file
Please login to merge, or discard this patch.
src/search/CharacterFilter.php 1 patch
Indentation   +65 added lines, -65 removed lines patch added patch discarded remove patch
@@ -9,76 +9,76 @@
 block discarded – undo
9 9
 
10 10
 class CharacterFilter
11 11
 {
12
-	protected $originalString;
13
-	protected $filteredString = '';
12
+    protected $originalString;
13
+    protected $filteredString = '';
14 14
 
15
-	/**
16
-	 * CharacterFilter constructor.
17
-	 *
18
-	 * @param $string
19
-	 */
20
-	public function __construct($string)
21
-	{
22
-		$this->originalString = $string;
23
-		$string = $this->convertToUTF8($string);
24
-		$string = mb_strtolower($string);
25
-		$string = $this->filterSpecialCharacters($string);
26
-		$this->filteredString = $string;
27
-	}
15
+    /**
16
+     * CharacterFilter constructor.
17
+     *
18
+     * @param $string
19
+     */
20
+    public function __construct($string)
21
+    {
22
+        $this->originalString = $string;
23
+        $string = $this->convertToUTF8($string);
24
+        $string = mb_strtolower($string);
25
+        $string = $this->filterSpecialCharacters($string);
26
+        $this->filteredString = $string;
27
+    }
28 28
 
29
-	/**
30
-	 * Returns the filtered string
31
-	 * @return string|void
32
-	 */
33
-	public function __toString()
34
-	{
35
-		return $this->filteredString;
36
-	}
29
+    /**
30
+     * Returns the filtered string
31
+     * @return string|void
32
+     */
33
+    public function __toString()
34
+    {
35
+        return $this->filteredString;
36
+    }
37 37
 
38
-	/**
39
-	 * Filter out all special characters, like punctuation and characters with accents
40
-	 *
41
-	 * @param $string
42
-	 *
43
-	 * @return mixed|string
44
-	 */
45
-	private function filterSpecialCharacters($string)
46
-	{
47
-		$string = str_replace('<', ' <', $string); // This is need, otherwise this: <h1>something</h1><h2>something</h2> will result in somethingsomething
48
-		$string = strip_tags($string);
49
-		$string = trim($string);
50
-		$string = iconv('UTF-8', 'ASCII//TRANSLIT//IGNORE', $string); // Remove special alphanumeric characters
51
-		$string = str_replace(array('+', '=', '!', ',', '.',';', ':', '?'), ' ', $string); // Replace sentence breaking charaters with spaces
52
-		$string = preg_replace("/[\r\n]+/", " ", $string); // Replace multiple newlines with a single space.
53
-		$string = preg_replace("/[\t]+/", " ", $string); // Replace multiple tabs with a single space.
54
-		$string = preg_replace("/[^a-zA-Z0-9 ]/", '', $string); // Filter out everything that is not alphanumeric or a space
55
-		$string = preg_replace('!\s+!', ' ', $string); // Replace multiple spaces with a single space
56
-		return $string;
57
-	}
38
+    /**
39
+     * Filter out all special characters, like punctuation and characters with accents
40
+     *
41
+     * @param $string
42
+     *
43
+     * @return mixed|string
44
+     */
45
+    private function filterSpecialCharacters($string)
46
+    {
47
+        $string = str_replace('<', ' <', $string); // This is need, otherwise this: <h1>something</h1><h2>something</h2> will result in somethingsomething
48
+        $string = strip_tags($string);
49
+        $string = trim($string);
50
+        $string = iconv('UTF-8', 'ASCII//TRANSLIT//IGNORE', $string); // Remove special alphanumeric characters
51
+        $string = str_replace(array('+', '=', '!', ',', '.',';', ':', '?'), ' ', $string); // Replace sentence breaking charaters with spaces
52
+        $string = preg_replace("/[\r\n]+/", " ", $string); // Replace multiple newlines with a single space.
53
+        $string = preg_replace("/[\t]+/", " ", $string); // Replace multiple tabs with a single space.
54
+        $string = preg_replace("/[^a-zA-Z0-9 ]/", '', $string); // Filter out everything that is not alphanumeric or a space
55
+        $string = preg_replace('!\s+!', ' ', $string); // Replace multiple spaces with a single space
56
+        return $string;
57
+    }
58 58
 
59
-	/**
60
-	 * Convert the string to UTF-8 encoding
61
-	 * @param $string
62
-	 *
63
-	 * @return string
64
-	 */
65
-	private function convertToUTF8($string)
66
-	{
67
-		$encoding = mb_detect_encoding($string, mb_detect_order(), false);
59
+    /**
60
+     * Convert the string to UTF-8 encoding
61
+     * @param $string
62
+     *
63
+     * @return string
64
+     */
65
+    private function convertToUTF8($string)
66
+    {
67
+        $encoding = mb_detect_encoding($string, mb_detect_order(), false);
68 68
 
69
-		if($encoding == "UTF-8") {
70
-			$string = mb_convert_encoding($string, 'UTF-8', 'UTF-8');
71
-		}
69
+        if($encoding == "UTF-8") {
70
+            $string = mb_convert_encoding($string, 'UTF-8', 'UTF-8');
71
+        }
72 72
 
73
-		$out = iconv(mb_detect_encoding($string, mb_detect_order(), false), "UTF-8//IGNORE", $string);
74
-		return $out;
75
-	}
73
+        $out = iconv(mb_detect_encoding($string, mb_detect_order(), false), "UTF-8//IGNORE", $string);
74
+        return $out;
75
+    }
76 76
 
77
-	/**
78
-	 * @return mixed|string
79
-	 */
80
-	public function getFilteredString()
81
-	{
82
-		return $this->filteredString;
83
-	}
77
+    /**
78
+     * @return mixed|string
79
+     */
80
+    public function getFilteredString()
81
+    {
82
+        return $this->filteredString;
83
+    }
84 84
 }
85 85
\ No newline at end of file
Please login to merge, or discard this patch.
src/search/filters/EnglishStopWords.php 1 patch
Indentation   +1 added lines, -1 removed lines patch added patch discarded remove patch
@@ -10,5 +10,5 @@
 block discarded – undo
10 10
 
11 11
 class EnglishStopWords extends StopWordsFilter
12 12
 {
13
-	protected $stopWords = array('a', 'about', 'above', 'after', 'again', 'against', 'all', 'am', 'an', 'and', 'any', 'are', 'arent', 'as', 'at', 'be', 'because', 'been', 'before', 'being', 'below', 'between', 'both', 'but', 'by', 'cant', 'cannot', 'could', 'couldnt', 'did', 'didnt', 'do', 'does', 'doesnt', 'doing', 'dont', 'down', 'during', 'each', 'few', 'for', 'from', 'further', 'had', 'hadnt', 'has', 'hasnt', 'have', 'havent', 'having', 'he', 'hed', 'hell', 'hes', 'her', 'here', 'heres', 'hers', 'herself', 'him', 'himself', 'his', 'how', 'hows', 'i', 'id', 'ill', 'im', 'ive', 'if', 'in', 'into', 'is', 'isnt', 'it', 'its', 'its', 'itself', 'lets', 'me', 'more', 'most', 'mustnt', 'my', 'myself', 'no', 'nor', 'not', 'of', 'off', 'on', 'once', 'only', 'or', 'other', 'ought', 'our', 'ours	ourselves', 'out', 'over', 'own', 'same', 'shant', 'she', 'shed', 'shell', 'shes', 'should', 'shouldnt', 'so', 'some', 'such', 'than', 'that', 'thats', 'the', 'their', 'theirs', 'them', 'themselves', 'then', 'there', 'theres', 'these', 'they', 'theyd', 'theyll', 'theyre', 'theyve', 'this', 'those', 'through', 'to', 'too', 'under', 'until', 'up', 'very', 'was', 'wasnt', 'we', 'wed', 'well', 'were', 'weve', 'were', 'werent', 'what', 'whats', 'when', 'whens', 'where', 'wheres', 'which', 'while', 'who', 'whos', 'whom', 'why', 'whys', 'with', 'wont', 'would', 'wouldnt', 'you', 'youd', 'youll', 'youre', 'youve', 'your', 'yours', 'yourself', 'yourselves');
13
+    protected $stopWords = array('a', 'about', 'above', 'after', 'again', 'against', 'all', 'am', 'an', 'and', 'any', 'are', 'arent', 'as', 'at', 'be', 'because', 'been', 'before', 'being', 'below', 'between', 'both', 'but', 'by', 'cant', 'cannot', 'could', 'couldnt', 'did', 'didnt', 'do', 'does', 'doesnt', 'doing', 'dont', 'down', 'during', 'each', 'few', 'for', 'from', 'further', 'had', 'hadnt', 'has', 'hasnt', 'have', 'havent', 'having', 'he', 'hed', 'hell', 'hes', 'her', 'here', 'heres', 'hers', 'herself', 'him', 'himself', 'his', 'how', 'hows', 'i', 'id', 'ill', 'im', 'ive', 'if', 'in', 'into', 'is', 'isnt', 'it', 'its', 'its', 'itself', 'lets', 'me', 'more', 'most', 'mustnt', 'my', 'myself', 'no', 'nor', 'not', 'of', 'off', 'on', 'once', 'only', 'or', 'other', 'ought', 'our', 'ours	ourselves', 'out', 'over', 'own', 'same', 'shant', 'she', 'shed', 'shell', 'shes', 'should', 'shouldnt', 'so', 'some', 'such', 'than', 'that', 'thats', 'the', 'their', 'theirs', 'them', 'themselves', 'then', 'there', 'theres', 'these', 'they', 'theyd', 'theyll', 'theyre', 'theyve', 'this', 'those', 'through', 'to', 'too', 'under', 'until', 'up', 'very', 'was', 'wasnt', 'we', 'wed', 'well', 'were', 'weve', 'were', 'werent', 'what', 'whats', 'when', 'whens', 'where', 'wheres', 'which', 'while', 'who', 'whos', 'whom', 'why', 'whys', 'with', 'wont', 'would', 'wouldnt', 'you', 'youd', 'youll', 'youre', 'youve', 'your', 'yours', 'yourself', 'yourselves');
14 14
 }
15 15
\ No newline at end of file
Please login to merge, or discard this patch.
src/search/filters/DutchStopWords.php 1 patch
Indentation   +1 added lines, -1 removed lines patch added patch discarded remove patch
@@ -10,5 +10,5 @@
 block discarded – undo
10 10
 
11 11
 class DutchStopWords extends StopWordsFilter
12 12
 {
13
-	protected $stopWords = array('aan','af','al','alles','als','altijd','andere','ben','bij','daar','dan','dat','de','der','deze','die','dit','doch','doen','door','doorgaans','dus','een','eens','en','er','ge','geen','geweest','haar','had','heb','hebben','heeft','hem','het','hier','hij','hoe','hun','iemand','iets','ik','in','is','ja','je','kan','kon','kunnen','maar','me','meer','men','met','mij','mijn','moet','na','naar','niet','niets','nog','nu','of','om','omdat','ons','ook','op','over','reeds','te','tegen','toch','toen','tot','u','uit','uw','van','veel','voor','want','waren','was','wat','we','wel','werd','wezen','wie','wij','wil','worden','zal','ze','zei','zelf','zich','zij','zijn','zo','zodat','zonder','zou');
13
+    protected $stopWords = array('aan','af','al','alles','als','altijd','andere','ben','bij','daar','dan','dat','de','der','deze','die','dit','doch','doen','door','doorgaans','dus','een','eens','en','er','ge','geen','geweest','haar','had','heb','hebben','heeft','hem','het','hier','hij','hoe','hun','iemand','iets','ik','in','is','ja','je','kan','kon','kunnen','maar','me','meer','men','met','mij','mijn','moet','na','naar','niet','niets','nog','nu','of','om','omdat','ons','ook','op','over','reeds','te','tegen','toch','toen','tot','u','uit','uw','van','veel','voor','want','waren','was','wat','we','wel','werd','wezen','wie','wij','wil','worden','zal','ze','zei','zelf','zich','zij','zijn','zo','zodat','zonder','zou');
14 14
 }
15 15
\ No newline at end of file
Please login to merge, or discard this patch.
src/search/Indexer.php 1 patch
Indentation   +159 added lines, -159 removed lines patch added patch discarded remove patch
@@ -22,87 +22,87 @@  discard block
 block discarded – undo
22 22
  */
23 23
 class Indexer extends SearchDbConnected
24 24
 {
25
-	const SQLITE_MAX_COMPOUND_SELECT = 100;
26
-	protected $filters = array(
27
-		'DutchStopWords',
28
-		'EnglishStopWords'
29
-	);
30
-	protected $storageDir;
31
-	/**
32
-	 * @var double
33
-	 */
34
-	protected $loggingStart;
35
-	/**
36
-	 * @var string
37
-	 */
38
-	protected $log;
39
-	/**
40
-	 * @var double
41
-	 */
42
-	protected $lastLog;
43
-
44
-	const SEARCH_TEMP_DB = 'search_tmp.db';
45
-
46
-	/**
47
-	 * Creates a new temporary search db, cleans it if it exists
48
-	 * then calculates and stores the search index in this db
49
-	 * and finally if indexing completed replaces the current search
50
-	 * db with the temporary one. Returns the log in string format.
51
-	 * @return string
52
-	 */
53
-	public function updateIndex()
54
-	{
55
-		$this->startLogging();
56
-		$this->addLog('Indexing start.');
57
-		$this->addLog('Clearing index.');
58
-		$this->resetIndex();
59
-		$this->addLog('Cleaning Published Deleted Documents');
60
-		$this->storage->getDocuments()->cleanPublishedDeletedDocuments();
61
-		$this->addLog('Retrieving documents to be indexed.');
62
-		$documents = $this->storage->getDocuments()->getPublishedDocumentsNoFolders();
63
-		$this->addLog('Start Document Term Count for ' . count($documents) . ' documents');
64
-		$this->createDocumentTermCount($documents);
65
-		$this->addLog('Start Document Term Frequency.');
66
-		$this->createDocumentTermFrequency();
67
-		$this->addLog('Start Term Field Length Norm.');
68
-		$this->createTermFieldLengthNorm();
69
-		$this->addLog('Start Inverse Document Frequency.');
70
-		$this->createInverseDocumentFrequency();
71
-		$this->addLog('Replacing old index.');
72
-		$this->replaceOldIndex();
73
-		$this->addLog('Indexing complete.');
74
-		return $this->log;
75
-	}
76
-
77
-	/**
78
-	 * Count how often a term is used in a document
79
-	 *
80
-	 * @param $documents
81
-	 */
82
-	public function createDocumentTermCount($documents)
83
-	{
84
-		$termCount = new TermCount($this->getSearchDbHandle(), $documents, $this->filters, $this->storage);
85
-		$termCount->execute();
86
-	}
87
-
88
-	/**
89
-	 * Calculate the frequency index for a term with
90
-	 * a field
91
-	 */
92
-	public function createDocumentTermFrequency()
93
-	{
94
-		$termFrequency = new TermFrequency($this->getSearchDbHandle());
95
-		$termFrequency->execute();
96
-	}
97
-
98
-
99
-	/**
100
-	 * Resets the entire index
101
-	 */
102
-	public function resetIndex()
103
-	{
104
-		$db = $this->getSearchDbHandle();
105
-		$sql = '
25
+    const SQLITE_MAX_COMPOUND_SELECT = 100;
26
+    protected $filters = array(
27
+        'DutchStopWords',
28
+        'EnglishStopWords'
29
+    );
30
+    protected $storageDir;
31
+    /**
32
+     * @var double
33
+     */
34
+    protected $loggingStart;
35
+    /**
36
+     * @var string
37
+     */
38
+    protected $log;
39
+    /**
40
+     * @var double
41
+     */
42
+    protected $lastLog;
43
+
44
+    const SEARCH_TEMP_DB = 'search_tmp.db';
45
+
46
+    /**
47
+     * Creates a new temporary search db, cleans it if it exists
48
+     * then calculates and stores the search index in this db
49
+     * and finally if indexing completed replaces the current search
50
+     * db with the temporary one. Returns the log in string format.
51
+     * @return string
52
+     */
53
+    public function updateIndex()
54
+    {
55
+        $this->startLogging();
56
+        $this->addLog('Indexing start.');
57
+        $this->addLog('Clearing index.');
58
+        $this->resetIndex();
59
+        $this->addLog('Cleaning Published Deleted Documents');
60
+        $this->storage->getDocuments()->cleanPublishedDeletedDocuments();
61
+        $this->addLog('Retrieving documents to be indexed.');
62
+        $documents = $this->storage->getDocuments()->getPublishedDocumentsNoFolders();
63
+        $this->addLog('Start Document Term Count for ' . count($documents) . ' documents');
64
+        $this->createDocumentTermCount($documents);
65
+        $this->addLog('Start Document Term Frequency.');
66
+        $this->createDocumentTermFrequency();
67
+        $this->addLog('Start Term Field Length Norm.');
68
+        $this->createTermFieldLengthNorm();
69
+        $this->addLog('Start Inverse Document Frequency.');
70
+        $this->createInverseDocumentFrequency();
71
+        $this->addLog('Replacing old index.');
72
+        $this->replaceOldIndex();
73
+        $this->addLog('Indexing complete.');
74
+        return $this->log;
75
+    }
76
+
77
+    /**
78
+     * Count how often a term is used in a document
79
+     *
80
+     * @param $documents
81
+     */
82
+    public function createDocumentTermCount($documents)
83
+    {
84
+        $termCount = new TermCount($this->getSearchDbHandle(), $documents, $this->filters, $this->storage);
85
+        $termCount->execute();
86
+    }
87
+
88
+    /**
89
+     * Calculate the frequency index for a term with
90
+     * a field
91
+     */
92
+    public function createDocumentTermFrequency()
93
+    {
94
+        $termFrequency = new TermFrequency($this->getSearchDbHandle());
95
+        $termFrequency->execute();
96
+    }
97
+
98
+
99
+    /**
100
+     * Resets the entire index
101
+     */
102
+    public function resetIndex()
103
+    {
104
+        $db = $this->getSearchDbHandle();
105
+        $sql = '
106 106
 			DELETE FROM term_count;
107 107
 			DELETE FROM term_frequency;
108 108
 			DELETE FROM inverse_document_frequency;
@@ -110,82 +110,82 @@  discard block
 block discarded – undo
110 110
 			UPDATE `sqlite_sequence` SET `seq`= 0 WHERE `name`=\'term_frequency\';
111 111
 			UPDATE `sqlite_sequence` SET `seq`= 0 WHERE `name`=\'inverse_document_frequency\';
112 112
 		';
113
-		$db->exec($sql);
114
-	}
115
-
116
-	/**
117
-	 * Calculates the inverse document frequency for each
118
-	 * term. This is a representation of how often a certain
119
-	 * term is used in comparison to all terms.
120
-	 */
121
-	public function createInverseDocumentFrequency()
122
-	{
123
-		$documentCount = $this->getTotalDocumentCount();
124
-		$inverseDocumentFrequency = new InverseDocumentFrequency($this->getSearchDbHandle(), $documentCount);
125
-		$inverseDocumentFrequency->execute();
126
-	}
127
-
128
-	/**
129
-	 * @return int|mixed
130
-	 */
131
-	private function getTotalDocumentCount()
132
-	{
133
-		return $this->storage->getDocuments()->getTotalDocumentCount();
134
-	}
135
-
136
-	/**
137
-	 * Calculates the Term Field Length Norm.
138
-	 * This is an index determining how important a
139
-	 * term is, based on the total length of the field
140
-	 * it comes from.
141
-	 */
142
-	public function createTermFieldLengthNorm()
143
-	{
144
-		$termFieldLengthNorm = new TermFieldLengthNorm($this->getSearchDbHandle());
145
-		$termFieldLengthNorm->execute();
146
-	}
147
-
148
-	/**
149
-	 * Stores the time the indexing started in memory
150
-	 */
151
-	private function startLogging()
152
-	{
153
-		$this->loggingStart = round(microtime(true) * 1000);
154
-		$this->lastLog = $this->loggingStart;
155
-	}
156
-
157
-	/**
158
-	 * Adds a logline with the time since last log
159
-	 * @param $string
160
-	 */
161
-	private function addLog($string)
162
-	{
163
-		$currentTime = round(microtime(true) * 1000);
164
-		$this->log .= date('d-m-Y H:i:s - ') . str_pad($string, 50, " ", STR_PAD_RIGHT) . "\t" . ($currentTime - $this->lastLog) . 'ms since last log. ' . "\t" . ($currentTime - $this->loggingStart) . 'ms since start.' . PHP_EOL;
165
-		$this->lastLog = round(microtime(true) * 1000);
166
-	}
167
-
168
-	/**
169
-	 * Creates the SQLite \PDO object if it doesnt
170
-	 * exist and returns it.
171
-	 * @return \PDO
172
-	 */
173
-	protected function getSearchDbHandle()
174
-	{
175
-		if ($this->searchDbHandle === null) {
176
-			$path = $this->storageDir . DIRECTORY_SEPARATOR;
177
-			$this->searchDbHandle = new \PDO('sqlite:' . $path . self::SEARCH_TEMP_DB);
178
-		}
179
-		return $this->searchDbHandle;
180
-	}
181
-
182
-	/**
183
-	 * Replaces the old search index database with the new one.
184
-	 */
185
-	public function replaceOldIndex()
186
-	{
187
-		$this->searchDbHandle = null;
188
-		$path = $this->storageDir . DIRECTORY_SEPARATOR;
189
-		rename($path . self::SEARCH_TEMP_DB, $path . 'search.db');
190
-	}
113
+        $db->exec($sql);
114
+    }
115
+
116
+    /**
117
+     * Calculates the inverse document frequency for each
118
+     * term. This is a representation of how often a certain
119
+     * term is used in comparison to all terms.
120
+     */
121
+    public function createInverseDocumentFrequency()
122
+    {
123
+        $documentCount = $this->getTotalDocumentCount();
124
+        $inverseDocumentFrequency = new InverseDocumentFrequency($this->getSearchDbHandle(), $documentCount);
125
+        $inverseDocumentFrequency->execute();
126
+    }
127
+
128
+    /**
129
+     * @return int|mixed
130
+     */
131
+    private function getTotalDocumentCount()
132
+    {
133
+        return $this->storage->getDocuments()->getTotalDocumentCount();
134
+    }
135
+
136
+    /**
137
+     * Calculates the Term Field Length Norm.
138
+     * This is an index determining how important a
139
+     * term is, based on the total length of the field
140
+     * it comes from.
141
+     */
142
+    public function createTermFieldLengthNorm()
143
+    {
144
+        $termFieldLengthNorm = new TermFieldLengthNorm($this->getSearchDbHandle());
145
+        $termFieldLengthNorm->execute();
146
+    }
147
+
148
+    /**
149
+     * Stores the time the indexing started in memory
150
+     */
151
+    private function startLogging()
152
+    {
153
+        $this->loggingStart = round(microtime(true) * 1000);
154
+        $this->lastLog = $this->loggingStart;
155
+    }
156
+
157
+    /**
158
+     * Adds a logline with the time since last log
159
+     * @param $string
160
+     */
161
+    private function addLog($string)
162
+    {
163
+        $currentTime = round(microtime(true) * 1000);
164
+        $this->log .= date('d-m-Y H:i:s - ') . str_pad($string, 50, " ", STR_PAD_RIGHT) . "\t" . ($currentTime - $this->lastLog) . 'ms since last log. ' . "\t" . ($currentTime - $this->loggingStart) . 'ms since start.' . PHP_EOL;
165
+        $this->lastLog = round(microtime(true) * 1000);
166
+    }
167
+
168
+    /**
169
+     * Creates the SQLite \PDO object if it doesnt
170
+     * exist and returns it.
171
+     * @return \PDO
172
+     */
173
+    protected function getSearchDbHandle()
174
+    {
175
+        if ($this->searchDbHandle === null) {
176
+            $path = $this->storageDir . DIRECTORY_SEPARATOR;
177
+            $this->searchDbHandle = new \PDO('sqlite:' . $path . self::SEARCH_TEMP_DB);
178
+        }
179
+        return $this->searchDbHandle;
180
+    }
181
+
182
+    /**
183
+     * Replaces the old search index database with the new one.
184
+     */
185
+    public function replaceOldIndex()
186
+    {
187
+        $this->searchDbHandle = null;
188
+        $path = $this->storageDir . DIRECTORY_SEPARATOR;
189
+        rename($path . self::SEARCH_TEMP_DB, $path . 'search.db');
190
+    }
191 191
 }
192 192
\ No newline at end of file
Please login to merge, or discard this patch.
src/search/results/SearchResult.php 1 patch
Indentation   +39 added lines, -39 removed lines patch added patch discarded remove patch
@@ -13,43 +13,43 @@
 block discarded – undo
13 13
 
14 14
 class SearchResult
15 15
 {
16
-	/**
17
-	 * @var string
18
-	 */
19
-	public $documentPath;
20
-	/**
21
-	 * @var array
22
-	 */
23
-	public $matchingTokens;
24
-	/**
25
-	 * @var float
26
-	 */
27
-	public $score;
28
-
29
-	protected $document;
30
-	/**
31
-	 * @var Storage
32
-	 */
33
-	protected $storage;
34
-
35
-	/**
36
-	 * @return Document
37
-	 */
38
-	public function getDocument()
39
-	{
40
-		if ($this->document instanceof Document) {
41
-			return $this->document;
42
-		} else {
43
-			$this->document = $this->storage->getDocuments()->getDocumentBySlug(substr($this->documentPath, 1));
44
-			$this->document->dbHandle = $this->storage->getContentDbHandle();
45
-			$this->document->documentStorage = $this->storage->getRepository();
46
-
47
-			return $this->document;
48
-		}
49
-	}
50
-
51
-	public function setStorage($storage)
52
-	{
53
-		$this->storage = $storage;
54
-	}
16
+    /**
17
+     * @var string
18
+     */
19
+    public $documentPath;
20
+    /**
21
+     * @var array
22
+     */
23
+    public $matchingTokens;
24
+    /**
25
+     * @var float
26
+     */
27
+    public $score;
28
+
29
+    protected $document;
30
+    /**
31
+     * @var Storage
32
+     */
33
+    protected $storage;
34
+
35
+    /**
36
+     * @return Document
37
+     */
38
+    public function getDocument()
39
+    {
40
+        if ($this->document instanceof Document) {
41
+            return $this->document;
42
+        } else {
43
+            $this->document = $this->storage->getDocuments()->getDocumentBySlug(substr($this->documentPath, 1));
44
+            $this->document->dbHandle = $this->storage->getContentDbHandle();
45
+            $this->document->documentStorage = $this->storage->getRepository();
46
+
47
+            return $this->document;
48
+        }
49
+    }
50
+
51
+    public function setStorage($storage)
52
+    {
53
+        $this->storage = $storage;
54
+    }
55 55
 }
56 56
\ No newline at end of file
Please login to merge, or discard this patch.
src/search/results/SearchSuggestion.php 1 patch
Indentation   +3 added lines, -3 removed lines patch added patch discarded remove patch
@@ -10,7 +10,7 @@
 block discarded – undo
10 10
 
11 11
 class SearchSuggestion
12 12
 {
13
-	public $original;
14
-	public $term;
15
-	public $editDistance;
13
+    public $original;
14
+    public $term;
15
+    public $editDistance;
16 16
 }
17 17
\ No newline at end of file
Please login to merge, or discard this patch.