Passed
Push — master ( 8c639f...f61691 )
by Jens
05:01
created
src/search/indexer/InverseDocumentFrequency.php 1 patch
Indentation   +39 added lines, -39 removed lines patch added patch discarded remove patch
@@ -10,51 +10,51 @@
 block discarded – undo
10 10
 
11 11
 class InverseDocumentFrequency
12 12
 {
13
-	/**
14
-	 * @var \PDO
15
-	 */
16
-	protected $dbHandle;
17
-	protected $documentCount;
13
+    /**
14
+     * @var \PDO
15
+     */
16
+    protected $dbHandle;
17
+    protected $documentCount;
18 18
 
19
-	/**
20
-	 * InverseDocumentFrequency constructor.
21
-	 *
22
-	 * @param \PDO 	$dbHandle
23
-	 * @param int   $documentCount
24
-	 */
25
-	public function __construct($dbHandle, $documentCount)
26
-	{
27
-		$this->dbHandle = $dbHandle;
28
-		$this->documentCount = $documentCount;
29
-	}
19
+    /**
20
+     * InverseDocumentFrequency constructor.
21
+     *
22
+     * @param \PDO 	$dbHandle
23
+     * @param int   $documentCount
24
+     */
25
+    public function __construct($dbHandle, $documentCount)
26
+    {
27
+        $this->dbHandle = $dbHandle;
28
+        $this->documentCount = $documentCount;
29
+    }
30 30
 
31
-	/**
32
-	 * Formula to calculate:
33
-	 * 		idf(t) = 1 + log ( totalDocuments / (documentsThatContainTheTerm + 1))
34
-	 * @throws \Exception
35
-	 */
36
-	public function execute()
37
-	{
38
-		$db = $this->dbHandle;
39
-		$db->sqliteCreateFunction('log', 'log', 1);
40
-		$sql = '
31
+    /**
32
+     * Formula to calculate:
33
+     * 		idf(t) = 1 + log ( totalDocuments / (documentsThatContainTheTerm + 1))
34
+     * @throws \Exception
35
+     */
36
+    public function execute()
37
+    {
38
+        $db = $this->dbHandle;
39
+        $db->sqliteCreateFunction('log', 'log', 1);
40
+        $sql = '
41 41
 		INSERT INTO inverse_document_frequency (term, inverseDocumentFrequency)
42 42
 		SELECT DISTINCT term, (1+(log(:documentCount / COUNT(documentPath) + 1))) as inverseDocumentFrequency
43 43
 					  FROM term_count
44 44
 				  GROUP BY term
45 45
 		';
46 46
 
47
-		if (!$stmt = $db->prepare($sql)) {
48
-			$errorInfo = $db->errorInfo();
49
-			$errorMsg = $errorInfo[2];
50
-			throw new \Exception('SQLite Exception: ' . $errorMsg . ' in SQL: <br /><pre>' . $sql . '</pre>');
51
-		}
52
-		$stmt->bindValue(':documentCount', $this->documentCount);
53
-		$result = $stmt->execute();
54
-		if ($result === false) {
55
-			$errorInfo = $db->errorInfo();
56
-			$errorMsg = $errorInfo[2];
57
-			throw new \Exception('SQLite Exception: ' . $errorMsg . ' in SQL: <br /><pre>' . $sql . '</pre>');
58
-		}
59
-	}
47
+        if (!$stmt = $db->prepare($sql)) {
48
+            $errorInfo = $db->errorInfo();
49
+            $errorMsg = $errorInfo[2];
50
+            throw new \Exception('SQLite Exception: ' . $errorMsg . ' in SQL: <br /><pre>' . $sql . '</pre>');
51
+        }
52
+        $stmt->bindValue(':documentCount', $this->documentCount);
53
+        $result = $stmt->execute();
54
+        if ($result === false) {
55
+            $errorInfo = $db->errorInfo();
56
+            $errorMsg = $errorInfo[2];
57
+            throw new \Exception('SQLite Exception: ' . $errorMsg . ' in SQL: <br /><pre>' . $sql . '</pre>');
58
+        }
59
+    }
60 60
 }
61 61
\ No newline at end of file
Please login to merge, or discard this patch.
src/search/indexer/TermCount.php 1 patch
Indentation   +100 added lines, -100 removed lines patch added patch discarded remove patch
@@ -14,113 +14,113 @@
 block discarded – undo
14 14
 
15 15
 class TermCount
16 16
 {
17
-	/**
18
-	 * @var \PDO
19
-	 */
20
-	protected $dbHandle;
21
-	protected $documents;
22
-	protected $filters;
23
-	protected $storage;
17
+    /**
18
+     * @var \PDO
19
+     */
20
+    protected $dbHandle;
21
+    protected $documents;
22
+    protected $filters;
23
+    protected $storage;
24 24
 
25
-	/**
26
-	 * TermCount constructor.
27
-	 *
28
-	 * @param \PDO    $dbHandle
29
-	 * @param array   $documents
30
-	 * @param array   $filters
31
-	 * @param Storage $jsonStorage
32
-	 */
33
-	public function __construct($dbHandle, $documents, $filters, $jsonStorage)
34
-	{
35
-		$this->dbHandle = $dbHandle;
36
-		$this->documents = $documents;
37
-		$this->filters = $filters;
38
-		$this->storage = $jsonStorage;
39
-	}
25
+    /**
26
+     * TermCount constructor.
27
+     *
28
+     * @param \PDO    $dbHandle
29
+     * @param array   $documents
30
+     * @param array   $filters
31
+     * @param Storage $jsonStorage
32
+     */
33
+    public function __construct($dbHandle, $documents, $filters, $jsonStorage)
34
+    {
35
+        $this->dbHandle = $dbHandle;
36
+        $this->documents = $documents;
37
+        $this->filters = $filters;
38
+        $this->storage = $jsonStorage;
39
+    }
40 40
 
41
-	public function execute()
42
-	{
43
-		$this->iterateDocumentsAndCreateTermCount($this->documents);
44
-	}
41
+    public function execute()
42
+    {
43
+        $this->iterateDocumentsAndCreateTermCount($this->documents);
44
+    }
45 45
 
46
-	protected function applyFilters($tokens)
47
-	{
48
-		foreach ($this->filters as $filterName) {
49
-			$filterClassName = '\CloudControl\Cms\search\filters\\' . $filterName;
50
-			$filter = new $filterClassName($tokens);
51
-			$tokens = $filter->getFilterResults();
52
-		}
53
-		return $tokens;
54
-	}
46
+    protected function applyFilters($tokens)
47
+    {
48
+        foreach ($this->filters as $filterName) {
49
+            $filterClassName = '\CloudControl\Cms\search\filters\\' . $filterName;
50
+            $filter = new $filterClassName($tokens);
51
+            $tokens = $filter->getFilterResults();
52
+        }
53
+        return $tokens;
54
+    }
55 55
 
56
-	protected function storeDocumentTermCount($document, $documentTermCount)
57
-	{
58
-		$db = $this->dbHandle;
59
-		$sqlStart = '
56
+    protected function storeDocumentTermCount($document, $documentTermCount)
57
+    {
58
+        $db = $this->dbHandle;
59
+        $sqlStart = '
60 60
 			INSERT INTO `term_count` (`documentPath`, `term`, `count`, `field`)
61 61
 				 VALUES ';
62
-		$sql = $sqlStart;
63
-		$values = array();
64
-		$quotedDocumentPath = $db->quote($document->path);
65
-		$i = 0;
66
-		foreach ($documentTermCount as $field => $countArray) {
67
-			$quotedField = $db->quote($field);
68
-			foreach ($countArray as $term => $count) {
69
-				$values[] = $quotedDocumentPath . ', ' . $db->quote($term) . ', ' . $db->quote($count) . ', ' . $quotedField;
70
-				$i += 1;
71
-				if ($i >= Indexer::SQLITE_MAX_COMPOUND_SELECT) {
72
-					$this->executeStoreDocumentTermCount($values, $sql, $db);
73
-					$values = array();
74
-					$sql = $sqlStart;
75
-					$i = 0;
76
-				}
77
-			}
78
-		}
79
-		if (count($values) != 0) {
80
-			$this->executeStoreDocumentTermCount($values, $sql, $db);
81
-		}
82
-	}
62
+        $sql = $sqlStart;
63
+        $values = array();
64
+        $quotedDocumentPath = $db->quote($document->path);
65
+        $i = 0;
66
+        foreach ($documentTermCount as $field => $countArray) {
67
+            $quotedField = $db->quote($field);
68
+            foreach ($countArray as $term => $count) {
69
+                $values[] = $quotedDocumentPath . ', ' . $db->quote($term) . ', ' . $db->quote($count) . ', ' . $quotedField;
70
+                $i += 1;
71
+                if ($i >= Indexer::SQLITE_MAX_COMPOUND_SELECT) {
72
+                    $this->executeStoreDocumentTermCount($values, $sql, $db);
73
+                    $values = array();
74
+                    $sql = $sqlStart;
75
+                    $i = 0;
76
+                }
77
+            }
78
+        }
79
+        if (count($values) != 0) {
80
+            $this->executeStoreDocumentTermCount($values, $sql, $db);
81
+        }
82
+    }
83 83
 
84
-	/**
85
-	 * @param $values
86
-	 * @param $sql
87
-	 * @param $db
88
-	 *
89
-	 * @throws \Exception
90
-	 */
91
-	protected function executeStoreDocumentTermCount($values, $sql, $db)
92
-	{
93
-		$sql .= '(' . implode('),' . PHP_EOL . '(', $values) . ');';
94
-		$stmt = $db->prepare($sql);
95
-		if ($stmt === false || !$stmt->execute()) {
96
-			$errorInfo = $db->errorInfo();
97
-			$errorMsg = $errorInfo[2];
98
-			throw new \Exception('SQLite Exception: ' . $errorMsg . ' in SQL: <br /><pre>' . $sql . '</pre>');
99
-		}
100
-	}
84
+    /**
85
+     * @param $values
86
+     * @param $sql
87
+     * @param $db
88
+     *
89
+     * @throws \Exception
90
+     */
91
+    protected function executeStoreDocumentTermCount($values, $sql, $db)
92
+    {
93
+        $sql .= '(' . implode('),' . PHP_EOL . '(', $values) . ');';
94
+        $stmt = $db->prepare($sql);
95
+        if ($stmt === false || !$stmt->execute()) {
96
+            $errorInfo = $db->errorInfo();
97
+            $errorMsg = $errorInfo[2];
98
+            throw new \Exception('SQLite Exception: ' . $errorMsg . ' in SQL: <br /><pre>' . $sql . '</pre>');
99
+        }
100
+    }
101 101
 
102
-	/**
103
-	 * @param $document
104
-	 */
105
-	private function createTermCountForDocument($document)
106
-	{
107
-		$tokenizer = new DocumentTokenizer($document, $this->storage);
108
-		$tokens = $tokenizer->getTokens();
109
-		$documentTermCount = $this->applyFilters($tokens);
110
-		$this->storeDocumentTermCount($document, $documentTermCount);
111
-	}
102
+    /**
103
+     * @param $document
104
+     */
105
+    private function createTermCountForDocument($document)
106
+    {
107
+        $tokenizer = new DocumentTokenizer($document, $this->storage);
108
+        $tokens = $tokenizer->getTokens();
109
+        $documentTermCount = $this->applyFilters($tokens);
110
+        $this->storeDocumentTermCount($document, $documentTermCount);
111
+    }
112 112
 
113
-	/**
114
-	 * @param $documents
115
-	 */
116
-	private function iterateDocumentsAndCreateTermCount($documents)
117
-	{
118
-		foreach ($documents as $document) {
119
-			if ($document->type === 'folder') {
120
-				$this->iterateDocumentsAndCreateTermCount($document->content);
121
-			} else {
122
-				$this->createTermCountForDocument($document);
123
-			}
124
-		}
125
-	}
113
+    /**
114
+     * @param $documents
115
+     */
116
+    private function iterateDocumentsAndCreateTermCount($documents)
117
+    {
118
+        foreach ($documents as $document) {
119
+            if ($document->type === 'folder') {
120
+                $this->iterateDocumentsAndCreateTermCount($document->content);
121
+            } else {
122
+                $this->createTermCountForDocument($document);
123
+            }
124
+        }
125
+    }
126 126
 }
127 127
\ No newline at end of file
Please login to merge, or discard this patch.
src/search/indexer/TermFieldLengthNorm.php 1 patch
Indentation   +61 added lines, -61 removed lines patch added patch discarded remove patch
@@ -17,72 +17,72 @@
 block discarded – undo
17 17
  */
18 18
 class TermFieldLengthNorm
19 19
 {
20
-	/**
21
-	 * @var \PDO
22
-	 */
23
-	protected $dbHandle;
20
+    /**
21
+     * @var \PDO
22
+     */
23
+    protected $dbHandle;
24 24
 
25
-	/**
26
-	 * TermFieldLengthNorm constructor.
27
-	 *
28
-	 * @param \PDO $dbHandle
29
-	 */
30
-	public function __construct($dbHandle)
31
-	{
32
-		$this->dbHandle = $dbHandle;
33
-	}
25
+    /**
26
+     * TermFieldLengthNorm constructor.
27
+     *
28
+     * @param \PDO $dbHandle
29
+     */
30
+    public function __construct($dbHandle)
31
+    {
32
+        $this->dbHandle = $dbHandle;
33
+    }
34 34
 
35
-	public function execute()
36
-	{
37
-		$db = $this->dbHandle;
38
-		$db->sqliteCreateFunction('sqrt', 'sqrt', 1);
39
-		$sql = '
35
+    public function execute()
36
+    {
37
+        $db = $this->dbHandle;
38
+        $db->sqliteCreateFunction('sqrt', 'sqrt', 1);
39
+        $sql = '
40 40
 		SELECT documentPath, field, COUNT(`count`) as termCount
41 41
 		  FROM term_count
42 42
 	  GROUP BY documentPath, field
43 43
 		';
44
-		$stmt = $db->prepare($sql);
45
-		if ($stmt === false) {
46
-			$errorInfo = $db->errorInfo();
47
-			$errorMsg = $errorInfo[2];
48
-			throw new \Exception('SQLite Exception: ' . $errorMsg . ' in SQL: <br /><pre>' . $sql . '</pre>');
49
-		}
50
-		if (($stmt->execute()) === false) {
51
-			$errorInfo = $db->errorInfo();
52
-			$errorMsg = $errorInfo[2];
53
-			throw new \Exception('SQLite Exception: ' . $errorMsg . ' in SQL: <br /><pre>' . $sql . '</pre>');
54
-		}
55
-		$uniqueFieldsPerDocument = $stmt->fetchAll(\PDO::FETCH_OBJ);
56
-		$values = array();
57
-		$i = 0;
58
-		foreach ($uniqueFieldsPerDocument as $fieldRow) {
59
-			$values[] = 'UPDATE term_frequency SET termNorm = 1/sqrt(' . intval($fieldRow->termCount) . ') WHERE documentPath = ' . $db->quote($fieldRow->documentPath) . ' AND field = ' . $db->quote($fieldRow->field) . ';';
60
-			$i += 1;
61
-			if ($i >= Indexer::SQLITE_MAX_COMPOUND_SELECT) {
62
-				$this->executeUpdateTermNorm($values, $db);
63
-				$values = array();
64
-				$i = 0;
65
-			}
66
-		}
67
-		if (count($values) != 0) {
68
-			$this->executeUpdateTermNorm($values, $db);
69
-		}
70
-	}
44
+        $stmt = $db->prepare($sql);
45
+        if ($stmt === false) {
46
+            $errorInfo = $db->errorInfo();
47
+            $errorMsg = $errorInfo[2];
48
+            throw new \Exception('SQLite Exception: ' . $errorMsg . ' in SQL: <br /><pre>' . $sql . '</pre>');
49
+        }
50
+        if (($stmt->execute()) === false) {
51
+            $errorInfo = $db->errorInfo();
52
+            $errorMsg = $errorInfo[2];
53
+            throw new \Exception('SQLite Exception: ' . $errorMsg . ' in SQL: <br /><pre>' . $sql . '</pre>');
54
+        }
55
+        $uniqueFieldsPerDocument = $stmt->fetchAll(\PDO::FETCH_OBJ);
56
+        $values = array();
57
+        $i = 0;
58
+        foreach ($uniqueFieldsPerDocument as $fieldRow) {
59
+            $values[] = 'UPDATE term_frequency SET termNorm = 1/sqrt(' . intval($fieldRow->termCount) . ') WHERE documentPath = ' . $db->quote($fieldRow->documentPath) . ' AND field = ' . $db->quote($fieldRow->field) . ';';
60
+            $i += 1;
61
+            if ($i >= Indexer::SQLITE_MAX_COMPOUND_SELECT) {
62
+                $this->executeUpdateTermNorm($values, $db);
63
+                $values = array();
64
+                $i = 0;
65
+            }
66
+        }
67
+        if (count($values) != 0) {
68
+            $this->executeUpdateTermNorm($values, $db);
69
+        }
70
+    }
71 71
 
72
-	/**
73
-	 * @param array $values
74
-	 * @param \PDO $db
75
-	 * @throws \Exception
76
-	 */
77
-	private function executeUpdateTermNorm($values, $db)
78
-	{
79
-		$sql  = 'BEGIN TRANSACTION;' . PHP_EOL;
80
-		$sql .= implode(PHP_EOL, $values) . PHP_EOL;
81
-		$sql .= 'COMMIT;';
82
-		if (($db->exec($sql)) === false) {
83
-			$errorInfo = $db->errorInfo();
84
-			$errorMsg = $errorInfo[2];
85
-			throw new \Exception('SQLite Exception: ' . $errorMsg . ' in SQL: <br /><pre>' . $sql . '</pre>');
86
-		}
87
-	}
72
+    /**
73
+     * @param array $values
74
+     * @param \PDO $db
75
+     * @throws \Exception
76
+     */
77
+    private function executeUpdateTermNorm($values, $db)
78
+    {
79
+        $sql  = 'BEGIN TRANSACTION;' . PHP_EOL;
80
+        $sql .= implode(PHP_EOL, $values) . PHP_EOL;
81
+        $sql .= 'COMMIT;';
82
+        if (($db->exec($sql)) === false) {
83
+            $errorInfo = $db->errorInfo();
84
+            $errorMsg = $errorInfo[2];
85
+            throw new \Exception('SQLite Exception: ' . $errorMsg . ' in SQL: <br /><pre>' . $sql . '</pre>');
86
+        }
87
+    }
88 88
 }
89 89
\ No newline at end of file
Please login to merge, or discard this patch.
src/search/Search.php 2 patches
Indentation   +225 added lines, -225 removed lines patch added patch discarded remove patch
@@ -26,117 +26,117 @@  discard block
 block discarded – undo
26 26
  */
27 27
 class Search extends SearchDbConnected
28 28
 {
29
-	/**
30
-	 * @var Tokenizer
31
-	 */
32
-	protected $tokenizer;
33
-	protected $results = array();
29
+    /**
30
+     * @var Tokenizer
31
+     */
32
+    protected $tokenizer;
33
+    protected $results = array();
34 34
 
35
-	/**
36
-	 * An array containing classes implementing \CloudControl\Cms\search\Filters
37
-	 * These will be applied to all tokenizers
38
-	 * @var array
39
-	 */
40
-	protected $filters = array(
41
-		'DutchStopWords',
42
-		'EnglishStopWords'
43
-	);
35
+    /**
36
+     * An array containing classes implementing \CloudControl\Cms\search\Filters
37
+     * These will be applied to all tokenizers
38
+     * @var array
39
+     */
40
+    protected $filters = array(
41
+        'DutchStopWords',
42
+        'EnglishStopWords'
43
+    );
44 44
 
45
-	/**
46
-	 * Returns an array of SeachResult and / or SearchSuggestion objects,
47
-	 * based on the tokens in the Tokenizer
48
-	 * @param Tokenizer $tokenizer
49
-	 *
50
-	 * @return array
51
-	 */
52
-	public function getDocumentsForTokenizer(Tokenizer $tokenizer)
53
-	{
54
-		$this->tokenizer = $tokenizer;
55
-		$resultsPerTokens = $this->queryTokens();
45
+    /**
46
+     * Returns an array of SeachResult and / or SearchSuggestion objects,
47
+     * based on the tokens in the Tokenizer
48
+     * @param Tokenizer $tokenizer
49
+     *
50
+     * @return array
51
+     */
52
+    public function getDocumentsForTokenizer(Tokenizer $tokenizer)
53
+    {
54
+        $this->tokenizer = $tokenizer;
55
+        $resultsPerTokens = $this->queryTokens();
56 56
 
57
-		$flatResults = $this->flattenResults($resultsPerTokens);
58
-		$flatResults = $this->applyQueryCoordination($flatResults);
59
-		usort($flatResults, array($this, "scoreCompare"));
57
+        $flatResults = $this->flattenResults($resultsPerTokens);
58
+        $flatResults = $this->applyQueryCoordination($flatResults);
59
+        usort($flatResults, array($this, "scoreCompare"));
60 60
 
61
-		$flatResults = array_merge($this->getSearchSuggestions(), $flatResults);
61
+        $flatResults = array_merge($this->getSearchSuggestions(), $flatResults);
62 62
 
63
-		return $flatResults;
64
-	}
63
+        return $flatResults;
64
+    }
65 65
 
66
-	/**
67
-	 * Returns the amount of distinct documents
68
-	 * that are currently in the search index.
69
-	 * @return int
70
-	 * @throws \Exception
71
-	 */
72
-	public function getIndexedDocuments()
73
-	{
74
-		$db = $this->getSearchDbHandle();
75
-		$sql = '
66
+    /**
67
+     * Returns the amount of distinct documents
68
+     * that are currently in the search index.
69
+     * @return int
70
+     * @throws \Exception
71
+     */
72
+    public function getIndexedDocuments()
73
+    {
74
+        $db = $this->getSearchDbHandle();
75
+        $sql = '
76 76
 			SELECT count(DISTINCT documentPath) as indexedDocuments
77 77
 			  FROM term_frequency
78 78
 		';
79
-		if (!$stmt = $db->query($sql)) {
80
-			$errorInfo = $db->errorInfo();
81
-			$errorMsg = $errorInfo[2];
82
-			throw new \Exception('SQLite Exception: ' . $errorMsg . ' in SQL: <br /><pre>' . $sql . '</pre>');
83
-		}
84
-		$result = $stmt->fetch(\PDO::FETCH_COLUMN);
85
-		if (false === $result) {
86
-			$errorInfo = $db->errorInfo();
87
-			$errorMsg = $errorInfo[2];
88
-			throw new \Exception('SQLite Exception: ' . $errorMsg . ' in SQL: <br /><pre>' . $sql . '</pre>');
89
-		}
90
-		return intval($result);
91
-	}
79
+        if (!$stmt = $db->query($sql)) {
80
+            $errorInfo = $db->errorInfo();
81
+            $errorMsg = $errorInfo[2];
82
+            throw new \Exception('SQLite Exception: ' . $errorMsg . ' in SQL: <br /><pre>' . $sql . '</pre>');
83
+        }
84
+        $result = $stmt->fetch(\PDO::FETCH_COLUMN);
85
+        if (false === $result) {
86
+            $errorInfo = $db->errorInfo();
87
+            $errorMsg = $errorInfo[2];
88
+            throw new \Exception('SQLite Exception: ' . $errorMsg . ' in SQL: <br /><pre>' . $sql . '</pre>');
89
+        }
90
+        return intval($result);
91
+    }
92 92
 
93
-	/**
94
-	 * Queries each token present in the Tokenizer
95
-	 * and returns SearchResult objects for the found
96
-	 * documents
97
-	 * @return array
98
-	 */
99
-	private function queryTokens()
100
-	{
101
-		$tokens = $this->getTokens();
93
+    /**
94
+     * Queries each token present in the Tokenizer
95
+     * and returns SearchResult objects for the found
96
+     * documents
97
+     * @return array
98
+     */
99
+    private function queryTokens()
100
+    {
101
+        $tokens = $this->getTokens();
102 102
 
103
-		$queryNorm = $this->getQueryNorm($tokens);
104
-		$results = array();
105
-		foreach ($tokens as $token) {
106
-			$results[$token] = $this->getResultsForToken($token, $queryNorm);
107
-		}
108
-		return $results;
109
-	}
103
+        $queryNorm = $this->getQueryNorm($tokens);
104
+        $results = array();
105
+        foreach ($tokens as $token) {
106
+            $results[$token] = $this->getResultsForToken($token, $queryNorm);
107
+        }
108
+        return $results;
109
+    }
110 110
 
111
-	/**
112
-	 * Applies the Filter objects in the the filter array to the
113
-	 * tokens in the Tokenizer
114
-	 * @param $tokens
115
-	 *
116
-	 * @return mixed
117
-	 */
118
-	protected function applyFilters($tokens)
119
-	{
120
-		foreach ($this->filters as $filterName) {
121
-			$filterClassName = '\CloudControl\Cms\search\filters\\' . $filterName;
122
-			$filter = new $filterClassName($tokens);
123
-			$tokens = $filter->getFilterResults();
124
-		}
125
-		return $tokens;
126
-	}
111
+    /**
112
+     * Applies the Filter objects in the the filter array to the
113
+     * tokens in the Tokenizer
114
+     * @param $tokens
115
+     *
116
+     * @return mixed
117
+     */
118
+    protected function applyFilters($tokens)
119
+    {
120
+        foreach ($this->filters as $filterName) {
121
+            $filterClassName = '\CloudControl\Cms\search\filters\\' . $filterName;
122
+            $filter = new $filterClassName($tokens);
123
+            $tokens = $filter->getFilterResults();
124
+        }
125
+        return $tokens;
126
+    }
127 127
 
128
-	/**
129
-	 * Queries the search index for a given token
130
-	 * and the query norm.
131
-	 * @param $token
132
-	 * @param $queryNorm
133
-	 *
134
-	 * @return array
135
-	 * @throws \Exception
136
-	 */
137
-	public function getResultsForToken($token, $queryNorm) {
138
-		$db = $this->getSearchDbHandle();
139
-		$sql = '
128
+    /**
129
+     * Queries the search index for a given token
130
+     * and the query norm.
131
+     * @param $token
132
+     * @param $queryNorm
133
+     *
134
+     * @return array
135
+     * @throws \Exception
136
+     */
137
+    public function getResultsForToken($token, $queryNorm) {
138
+        $db = $this->getSearchDbHandle();
139
+        $sql = '
140 140
 			SELECT (:queryNorm * 
141 141
 						(SUM(term_frequency.frequency) --TF
142 142
 						* inverse_document_frequency.inverseDocumentFrequency -- IDF
@@ -154,113 +154,113 @@  discard block
 block discarded – undo
154 154
 		  GROUP BY term_frequency.documentPath, term_frequency.term
155 155
 		  ORDER BY score DESC
156 156
 		';
157
-		if(!$stmt = $db->prepare($sql)) {
158
-			throw new \Exception('SQLite exception: <pre>' . print_r($db->errorInfo(), true) . '</pre> for SQL:<pre>' . $sql . '</pre>');
159
-		}
160
-		$stmt->bindValue(':query', $token);
161
-		$stmt->bindValue(':queryNorm', $queryNorm);
162
-		if (!$stmt->execute()) {
163
-			throw new \Exception('SQLite exception: <pre>' . print_r($db->errorInfo(), true) . '</pre> for SQL:<pre>' . $sql . '</pre>');
164
-		}
165
-		return $stmt->fetchAll(\PDO::FETCH_CLASS, '\CloudControl\Cms\search\results\SearchResult');
166
-	}
157
+        if(!$stmt = $db->prepare($sql)) {
158
+            throw new \Exception('SQLite exception: <pre>' . print_r($db->errorInfo(), true) . '</pre> for SQL:<pre>' . $sql . '</pre>');
159
+        }
160
+        $stmt->bindValue(':query', $token);
161
+        $stmt->bindValue(':queryNorm', $queryNorm);
162
+        if (!$stmt->execute()) {
163
+            throw new \Exception('SQLite exception: <pre>' . print_r($db->errorInfo(), true) . '</pre> for SQL:<pre>' . $sql . '</pre>');
164
+        }
165
+        return $stmt->fetchAll(\PDO::FETCH_CLASS, '\CloudControl\Cms\search\results\SearchResult');
166
+    }
167 167
 
168
-	/**
169
-	 * @param $resultsPerTokens
170
-	 *
171
-	 * @return array
172
-	 */
173
-	private function flattenResults($resultsPerTokens)
174
-	{
175
-		$finalResults = array();
176
-		foreach ($resultsPerTokens as $token => $resultPerToken) {
177
-			foreach ($resultPerToken as $result) {
178
-				if (isset($finalResults[$result->documentPath])) {
179
-					$finalResults[$result->documentPath]->score += $result->score;
180
-					$finalResults[$result->documentPath]->matchingTokens[] = $token;
181
-				} else {
182
-					$resultObj = new SearchResult();
183
-					$resultObj->documentPath = $result->documentPath;
184
-					$resultObj->matchingTokens = array($token);
185
-					$resultObj->score = floatval($result->score);
186
-					$resultObj->setStorage($this->storage);
187
-					$finalResults[$result->documentPath] = $resultObj;
188
-				}
189
-			}
190
-		}
191
-		return $finalResults;
192
-	}
168
+    /**
169
+     * @param $resultsPerTokens
170
+     *
171
+     * @return array
172
+     */
173
+    private function flattenResults($resultsPerTokens)
174
+    {
175
+        $finalResults = array();
176
+        foreach ($resultsPerTokens as $token => $resultPerToken) {
177
+            foreach ($resultPerToken as $result) {
178
+                if (isset($finalResults[$result->documentPath])) {
179
+                    $finalResults[$result->documentPath]->score += $result->score;
180
+                    $finalResults[$result->documentPath]->matchingTokens[] = $token;
181
+                } else {
182
+                    $resultObj = new SearchResult();
183
+                    $resultObj->documentPath = $result->documentPath;
184
+                    $resultObj->matchingTokens = array($token);
185
+                    $resultObj->score = floatval($result->score);
186
+                    $resultObj->setStorage($this->storage);
187
+                    $finalResults[$result->documentPath] = $resultObj;
188
+                }
189
+            }
190
+        }
191
+        return $finalResults;
192
+    }
193 193
 
194
-	private function scoreCompare($a, $b) {
195
-		if ($a->score == $b->score) {
196
-			return 0;
197
-		}
198
-		return ($a->score > $b->score) ? -1 : 1;
199
-	}
194
+    private function scoreCompare($a, $b) {
195
+        if ($a->score == $b->score) {
196
+            return 0;
197
+        }
198
+        return ($a->score > $b->score) ? -1 : 1;
199
+    }
200 200
 
201
-	/**
202
-	 * Calculates the query norm for all tokens in the Tokenizer
203
-	 * @param $tokens
204
-	 *
205
-	 * @return int
206
-	 * @throws \Exception
207
-	 */
208
-	private function getQueryNorm($tokens)
209
-	{
210
-		$db = $this->getSearchDbHandle();
211
-		$db->sqliteCreateFunction('sqrt', 'sqrt', 1);
212
-		foreach ($tokens as $key => $token) {
213
-			$tokens[$key] = $db->quote($token);
214
-		}
215
-		$terms = implode(',', $tokens);
216
-		$sql = '
201
+    /**
202
+     * Calculates the query norm for all tokens in the Tokenizer
203
+     * @param $tokens
204
+     *
205
+     * @return int
206
+     * @throws \Exception
207
+     */
208
+    private function getQueryNorm($tokens)
209
+    {
210
+        $db = $this->getSearchDbHandle();
211
+        $db->sqliteCreateFunction('sqrt', 'sqrt', 1);
212
+        foreach ($tokens as $key => $token) {
213
+            $tokens[$key] = $db->quote($token);
214
+        }
215
+        $terms = implode(',', $tokens);
216
+        $sql = '
217 217
 			SELECT (1 / sqrt(SUM(inverseDocumentFrequency))) as queryNorm
218 218
 			  FROM inverse_document_frequency
219 219
 			 WHERE term IN (' . $terms . ') 
220 220
 		';
221
-		if(!$stmt = $db->prepare($sql)) {
222
-			throw new \Exception('SQLite exception: <pre>' . print_r($db->errorInfo(), true) . '</pre> for SQL:<pre>' . $sql . '</pre>');
223
-		}
224
-		if (!$stmt->execute()) {
225
-			throw new \Exception('SQLite exception: <pre>' . print_r($db->errorInfo(), true) . '</pre> for SQL:<pre>' . $sql . '</pre>');
226
-		}
227
-		$result = $stmt->fetch(\PDO::FETCH_OBJ);
228
-		return $result->queryNorm == null ? 1 : $result->queryNorm;
229
-	}
221
+        if(!$stmt = $db->prepare($sql)) {
222
+            throw new \Exception('SQLite exception: <pre>' . print_r($db->errorInfo(), true) . '</pre> for SQL:<pre>' . $sql . '</pre>');
223
+        }
224
+        if (!$stmt->execute()) {
225
+            throw new \Exception('SQLite exception: <pre>' . print_r($db->errorInfo(), true) . '</pre> for SQL:<pre>' . $sql . '</pre>');
226
+        }
227
+        $result = $stmt->fetch(\PDO::FETCH_OBJ);
228
+        return $result->queryNorm == null ? 1 : $result->queryNorm;
229
+    }
230 230
 
231
-	/**
232
-	 * Applies query coordination to all results
233
-	 * @param $flatResults
234
-	 *
235
-	 * @return mixed
236
-	 */
237
-	private function applyQueryCoordination($flatResults)
238
-	{
239
-		$tokenVector = $this->tokenizer->getTokenVector();
240
-		$tokens = array_keys($tokenVector);
241
-		$tokenCount = count($tokens);
242
-		foreach ($flatResults as $key => $result) {
243
-			$matchCount = count($result->matchingTokens);
244
-			$result->score = ($matchCount / $tokenCount) * $result->score;
245
-			$flatResults[$key] = $result;
246
-		}
247
-		return $flatResults;
248
-	}
231
+    /**
232
+     * Applies query coordination to all results
233
+     * @param $flatResults
234
+     *
235
+     * @return mixed
236
+     */
237
+    private function applyQueryCoordination($flatResults)
238
+    {
239
+        $tokenVector = $this->tokenizer->getTokenVector();
240
+        $tokens = array_keys($tokenVector);
241
+        $tokenCount = count($tokens);
242
+        foreach ($flatResults as $key => $result) {
243
+            $matchCount = count($result->matchingTokens);
244
+            $result->score = ($matchCount / $tokenCount) * $result->score;
245
+            $flatResults[$key] = $result;
246
+        }
247
+        return $flatResults;
248
+    }
249 249
 
250
-	/**
251
-	 * Uses the levenshtein algorithm to determine the term that is
252
-	 * closest to the token that was input for the search
253
-	 * @return array
254
-	 * @throws \Exception
255
-	 */
256
-	private function getSearchSuggestions()
257
-	{
258
-		$tokens = $this->getTokens();
259
-		$allResults = array();
260
-		foreach ($tokens as $token) {
261
-			$db = $this->getSearchDbHandle();
262
-			$db->sqliteCreateFunction('levenshtein', 'levenshtein', 2);
263
-			$sql = '
250
+    /**
251
+     * Uses the levenshtein algorithm to determine the term that is
252
+     * closest to the token that was input for the search
253
+     * @return array
254
+     * @throws \Exception
255
+     */
256
+    private function getSearchSuggestions()
257
+    {
258
+        $tokens = $this->getTokens();
259
+        $allResults = array();
260
+        foreach ($tokens as $token) {
261
+            $db = $this->getSearchDbHandle();
262
+            $db->sqliteCreateFunction('levenshtein', 'levenshtein', 2);
263
+            $sql = '
264 264
 				SELECT *
265 265
 				  FROM (
266 266
 				  	SELECT :token as original, term, levenshtein(term, :token) as editDistance
@@ -270,35 +270,35 @@  discard block
 block discarded – undo
270 270
 			  	     )
271 271
 			  	   WHERE editDistance > 0
272 272
 			';
273
-			$stmt = $db->prepare($sql);
274
-			if ($stmt === false) {
275
-				throw new \Exception('SQLite exception: <pre>' . print_r($db->errorInfo(), true) . '</pre> for SQL:<pre>' . $sql . '</pre>');
276
-			}
277
-			$stmt->bindValue(':token', $token);
278
-			if (($stmt === false) | (!$stmt->execute())) {
279
-				throw new \Exception('SQLite exception: <pre>' . print_r($db->errorInfo(), true) . '</pre> for SQL:<pre>' . $sql . '</pre>');
280
-			}
281
-			$result = $stmt->fetchAll(\PDO::FETCH_CLASS, '\CloudControl\Cms\search\results\SearchSuggestion');
282
-			$allResults = array_merge($result, $allResults);
283
-		}
284
-		return $allResults;
285
-	}
273
+            $stmt = $db->prepare($sql);
274
+            if ($stmt === false) {
275
+                throw new \Exception('SQLite exception: <pre>' . print_r($db->errorInfo(), true) . '</pre> for SQL:<pre>' . $sql . '</pre>');
276
+            }
277
+            $stmt->bindValue(':token', $token);
278
+            if (($stmt === false) | (!$stmt->execute())) {
279
+                throw new \Exception('SQLite exception: <pre>' . print_r($db->errorInfo(), true) . '</pre> for SQL:<pre>' . $sql . '</pre>');
280
+            }
281
+            $result = $stmt->fetchAll(\PDO::FETCH_CLASS, '\CloudControl\Cms\search\results\SearchSuggestion');
282
+            $allResults = array_merge($result, $allResults);
283
+        }
284
+        return $allResults;
285
+    }
286 286
 
287
-	/**
288
-	 * Retrieves all tokens from the tokenizer
289
-	 * @return array
290
-	 */
291
-	private function getTokens()
292
-	{
293
-		$tokenVector = array(
294
-			'query' => array(),
295
-		);
296
-		$tokenVector['query'] = $this->tokenizer->getTokenVector();
297
-		$tokens = $this->applyFilters($tokenVector);
298
-		if (!empty($tokens)) {
299
-			$tokens = array_keys($tokens['query']);
300
-		}
287
+    /**
288
+     * Retrieves all tokens from the tokenizer
289
+     * @return array
290
+     */
291
+    private function getTokens()
292
+    {
293
+        $tokenVector = array(
294
+            'query' => array(),
295
+        );
296
+        $tokenVector['query'] = $this->tokenizer->getTokenVector();
297
+        $tokens = $this->applyFilters($tokenVector);
298
+        if (!empty($tokens)) {
299
+            $tokens = array_keys($tokens['query']);
300
+        }
301 301
 
302
-		return $tokens;
303
-	}
302
+        return $tokens;
303
+    }
304 304
 }
305 305
\ No newline at end of file
Please login to merge, or discard this patch.
Spacing   +2 added lines, -2 removed lines patch added patch discarded remove patch
@@ -154,7 +154,7 @@  discard block
 block discarded – undo
154 154
 		  GROUP BY term_frequency.documentPath, term_frequency.term
155 155
 		  ORDER BY score DESC
156 156
 		';
157
-		if(!$stmt = $db->prepare($sql)) {
157
+		if (!$stmt = $db->prepare($sql)) {
158 158
 			throw new \Exception('SQLite exception: <pre>' . print_r($db->errorInfo(), true) . '</pre> for SQL:<pre>' . $sql . '</pre>');
159 159
 		}
160 160
 		$stmt->bindValue(':query', $token);
@@ -218,7 +218,7 @@  discard block
 block discarded – undo
218 218
 			  FROM inverse_document_frequency
219 219
 			 WHERE term IN (' . $terms . ') 
220 220
 		';
221
-		if(!$stmt = $db->prepare($sql)) {
221
+		if (!$stmt = $db->prepare($sql)) {
222 222
 			throw new \Exception('SQLite exception: <pre>' . print_r($db->errorInfo(), true) . '</pre> for SQL:<pre>' . $sql . '</pre>');
223 223
 		}
224 224
 		if (!$stmt->execute()) {
Please login to merge, or discard this patch.
src/search/Tokenizer.php 1 patch
Indentation   +41 added lines, -41 removed lines patch added patch discarded remove patch
@@ -13,47 +13,47 @@
 block discarded – undo
13 13
  */
14 14
 class Tokenizer
15 15
 {
16
-	protected $inputString;
17
-	protected $tokenVector = array();
18
-
19
-	/**
20
-	 * Tokenizer constructor.
21
-	 *
22
-	 * @param string $string Should preferably be parsed wit \CloudControl\Cms\search\CharacterFilter
23
-	 * @see \CloudControl\Cms\search\CharacterFilter
24
-	 */
25
-	public function __construct($string)
26
-	{
27
-		$this->inputString = $string;
28
-		$this->tokenize();
29
-	}
30
-
31
-	protected function tokenize()
32
-	{
33
-		$tokens = explode(' ', $this->inputString);
34
-		foreach ($tokens as $token) {
35
-			$this->addTokenToVector($token);
36
-		}
37
-	}
38
-
39
-	protected function addTokenToVector($token)
40
-	{
41
-		if (!empty($token)) {
42
-			if (isset($this->tokenVector[$token])) {
43
-				$this->tokenVector[$token] += 1;
44
-			} else {
45
-				$this->tokenVector[$token] = 1;
46
-			}
47
-		}
48
-	}
49
-
50
-	/**
51
-	 * @return array
52
-	 */
53
-	public function getTokenVector()
54
-	{
55
-		return $this->tokenVector;
56
-	}
16
+    protected $inputString;
17
+    protected $tokenVector = array();
18
+
19
+    /**
20
+     * Tokenizer constructor.
21
+     *
22
+     * @param string $string Should preferably be parsed wit \CloudControl\Cms\search\CharacterFilter
23
+     * @see \CloudControl\Cms\search\CharacterFilter
24
+     */
25
+    public function __construct($string)
26
+    {
27
+        $this->inputString = $string;
28
+        $this->tokenize();
29
+    }
30
+
31
+    protected function tokenize()
32
+    {
33
+        $tokens = explode(' ', $this->inputString);
34
+        foreach ($tokens as $token) {
35
+            $this->addTokenToVector($token);
36
+        }
37
+    }
38
+
39
+    protected function addTokenToVector($token)
40
+    {
41
+        if (!empty($token)) {
42
+            if (isset($this->tokenVector[$token])) {
43
+                $this->tokenVector[$token] += 1;
44
+            } else {
45
+                $this->tokenVector[$token] = 1;
46
+            }
47
+        }
48
+    }
49
+
50
+    /**
51
+     * @return array
52
+     */
53
+    public function getTokenVector()
54
+    {
55
+        return $this->tokenVector;
56
+    }
57 57
 
58 58
 
59 59
 }
60 60
\ No newline at end of file
Please login to merge, or discard this patch.
src/search/Filter.php 1 patch
Indentation   +10 added lines, -10 removed lines patch added patch discarded remove patch
@@ -11,15 +11,15 @@
 block discarded – undo
11 11
 
12 12
 interface Filter
13 13
 {
14
-	/**
15
-	 * Filter constructor.
16
-	 *
17
-	 * @param array $tokens
18
-	 */
19
-	public function __construct($tokens);
14
+    /**
15
+     * Filter constructor.
16
+     *
17
+     * @param array $tokens
18
+     */
19
+    public function __construct($tokens);
20 20
 
21
-	/**
22
-	 * @return array
23
-	 */
24
-	public function getFilterResults();
21
+    /**
22
+     * @return array
23
+     */
24
+    public function getFilterResults();
25 25
 }
26 26
\ No newline at end of file
Please login to merge, or discard this patch.
src/search/DocumentTokenizer.php 2 patches
Indentation   +145 added lines, -145 removed lines patch added patch discarded remove patch
@@ -13,149 +13,149 @@
 block discarded – undo
13 13
 
14 14
 class DocumentTokenizer
15 15
 {
16
-	/**
17
-	 * @var Document
18
-	 */
19
-	protected $document;
20
-
21
-	/**
22
-	 * @var array
23
-	 */
24
-	protected $tokenVector = array();
25
-	protected $storage;
26
-
27
-	/**
28
-	 * Tokenizer constructor.
29
-	 *
30
-	 * @param \CloudControl\Cms\storage\Document $document
31
-	 * @param Storage                   $storage
32
-	 */
33
-	public function __construct(Document $document, Storage $storage)
34
-	{
35
-		$this->document = $document;
36
-		$this->storage = $storage;
37
-		$this->tokenize();
38
-	}
39
-
40
-	/**
41
-	 * Execute tokenization of all document fields
42
-	 */
43
-	private function tokenize()
44
-	{
45
-		$this->tokenizeTitle();
46
-		$this->tokenizeFields();
47
-		$this->tokenizeBricks();
48
-		$this->tokenizeDynamicBricks();
49
-		$this->tokenVector = array_filter($this->tokenVector);
50
-		arsort($this->tokenVector);
51
-	}
52
-
53
-	private function tokenizeTitle()
54
-	{
55
-		$filteredString = new CharacterFilter($this->document->title);
56
-		$tokenizer = new Tokenizer($filteredString);
57
-		$this->addTokenVectorToVector($tokenizer->getTokenVector(), 'title');
58
-	}
59
-
60
-	private function tokenizeFields()
61
-	{
62
-		$fields = $this->document->fields;
63
-		$documentDefinition = $this->storage->getDocumentTypes()->getDocumentTypeBySlug($this->document->documentTypeSlug);
64
-		foreach ($fields as $fieldName => $field) {
65
-			$fieldType = $this->getFieldType($fieldName, $documentDefinition);
66
-			$this->tokenizeField($field, $fieldName, $fieldType);
67
-		}
68
-	}
69
-
70
-	private function tokenizeField($field, $fieldName, $fieldType)
71
-	{
72
-		foreach ($field as $value) {
73
-			// Only index fields that contain text
74
-			if (in_array($fieldType, array('String', 'Text', 'Rich Text'))) {
75
-				$filteredString = new CharacterFilter($value);
76
-				$tokenizer = new Tokenizer($filteredString);
77
-				$this->addTokenVectorToVector($tokenizer->getTokenVector(), $fieldName);
78
-			}
79
-		}
80
-	}
81
-
82
-	private function tokenizeBricks()
83
-	{
84
-		$bricks = $this->document->bricks;
85
-		foreach ($bricks as $brickSlug => $bricks) {
86
-			foreach ($bricks as $brick) {
87
-				$this->tokenizeBrick($brick, $brickSlug);
88
-			}
89
-		}
90
-	}
91
-
92
-	private function tokenizeBrick($brick, $brickSlug)
93
-	{
94
-		$fields  = $brick->fields;
95
-		$brickDefinition = $this->storage->getBricks()->getBrickBySlug($brick->type);
96
-		foreach ($fields as $fieldName => $field) {
97
-			$fieldType = $this->getFieldType($fieldName, $brickDefinition);
98
-			$this->tokenizeField($field, $brickSlug . '__' . $fieldName, $fieldType);
99
-		}
100
-	}
101
-
102
-	private function tokenizeDynamicBricks()
103
-	{
104
-		$dynamicBricks = $this->document->dynamicBricks;
105
-		foreach ($dynamicBricks as $key => $brick) {
106
-			$this->tokenizeBrick($brick, 'dynamicBricks__' . $brick->type . $key);
107
-		}
108
-	}
109
-
110
-	public function getTokens()
111
-	{
112
-		return $this->tokenVector;
113
-	}
114
-
115
-	/**
116
-	 * Add a token to the existing tokenvector
117
-	 * @param     		$token
118
-	 * @param string    $field
119
-	 * @param int 		$count
120
-	 */
121
-	private function addTokenToVector($token, $field, $count = 1)
122
-	{
123
-		if (!empty($token)) {
124
-			if (isset($this->tokenVector[$field][$token])) {
125
-				$this->tokenVector[$field][$token] += $count;
126
-			} else {
127
-				$this->tokenVector[$field][$token] = $count;
128
-			}
129
-		}
130
-	}
131
-
132
-	/**
133
-	 * Add a complete token vector to the existing one.
134
-	 * @param $tokenVector
135
-	 * @param $field
136
-	 */
137
-	private function addTokenVectorToVector($tokenVector, $field)
138
-	{
139
-		foreach ($tokenVector as $token => $count) {
140
-			$this->addTokenToVector($token, $field, $count);
141
-		}
142
-	}
143
-
144
-	/**
145
-	 * Get the type for a field
146
-	 * @param $fieldName
147
-	 * @param $documentDefinition
148
-	 * @return mixed
149
-	 * @throws \Exception
150
-	 */
151
-	private function getFieldType($fieldName, $documentDefinition)
152
-	{
153
-		foreach ($documentDefinition->fields as $fieldTypeDefinition) {
154
-			if ($fieldTypeDefinition->slug === $fieldName) {
155
-				return $fieldTypeDefinition->type;
156
-			}
157
-		}
158
-
159
-		throw new \Exception('Unknown field type for field' . $fieldName . ' in document ' . $this->document->path);
160
-	}
16
+    /**
17
+     * @var Document
18
+     */
19
+    protected $document;
20
+
21
+    /**
22
+     * @var array
23
+     */
24
+    protected $tokenVector = array();
25
+    protected $storage;
26
+
27
+    /**
28
+     * Tokenizer constructor.
29
+     *
30
+     * @param \CloudControl\Cms\storage\Document $document
31
+     * @param Storage                   $storage
32
+     */
33
+    public function __construct(Document $document, Storage $storage)
34
+    {
35
+        $this->document = $document;
36
+        $this->storage = $storage;
37
+        $this->tokenize();
38
+    }
39
+
40
+    /**
41
+     * Execute tokenization of all document fields
42
+     */
43
+    private function tokenize()
44
+    {
45
+        $this->tokenizeTitle();
46
+        $this->tokenizeFields();
47
+        $this->tokenizeBricks();
48
+        $this->tokenizeDynamicBricks();
49
+        $this->tokenVector = array_filter($this->tokenVector);
50
+        arsort($this->tokenVector);
51
+    }
52
+
53
+    private function tokenizeTitle()
54
+    {
55
+        $filteredString = new CharacterFilter($this->document->title);
56
+        $tokenizer = new Tokenizer($filteredString);
57
+        $this->addTokenVectorToVector($tokenizer->getTokenVector(), 'title');
58
+    }
59
+
60
+    private function tokenizeFields()
61
+    {
62
+        $fields = $this->document->fields;
63
+        $documentDefinition = $this->storage->getDocumentTypes()->getDocumentTypeBySlug($this->document->documentTypeSlug);
64
+        foreach ($fields as $fieldName => $field) {
65
+            $fieldType = $this->getFieldType($fieldName, $documentDefinition);
66
+            $this->tokenizeField($field, $fieldName, $fieldType);
67
+        }
68
+    }
69
+
70
+    private function tokenizeField($field, $fieldName, $fieldType)
71
+    {
72
+        foreach ($field as $value) {
73
+            // Only index fields that contain text
74
+            if (in_array($fieldType, array('String', 'Text', 'Rich Text'))) {
75
+                $filteredString = new CharacterFilter($value);
76
+                $tokenizer = new Tokenizer($filteredString);
77
+                $this->addTokenVectorToVector($tokenizer->getTokenVector(), $fieldName);
78
+            }
79
+        }
80
+    }
81
+
82
+    private function tokenizeBricks()
83
+    {
84
+        $bricks = $this->document->bricks;
85
+        foreach ($bricks as $brickSlug => $bricks) {
86
+            foreach ($bricks as $brick) {
87
+                $this->tokenizeBrick($brick, $brickSlug);
88
+            }
89
+        }
90
+    }
91
+
92
+    private function tokenizeBrick($brick, $brickSlug)
93
+    {
94
+        $fields  = $brick->fields;
95
+        $brickDefinition = $this->storage->getBricks()->getBrickBySlug($brick->type);
96
+        foreach ($fields as $fieldName => $field) {
97
+            $fieldType = $this->getFieldType($fieldName, $brickDefinition);
98
+            $this->tokenizeField($field, $brickSlug . '__' . $fieldName, $fieldType);
99
+        }
100
+    }
101
+
102
+    private function tokenizeDynamicBricks()
103
+    {
104
+        $dynamicBricks = $this->document->dynamicBricks;
105
+        foreach ($dynamicBricks as $key => $brick) {
106
+            $this->tokenizeBrick($brick, 'dynamicBricks__' . $brick->type . $key);
107
+        }
108
+    }
109
+
110
+    public function getTokens()
111
+    {
112
+        return $this->tokenVector;
113
+    }
114
+
115
+    /**
116
+     * Add a token to the existing tokenvector
117
+     * @param     		$token
118
+     * @param string    $field
119
+     * @param int 		$count
120
+     */
121
+    private function addTokenToVector($token, $field, $count = 1)
122
+    {
123
+        if (!empty($token)) {
124
+            if (isset($this->tokenVector[$field][$token])) {
125
+                $this->tokenVector[$field][$token] += $count;
126
+            } else {
127
+                $this->tokenVector[$field][$token] = $count;
128
+            }
129
+        }
130
+    }
131
+
132
+    /**
133
+     * Add a complete token vector to the existing one.
134
+     * @param $tokenVector
135
+     * @param $field
136
+     */
137
+    private function addTokenVectorToVector($tokenVector, $field)
138
+    {
139
+        foreach ($tokenVector as $token => $count) {
140
+            $this->addTokenToVector($token, $field, $count);
141
+        }
142
+    }
143
+
144
+    /**
145
+     * Get the type for a field
146
+     * @param $fieldName
147
+     * @param $documentDefinition
148
+     * @return mixed
149
+     * @throws \Exception
150
+     */
151
+    private function getFieldType($fieldName, $documentDefinition)
152
+    {
153
+        foreach ($documentDefinition->fields as $fieldTypeDefinition) {
154
+            if ($fieldTypeDefinition->slug === $fieldName) {
155
+                return $fieldTypeDefinition->type;
156
+            }
157
+        }
158
+
159
+        throw new \Exception('Unknown field type for field' . $fieldName . ' in document ' . $this->document->path);
160
+    }
161 161
 }
162 162
\ No newline at end of file
Please login to merge, or discard this patch.
Spacing   +1 added lines, -1 removed lines patch added patch discarded remove patch
@@ -91,7 +91,7 @@
 block discarded – undo
91 91
 
92 92
 	private function tokenizeBrick($brick, $brickSlug)
93 93
 	{
94
-		$fields  = $brick->fields;
94
+		$fields = $brick->fields;
95 95
 		$brickDefinition = $this->storage->getBricks()->getBrickBySlug($brick->type);
96 96
 		foreach ($fields as $fieldName => $field) {
97 97
 			$fieldType = $this->getFieldType($fieldName, $brickDefinition);
Please login to merge, or discard this patch.
src/search/filters/EnglishStopWords.php 1 patch
Indentation   +1 added lines, -1 removed lines patch added patch discarded remove patch
@@ -10,5 +10,5 @@
 block discarded – undo
10 10
 
11 11
 class EnglishStopWords extends StopWordsFilter
12 12
 {
13
-	protected $stopWords = array('a', 'about', 'above', 'after', 'again', 'against', 'all', 'am', 'an', 'and', 'any', 'are', 'arent', 'as', 'at', 'be', 'because', 'been', 'before', 'being', 'below', 'between', 'both', 'but', 'by', 'cant', 'cannot', 'could', 'couldnt', 'did', 'didnt', 'do', 'does', 'doesnt', 'doing', 'dont', 'down', 'during', 'each', 'few', 'for', 'from', 'further', 'had', 'hadnt', 'has', 'hasnt', 'have', 'havent', 'having', 'he', 'hed', 'hell', 'hes', 'her', 'here', 'heres', 'hers', 'herself', 'him', 'himself', 'his', 'how', 'hows', 'i', 'id', 'ill', 'im', 'ive', 'if', 'in', 'into', 'is', 'isnt', 'it', 'its', 'its', 'itself', 'lets', 'me', 'more', 'most', 'mustnt', 'my', 'myself', 'no', 'nor', 'not', 'of', 'off', 'on', 'once', 'only', 'or', 'other', 'ought', 'our', 'ours	ourselves', 'out', 'over', 'own', 'same', 'shant', 'she', 'shed', 'shell', 'shes', 'should', 'shouldnt', 'so', 'some', 'such', 'than', 'that', 'thats', 'the', 'their', 'theirs', 'them', 'themselves', 'then', 'there', 'theres', 'these', 'they', 'theyd', 'theyll', 'theyre', 'theyve', 'this', 'those', 'through', 'to', 'too', 'under', 'until', 'up', 'very', 'was', 'wasnt', 'we', 'wed', 'well', 'were', 'weve', 'were', 'werent', 'what', 'whats', 'when', 'whens', 'where', 'wheres', 'which', 'while', 'who', 'whos', 'whom', 'why', 'whys', 'with', 'wont', 'would', 'wouldnt', 'you', 'youd', 'youll', 'youre', 'youve', 'your', 'yours', 'yourself', 'yourselves');
13
+    protected $stopWords = array('a', 'about', 'above', 'after', 'again', 'against', 'all', 'am', 'an', 'and', 'any', 'are', 'arent', 'as', 'at', 'be', 'because', 'been', 'before', 'being', 'below', 'between', 'both', 'but', 'by', 'cant', 'cannot', 'could', 'couldnt', 'did', 'didnt', 'do', 'does', 'doesnt', 'doing', 'dont', 'down', 'during', 'each', 'few', 'for', 'from', 'further', 'had', 'hadnt', 'has', 'hasnt', 'have', 'havent', 'having', 'he', 'hed', 'hell', 'hes', 'her', 'here', 'heres', 'hers', 'herself', 'him', 'himself', 'his', 'how', 'hows', 'i', 'id', 'ill', 'im', 'ive', 'if', 'in', 'into', 'is', 'isnt', 'it', 'its', 'its', 'itself', 'lets', 'me', 'more', 'most', 'mustnt', 'my', 'myself', 'no', 'nor', 'not', 'of', 'off', 'on', 'once', 'only', 'or', 'other', 'ought', 'our', 'ours	ourselves', 'out', 'over', 'own', 'same', 'shant', 'she', 'shed', 'shell', 'shes', 'should', 'shouldnt', 'so', 'some', 'such', 'than', 'that', 'thats', 'the', 'their', 'theirs', 'them', 'themselves', 'then', 'there', 'theres', 'these', 'they', 'theyd', 'theyll', 'theyre', 'theyve', 'this', 'those', 'through', 'to', 'too', 'under', 'until', 'up', 'very', 'was', 'wasnt', 'we', 'wed', 'well', 'were', 'weve', 'were', 'werent', 'what', 'whats', 'when', 'whens', 'where', 'wheres', 'which', 'while', 'who', 'whos', 'whom', 'why', 'whys', 'with', 'wont', 'would', 'wouldnt', 'you', 'youd', 'youll', 'youre', 'youve', 'your', 'yours', 'yourself', 'yourselves');
14 14
 }
15 15
\ No newline at end of file
Please login to merge, or discard this patch.
src/search/Indexer.php 1 patch
Indentation   +159 added lines, -159 removed lines patch added patch discarded remove patch
@@ -22,87 +22,87 @@  discard block
 block discarded – undo
22 22
  */
23 23
 class Indexer extends SearchDbConnected
24 24
 {
25
-	const SQLITE_MAX_COMPOUND_SELECT = 100;
26
-	protected $filters = array(
27
-		'DutchStopWords',
28
-		'EnglishStopWords'
29
-	);
30
-	protected $storageDir;
31
-	/**
32
-	 * @var double
33
-	 */
34
-	protected $loggingStart;
35
-	/**
36
-	 * @var string
37
-	 */
38
-	protected $log;
39
-	/**
40
-	 * @var double
41
-	 */
42
-	protected $lastLog;
43
-
44
-	const SEARCH_TEMP_DB = 'search_tmp.db';
45
-
46
-	/**
47
-	 * Creates a new temporary search db, cleans it if it exists
48
-	 * then calculates and stores the search index in this db
49
-	 * and finally if indexing completed replaces the current search
50
-	 * db with the temporary one. Returns the log in string format.
51
-	 * @return string
52
-	 */
53
-	public function updateIndex()
54
-	{
55
-		$this->startLogging();
56
-		$this->addLog('Indexing start.');
57
-		$this->addLog('Clearing index.');
58
-		$this->resetIndex();
59
-		$this->addLog('Cleaning Published Deleted Documents');
60
-		$this->storage->getDocuments()->cleanPublishedDeletedDocuments();
61
-		$this->addLog('Retrieving documents to be indexed.');
62
-		$documents = $this->storage->getDocuments()->getPublishedDocumentsNoFolders();
63
-		$this->addLog('Start Document Term Count for ' . count($documents) . ' documents');
64
-		$this->createDocumentTermCount($documents);
65
-		$this->addLog('Start Document Term Frequency.');
66
-		$this->createDocumentTermFrequency();
67
-		$this->addLog('Start Term Field Length Norm.');
68
-		$this->createTermFieldLengthNorm();
69
-		$this->addLog('Start Inverse Document Frequency.');
70
-		$this->createInverseDocumentFrequency();
71
-		$this->addLog('Replacing old index.');
72
-		$this->replaceOldIndex();
73
-		$this->addLog('Indexing complete.');
74
-		return $this->log;
75
-	}
76
-
77
-	/**
78
-	 * Count how often a term is used in a document
79
-	 *
80
-	 * @param $documents
81
-	 */
82
-	public function createDocumentTermCount($documents)
83
-	{
84
-		$termCount = new TermCount($this->getSearchDbHandle(), $documents, $this->filters, $this->storage);
85
-		$termCount->execute();
86
-	}
87
-
88
-	/**
89
-	 * Calculate the frequency index for a term with
90
-	 * a field
91
-	 */
92
-	public function createDocumentTermFrequency()
93
-	{
94
-		$termFrequency = new TermFrequency($this->getSearchDbHandle());
95
-		$termFrequency->execute();
96
-	}
97
-
98
-
99
-	/**
100
-	 * Resets the entire index
101
-	 */
102
-	public function resetIndex()
103
-	{
104
-		$db = $this->getSearchDbHandle();
105
-		$sql = '
25
+    const SQLITE_MAX_COMPOUND_SELECT = 100;
26
+    protected $filters = array(
27
+        'DutchStopWords',
28
+        'EnglishStopWords'
29
+    );
30
+    protected $storageDir;
31
+    /**
32
+     * @var double
33
+     */
34
+    protected $loggingStart;
35
+    /**
36
+     * @var string
37
+     */
38
+    protected $log;
39
+    /**
40
+     * @var double
41
+     */
42
+    protected $lastLog;
43
+
44
+    const SEARCH_TEMP_DB = 'search_tmp.db';
45
+
46
+    /**
47
+     * Creates a new temporary search db, cleans it if it exists
48
+     * then calculates and stores the search index in this db
49
+     * and finally if indexing completed replaces the current search
50
+     * db with the temporary one. Returns the log in string format.
51
+     * @return string
52
+     */
53
+    public function updateIndex()
54
+    {
55
+        $this->startLogging();
56
+        $this->addLog('Indexing start.');
57
+        $this->addLog('Clearing index.');
58
+        $this->resetIndex();
59
+        $this->addLog('Cleaning Published Deleted Documents');
60
+        $this->storage->getDocuments()->cleanPublishedDeletedDocuments();
61
+        $this->addLog('Retrieving documents to be indexed.');
62
+        $documents = $this->storage->getDocuments()->getPublishedDocumentsNoFolders();
63
+        $this->addLog('Start Document Term Count for ' . count($documents) . ' documents');
64
+        $this->createDocumentTermCount($documents);
65
+        $this->addLog('Start Document Term Frequency.');
66
+        $this->createDocumentTermFrequency();
67
+        $this->addLog('Start Term Field Length Norm.');
68
+        $this->createTermFieldLengthNorm();
69
+        $this->addLog('Start Inverse Document Frequency.');
70
+        $this->createInverseDocumentFrequency();
71
+        $this->addLog('Replacing old index.');
72
+        $this->replaceOldIndex();
73
+        $this->addLog('Indexing complete.');
74
+        return $this->log;
75
+    }
76
+
77
+    /**
78
+     * Count how often a term is used in a document
79
+     *
80
+     * @param $documents
81
+     */
82
+    public function createDocumentTermCount($documents)
83
+    {
84
+        $termCount = new TermCount($this->getSearchDbHandle(), $documents, $this->filters, $this->storage);
85
+        $termCount->execute();
86
+    }
87
+
88
+    /**
89
+     * Calculate the frequency index for a term with
90
+     * a field
91
+     */
92
+    public function createDocumentTermFrequency()
93
+    {
94
+        $termFrequency = new TermFrequency($this->getSearchDbHandle());
95
+        $termFrequency->execute();
96
+    }
97
+
98
+
99
+    /**
100
+     * Resets the entire index
101
+     */
102
+    public function resetIndex()
103
+    {
104
+        $db = $this->getSearchDbHandle();
105
+        $sql = '
106 106
 			DELETE FROM term_count;
107 107
 			DELETE FROM term_frequency;
108 108
 			DELETE FROM inverse_document_frequency;
@@ -110,82 +110,82 @@  discard block
 block discarded – undo
110 110
 			UPDATE `sqlite_sequence` SET `seq`= 0 WHERE `name`=\'term_frequency\';
111 111
 			UPDATE `sqlite_sequence` SET `seq`= 0 WHERE `name`=\'inverse_document_frequency\';
112 112
 		';
113
-		$db->exec($sql);
114
-	}
115
-
116
-	/**
117
-	 * Calculates the inverse document frequency for each
118
-	 * term. This is a representation of how often a certain
119
-	 * term is used in comparison to all terms.
120
-	 */
121
-	public function createInverseDocumentFrequency()
122
-	{
123
-		$documentCount = $this->getTotalDocumentCount();
124
-		$inverseDocumentFrequency = new InverseDocumentFrequency($this->getSearchDbHandle(), $documentCount);
125
-		$inverseDocumentFrequency->execute();
126
-	}
127
-
128
-	/**
129
-	 * @return int|mixed
130
-	 */
131
-	private function getTotalDocumentCount()
132
-	{
133
-		return $this->storage->getDocuments()->getTotalDocumentCount();
134
-	}
135
-
136
-	/**
137
-	 * Calculates the Term Field Length Norm.
138
-	 * This is an index determining how important a
139
-	 * term is, based on the total length of the field
140
-	 * it comes from.
141
-	 */
142
-	public function createTermFieldLengthNorm()
143
-	{
144
-		$termFieldLengthNorm = new TermFieldLengthNorm($this->getSearchDbHandle());
145
-		$termFieldLengthNorm->execute();
146
-	}
147
-
148
-	/**
149
-	 * Stores the time the indexing started in memory
150
-	 */
151
-	private function startLogging()
152
-	{
153
-		$this->loggingStart = round(microtime(true) * 1000);
154
-		$this->lastLog = $this->loggingStart;
155
-	}
156
-
157
-	/**
158
-	 * Adds a logline with the time since last log
159
-	 * @param $string
160
-	 */
161
-	private function addLog($string)
162
-	{
163
-		$currentTime = round(microtime(true) * 1000);
164
-		$this->log .= date('d-m-Y H:i:s - ') . str_pad($string, 50, " ", STR_PAD_RIGHT) . "\t" . ($currentTime - $this->lastLog) . 'ms since last log. ' . "\t" . ($currentTime - $this->loggingStart) . 'ms since start.' . PHP_EOL;
165
-		$this->lastLog = round(microtime(true) * 1000);
166
-	}
167
-
168
-	/**
169
-	 * Creates the SQLite \PDO object if it doesnt
170
-	 * exist and returns it.
171
-	 * @return \PDO
172
-	 */
173
-	protected function getSearchDbHandle()
174
-	{
175
-		if ($this->searchDbHandle === null) {
176
-			$path = $this->storageDir . DIRECTORY_SEPARATOR;
177
-			$this->searchDbHandle = new \PDO('sqlite:' . $path . self::SEARCH_TEMP_DB);
178
-		}
179
-		return $this->searchDbHandle;
180
-	}
181
-
182
-	/**
183
-	 * Replaces the old search index database with the new one.
184
-	 */
185
-	public function replaceOldIndex()
186
-	{
187
-		$this->searchDbHandle = null;
188
-		$path = $this->storageDir . DIRECTORY_SEPARATOR;
189
-		rename($path . self::SEARCH_TEMP_DB, $path . 'search.db');
190
-	}
113
+        $db->exec($sql);
114
+    }
115
+
116
+    /**
117
+     * Calculates the inverse document frequency for each
118
+     * term. This is a representation of how often a certain
119
+     * term is used in comparison to all terms.
120
+     */
121
+    public function createInverseDocumentFrequency()
122
+    {
123
+        $documentCount = $this->getTotalDocumentCount();
124
+        $inverseDocumentFrequency = new InverseDocumentFrequency($this->getSearchDbHandle(), $documentCount);
125
+        $inverseDocumentFrequency->execute();
126
+    }
127
+
128
+    /**
129
+     * @return int|mixed
130
+     */
131
+    private function getTotalDocumentCount()
132
+    {
133
+        return $this->storage->getDocuments()->getTotalDocumentCount();
134
+    }
135
+
136
+    /**
137
+     * Calculates the Term Field Length Norm.
138
+     * This is an index determining how important a
139
+     * term is, based on the total length of the field
140
+     * it comes from.
141
+     */
142
+    public function createTermFieldLengthNorm()
143
+    {
144
+        $termFieldLengthNorm = new TermFieldLengthNorm($this->getSearchDbHandle());
145
+        $termFieldLengthNorm->execute();
146
+    }
147
+
148
+    /**
149
+     * Stores the time the indexing started in memory
150
+     */
151
+    private function startLogging()
152
+    {
153
+        $this->loggingStart = round(microtime(true) * 1000);
154
+        $this->lastLog = $this->loggingStart;
155
+    }
156
+
157
+    /**
158
+     * Adds a logline with the time since last log
159
+     * @param $string
160
+     */
161
+    private function addLog($string)
162
+    {
163
+        $currentTime = round(microtime(true) * 1000);
164
+        $this->log .= date('d-m-Y H:i:s - ') . str_pad($string, 50, " ", STR_PAD_RIGHT) . "\t" . ($currentTime - $this->lastLog) . 'ms since last log. ' . "\t" . ($currentTime - $this->loggingStart) . 'ms since start.' . PHP_EOL;
165
+        $this->lastLog = round(microtime(true) * 1000);
166
+    }
167
+
168
+    /**
169
+     * Creates the SQLite \PDO object if it doesnt
170
+     * exist and returns it.
171
+     * @return \PDO
172
+     */
173
+    protected function getSearchDbHandle()
174
+    {
175
+        if ($this->searchDbHandle === null) {
176
+            $path = $this->storageDir . DIRECTORY_SEPARATOR;
177
+            $this->searchDbHandle = new \PDO('sqlite:' . $path . self::SEARCH_TEMP_DB);
178
+        }
179
+        return $this->searchDbHandle;
180
+    }
181
+
182
+    /**
183
+     * Replaces the old search index database with the new one.
184
+     */
185
+    public function replaceOldIndex()
186
+    {
187
+        $this->searchDbHandle = null;
188
+        $path = $this->storageDir . DIRECTORY_SEPARATOR;
189
+        rename($path . self::SEARCH_TEMP_DB, $path . 'search.db');
190
+    }
191 191
 }
192 192
\ No newline at end of file
Please login to merge, or discard this patch.