@@ -10,51 +10,51 @@ |
||
10 | 10 | |
class InverseDocumentFrequency
{
    /**
     * Connection to the SQLite search index.
     *
     * @var \PDO
     */
    protected $dbHandle;

    /**
     * Total number of documents; the numerator of the IDF formula.
     *
     * @var int
     */
    protected $documentCount;

    /**
     * InverseDocumentFrequency constructor.
     *
     * @param \PDO $dbHandle
     * @param int  $documentCount
     */
    public function __construct($dbHandle, $documentCount)
    {
        $this->dbHandle = $dbHandle;
        $this->documentCount = $documentCount;
    }

    /**
     * Inserts one inverse_document_frequency row per term found in term_count.
     *
     * Formula:
     *   idf(t) = 1 + log(totalDocuments / (documentsThatContainTheTerm + 1))
     *
     * @throws \Exception when the statement cannot be prepared or executed
     */
    public function execute()
    {
        $db = $this->dbHandle;
        // Make PHP's log() (natural logarithm) callable from SQL.
        $db->sqliteCreateFunction('log', 'log', 1);

        // CAST(... AS REAL) keeps SQLite from truncating the quotient to an integer.
        // COUNT(DISTINCT documentPath) counts every document once, even when the
        // term has several term_count rows for it (one per field).
        // The parentheses around (COUNT(...) + 1) make the SQL match the formula above.
        $sql = '
            INSERT INTO inverse_document_frequency (term, inverseDocumentFrequency)
            SELECT term, (1 + log(CAST(:documentCount AS REAL) / (COUNT(DISTINCT documentPath) + 1))) as inverseDocumentFrequency
              FROM term_count
          GROUP BY term
        ';

        $stmt = $db->prepare($sql);
        if (!$stmt) {
            throw $this->createException($db->errorInfo(), $sql);
        }

        $stmt->bindValue(':documentCount', (int) $this->documentCount, \PDO::PARAM_INT);
        if ($stmt->execute() === false) {
            // Failures of a statement are reported on the statement handle,
            // not on the connection.
            throw $this->createException($stmt->errorInfo(), $sql);
        }
    }

    /**
     * Builds the exception thrown for a failed prepare/execute.
     *
     * @param array  $errorInfo result of PDO::errorInfo() or PDOStatement::errorInfo()
     * @param string $sql       the statement that failed
     *
     * @return \Exception
     */
    private function createException($errorInfo, $sql)
    {
        $errorMsg = isset($errorInfo[2]) ? $errorInfo[2] : '';

        return new \Exception('SQLite Exception: ' . $errorMsg . ' in SQL: <br /><pre>' . $sql . '</pre>');
    }
}
61 | 61 | \ No newline at end of file |
@@ -14,113 +14,113 @@ |
||
14 | 14 | |
class TermCount
{
    /**
     * Connection to the SQLite search index.
     *
     * @var \PDO
     */
    protected $dbHandle;

    /**
     * Documents to index; entries whose type is 'folder' are descended into via their content.
     *
     * @var array
     */
    protected $documents;

    /**
     * Filter class names, resolved inside \CloudControl\Cms\search\filters.
     *
     * @var array
     */
    protected $filters;

    /**
     * Storage handed to the DocumentTokenizer.
     *
     * @var Storage
     */
    protected $storage;

    /**
     * TermCount constructor.
     *
     * @param \PDO    $dbHandle
     * @param array   $documents
     * @param array   $filters
     * @param Storage $jsonStorage
     */
    public function __construct($dbHandle, $documents, $filters, $jsonStorage)
    {
        $this->dbHandle = $dbHandle;
        $this->documents = $documents;
        $this->filters = $filters;
        $this->storage = $jsonStorage;
    }

    /**
     * Tokenizes every document and stores its term counts in the term_count table.
     *
     * @throws \Exception when an insert fails
     */
    public function execute()
    {
        $this->iterateDocumentsAndCreateTermCount($this->documents);
    }

    /**
     * Runs the tokens through every configured filter, in order.
     *
     * @param mixed $tokens
     *
     * @return mixed the output of the last filter
     */
    protected function applyFilters($tokens)
    {
        foreach ($this->filters as $filterName) {
            $filterClassName = '\CloudControl\Cms\search\filters\\' . $filterName;
            $filter = new $filterClassName($tokens);
            $tokens = $filter->getFilterResults();
        }
        return $tokens;
    }

    /**
     * Inserts the term counts of one document, in batches of at most
     * Indexer::SQLITE_MAX_COMPOUND_SELECT rows per INSERT.
     *
     * @param object $document          read for its path property
     * @param array  $documentTermCount field => (term => count)
     *
     * @throws \Exception when an insert fails
     */
    protected function storeDocumentTermCount($document, $documentTermCount)
    {
        $db = $this->dbHandle;
        $sql = '
            INSERT INTO `term_count` (`documentPath`, `term`, `count`, `field`)
                 VALUES ';
        $values = array();
        $quotedDocumentPath = $db->quote($document->path);
        foreach ($documentTermCount as $field => $countArray) {
            $quotedField = $db->quote((string) $field);
            foreach ($countArray as $term => $count) {
                // Numeric terms come back from PHP as integer array keys, and counts
                // are integers; PDO::quote() expects strings.
                $values[] = $quotedDocumentPath . ', ' . $db->quote((string) $term) . ', ' . $db->quote((string) $count) . ', ' . $quotedField;
                if (count($values) >= Indexer::SQLITE_MAX_COMPOUND_SELECT) {
                    $this->executeStoreDocumentTermCount($values, $sql, $db);
                    $values = array();
                }
            }
        }
        if (!empty($values)) {
            $this->executeStoreDocumentTermCount($values, $sql, $db);
        }
    }

    /**
     * Appends the value tuples to the INSERT prefix and executes the statement.
     *
     * @param array  $values already quoted, comma separated column values, one entry per row
     * @param string $sql    the "INSERT ... VALUES " prefix
     * @param \PDO   $db
     *
     * @throws \Exception
     */
    protected function executeStoreDocumentTermCount($values, $sql, $db)
    {
        $sql .= '(' . implode('),' . PHP_EOL . '(', $values) . ');';
        $stmt = $db->prepare($sql);
        if ($stmt === false) {
            $errorInfo = $db->errorInfo();
            throw new \Exception('SQLite Exception: ' . $errorInfo[2] . ' in SQL: <br /><pre>' . $sql . '</pre>');
        }
        if (!$stmt->execute()) {
            // Failures of a statement are reported on the statement handle,
            // not on the connection.
            $errorInfo = $stmt->errorInfo();
            throw new \Exception('SQLite Exception: ' . $errorInfo[2] . ' in SQL: <br /><pre>' . $sql . '</pre>');
        }
    }

    /**
     * Tokenizes one document, filters the tokens and stores the result.
     *
     * @param object $document
     *
     * @throws \Exception
     */
    private function createTermCountForDocument($document)
    {
        $tokenizer = new DocumentTokenizer($document, $this->storage);
        $tokens = $tokenizer->getTokens();
        $documentTermCount = $this->applyFilters($tokens);
        $this->storeDocumentTermCount($document, $documentTermCount);
    }

    /**
     * Walks the document tree recursively; every non-folder entry is indexed.
     *
     * @param array $documents
     *
     * @throws \Exception
     */
    private function iterateDocumentsAndCreateTermCount($documents)
    {
        foreach ($documents as $document) {
            if ($document->type === 'folder') {
                $this->iterateDocumentsAndCreateTermCount($document->content);
            } else {
                $this->createTermCountForDocument($document);
            }
        }
    }
}
127 | 127 | \ No newline at end of file |
@@ -17,72 +17,72 @@ |
||
17 | 17 | */ |
class TermFieldLengthNorm
{
    /**
     * Connection to the SQLite search index.
     *
     * @var \PDO
     */
    protected $dbHandle;

    /**
     * TermFieldLengthNorm constructor.
     *
     * @param \PDO $dbHandle
     */
    public function __construct($dbHandle)
    {
        $this->dbHandle = $dbHandle;
    }

    /**
     * Sets term_frequency.termNorm to 1 / sqrt(termCount) for every document field,
     * where termCount is the number of term_count rows of that document field.
     *
     * The updates are executed in batches of at most Indexer::SQLITE_MAX_COMPOUND_SELECT
     * statements, each batch in its own transaction.
     *
     * @throws \Exception
     */
    public function execute()
    {
        $db = $this->dbHandle;
        // Make PHP's sqrt() callable from SQL.
        $db->sqliteCreateFunction('sqrt', 'sqrt', 1);
        $sql = '
            SELECT documentPath, field, COUNT(`count`) as termCount
              FROM term_count
          GROUP BY documentPath, field
        ';
        $stmt = $db->prepare($sql);
        if ($stmt === false) {
            $errorInfo = $db->errorInfo();
            throw new \Exception('SQLite Exception: ' . $errorInfo[2] . ' in SQL: <br /><pre>' . $sql . '</pre>');
        }
        if ($stmt->execute() === false) {
            // Failures of a statement are reported on the statement handle,
            // not on the connection.
            $errorInfo = $stmt->errorInfo();
            throw new \Exception('SQLite Exception: ' . $errorInfo[2] . ' in SQL: <br /><pre>' . $sql . '</pre>');
        }

        $values = array();
        foreach ($stmt->fetchAll(\PDO::FETCH_OBJ) as $fieldRow) {
            $values[] = 'UPDATE term_frequency SET termNorm = 1/sqrt(' . intval($fieldRow->termCount) . ') WHERE documentPath = ' . $db->quote($fieldRow->documentPath) . ' AND field = ' . $db->quote($fieldRow->field) . ';';
            if (count($values) >= Indexer::SQLITE_MAX_COMPOUND_SELECT) {
                $this->executeUpdateTermNorm($values, $db);
                $values = array();
            }
        }
        if (!empty($values)) {
            $this->executeUpdateTermNorm($values, $db);
        }
    }

    /**
     * Executes a batch of UPDATE statements inside one transaction.
     *
     * The transaction is rolled back when a statement fails, so a failed batch
     * does not leave a transaction open on the connection.
     *
     * @param array $values complete UPDATE statements
     * @param \PDO  $db
     *
     * @throws \Exception
     */
    private function executeUpdateTermNorm($values, $db)
    {
        $db->beginTransaction();
        try {
            foreach ($values as $statement) {
                if ($db->exec($statement) === false) {
                    // Read the error before rolling back; the rollback resets it.
                    $errorInfo = $db->errorInfo();
                    $sql = 'BEGIN TRANSACTION;' . PHP_EOL . implode(PHP_EOL, $values) . PHP_EOL . 'COMMIT;';
                    throw new \Exception('SQLite Exception: ' . $errorInfo[2] . ' in SQL: <br /><pre>' . $sql . '</pre>');
                }
            }
            $db->commit();
        } catch (\Exception $exception) {
            if ($db->inTransaction()) {
                $db->rollBack();
            }
            throw $exception;
        }
    }
}
89 | 89 | \ No newline at end of file |
@@ -26,117 +26,117 @@ discard block |
||
26 | 26 | */ |
27 | 27 | class Search extends SearchDbConnected |
28 | 28 | { |
29 | - /** |
|
30 | - * @var Tokenizer |
|
31 | - */ |
|
32 | - protected $tokenizer; |
|
33 | - protected $results = array(); |
|
29 | + /** |
|
30 | + * @var Tokenizer |
|
31 | + */ |
|
32 | + protected $tokenizer; |
|
33 | + protected $results = array(); |
|
34 | 34 | |
35 | - /** |
|
36 | - * An array containing classes implementing \CloudControl\Cms\search\Filters |
|
37 | - * These will be applied to all tokenizers |
|
38 | - * @var array |
|
39 | - */ |
|
40 | - protected $filters = array( |
|
41 | - 'DutchStopWords', |
|
42 | - 'EnglishStopWords' |
|
43 | - ); |
|
35 | + /** |
|
36 | + * An array containing classes implementing \CloudControl\Cms\search\Filters |
|
37 | + * These will be applied to all tokenizers |
|
38 | + * @var array |
|
39 | + */ |
|
40 | + protected $filters = array( |
|
41 | + 'DutchStopWords', |
|
42 | + 'EnglishStopWords' |
|
43 | + ); |
|
44 | 44 | |
/**
 * Searches the index for the tokens held by the given Tokenizer.
 *
 * The SearchResult objects are ordered by descending score and are preceded
 * by the SearchSuggestion objects, if any.
 *
 * @param Tokenizer $tokenizer
 *
 * @return array SearchSuggestion and / or SearchResult objects
 */
public function getDocumentsForTokenizer(Tokenizer $tokenizer)
{
    $this->tokenizer = $tokenizer;

    $scoredResults = $this->applyQueryCoordination(
        $this->flattenResults($this->queryTokens())
    );
    usort($scoredResults, array($this, "scoreCompare"));

    return array_merge($this->getSearchSuggestions(), $scoredResults);
}
|
65 | 65 | |
/**
 * Returns the number of distinct documents
 * that are currently in the search index.
 *
 * @return int
 * @throws \Exception when the query or the fetch fails
 */
public function getIndexedDocuments()
{
    $db = $this->getSearchDbHandle();
    $sql = '
        SELECT count(DISTINCT documentPath) as indexedDocuments
          FROM term_frequency
    ';
    $stmt = $db->query($sql);
    if (!$stmt) {
        $errorInfo = $db->errorInfo();
        throw new \Exception('SQLite Exception: ' . $errorInfo[2] . ' in SQL: <br /><pre>' . $sql . '</pre>');
    }
    $result = $stmt->fetch(\PDO::FETCH_COLUMN);
    if (false === $result) {
        // A failed fetch is reported on the statement handle, not on the connection.
        $errorInfo = $stmt->errorInfo();
        throw new \Exception('SQLite Exception: ' . $errorInfo[2] . ' in SQL: <br /><pre>' . $sql . '</pre>');
    }
    return intval($result);
}
|
92 | 92 | |
/**
 * Looks up every token of the Tokenizer in the search index.
 *
 * The query norm is calculated once for all tokens and passed to each lookup.
 *
 * @return array token => results of getResultsForToken() for that token
 */
private function queryTokens()
{
    $tokens = $this->getTokens();
    $queryNorm = $this->getQueryNorm($tokens);

    $resultsPerToken = array();
    foreach ($tokens as $token) {
        $resultsPerToken[$token] = $this->getResultsForToken($token, $queryNorm);
    }

    return $resultsPerToken;
}
|
110 | 110 | |
/**
 * Runs the tokens through every filter listed in the filter array, in order.
 *
 * Each entry is a class name inside \CloudControl\Cms\search\filters; the filter
 * is constructed with the current tokens and its getFilterResults() output is
 * handed to the next filter.
 *
 * @param $tokens
 *
 * @return mixed the output of the last filter
 */
protected function applyFilters($tokens)
{
    $filterNamespace = '\CloudControl\Cms\search\filters\\';
    foreach ($this->filters as $filterName) {
        $filterClass = $filterNamespace . $filterName;
        $tokens = (new $filterClass($tokens))->getFilterResults();
    }

    return $tokens;
}
|
127 | 127 | |
128 | - /** |
|
129 | - * Queries the search index for a given token |
|
130 | - * and the query norm. |
|
131 | - * @param $token |
|
132 | - * @param $queryNorm |
|
133 | - * |
|
134 | - * @return array |
|
135 | - * @throws \Exception |
|
136 | - */ |
|
137 | - public function getResultsForToken($token, $queryNorm) { |
|
138 | - $db = $this->getSearchDbHandle(); |
|
139 | - $sql = ' |
|
128 | + /** |
|
129 | + * Queries the search index for a given token |
|
130 | + * and the query norm. |
|
131 | + * @param $token |
|
132 | + * @param $queryNorm |
|
133 | + * |
|
134 | + * @return array |
|
135 | + * @throws \Exception |
|
136 | + */ |
|
137 | + public function getResultsForToken($token, $queryNorm) { |
|
138 | + $db = $this->getSearchDbHandle(); |
|
139 | + $sql = ' |
|
140 | 140 | SELECT (:queryNorm * |
141 | 141 | (SUM(term_frequency.frequency) --TF |
142 | 142 | * inverse_document_frequency.inverseDocumentFrequency -- IDF |
@@ -154,113 +154,113 @@ discard block |
||
154 | 154 | GROUP BY term_frequency.documentPath, term_frequency.term |
155 | 155 | ORDER BY score DESC |
156 | 156 | '; |
157 | - if(!$stmt = $db->prepare($sql)) { |
|
158 | - throw new \Exception('SQLite exception: <pre>' . print_r($db->errorInfo(), true) . '</pre> for SQL:<pre>' . $sql . '</pre>'); |
|
159 | - } |
|
160 | - $stmt->bindValue(':query', $token); |
|
161 | - $stmt->bindValue(':queryNorm', $queryNorm); |
|
162 | - if (!$stmt->execute()) { |
|
163 | - throw new \Exception('SQLite exception: <pre>' . print_r($db->errorInfo(), true) . '</pre> for SQL:<pre>' . $sql . '</pre>'); |
|
164 | - } |
|
165 | - return $stmt->fetchAll(\PDO::FETCH_CLASS, '\CloudControl\Cms\search\results\SearchResult'); |
|
166 | - } |
|
157 | + if(!$stmt = $db->prepare($sql)) { |
|
158 | + throw new \Exception('SQLite exception: <pre>' . print_r($db->errorInfo(), true) . '</pre> for SQL:<pre>' . $sql . '</pre>'); |
|
159 | + } |
|
160 | + $stmt->bindValue(':query', $token); |
|
161 | + $stmt->bindValue(':queryNorm', $queryNorm); |
|
162 | + if (!$stmt->execute()) { |
|
163 | + throw new \Exception('SQLite exception: <pre>' . print_r($db->errorInfo(), true) . '</pre> for SQL:<pre>' . $sql . '</pre>'); |
|
164 | + } |
|
165 | + return $stmt->fetchAll(\PDO::FETCH_CLASS, '\CloudControl\Cms\search\results\SearchResult'); |
|
166 | + } |
|
167 | 167 | |
/**
 * Merges the per-token results into one SearchResult per document.
 *
 * The scores of all tokens matching a document are added up and the matching
 * tokens are collected on the merged result.
 *
 * @param $resultsPerTokens token => results for that token
 *
 * @return array documentPath => SearchResult
 */
private function flattenResults($resultsPerTokens)
{
    $resultsByPath = array();
    foreach ($resultsPerTokens as $token => $tokenResults) {
        foreach ($tokenResults as $row) {
            if (!isset($resultsByPath[$row->documentPath])) {
                // first token that matches this document
                $merged = new SearchResult();
                $merged->documentPath = $row->documentPath;
                $merged->matchingTokens = array($token);
                $merged->score = floatval($row->score);
                $merged->setStorage($this->storage);
                $resultsByPath[$row->documentPath] = $merged;
                continue;
            }

            $resultsByPath[$row->documentPath]->score += $row->score;
            $resultsByPath[$row->documentPath]->matchingTokens[] = $token;
        }
    }

    return $resultsByPath;
}
|
193 | 193 | |
/**
 * usort() callback that orders results by descending score.
 *
 * @param $a
 * @param $b
 *
 * @return int -1 when $a scores higher, 1 when $b scores higher, 0 on equal scores
 */
private function scoreCompare($a, $b) {
    if ($a->score > $b->score) {
        return -1;
    }
    if ($a->score == $b->score) {
        return 0;
    }
    return 1;
}
|
200 | 200 | |
/**
 * Calculates the query norm for the given tokens:
 * 1 / sqrt(sum of the inverseDocumentFrequency of all tokens found in the index).
 *
 * @param $tokens
 *
 * @return float|int|string the norm as returned by PDO, or 1 when no norm could be calculated
 * @throws \Exception when the query cannot be prepared or executed
 */
private function getQueryNorm($tokens)
{
    $db = $this->getSearchDbHandle();
    // Make PHP's sqrt() callable from SQL.
    $db->sqliteCreateFunction('sqrt', 'sqrt', 1);

    $quotedTerms = array();
    foreach ($tokens as $token) {
        // Numeric tokens may arrive as integers; PDO::quote() expects a string.
        $quotedTerms[] = $db->quote((string) $token);
    }

    $sql = '
        SELECT (1 / sqrt(SUM(inverseDocumentFrequency))) as queryNorm
          FROM inverse_document_frequency
         WHERE term IN (' . implode(',', $quotedTerms) . ')
    ';
    $stmt = $db->prepare($sql);
    if (!$stmt) {
        throw new \Exception('SQLite exception: <pre>' . print_r($db->errorInfo(), true) . '</pre> for SQL:<pre>' . $sql . '</pre>');
    }
    if (!$stmt->execute()) {
        // Failures of a statement are reported on the statement handle,
        // not on the connection.
        throw new \Exception('SQLite exception: <pre>' . print_r($stmt->errorInfo(), true) . '</pre> for SQL:<pre>' . $sql . '</pre>');
    }

    $result = $stmt->fetch(\PDO::FETCH_OBJ);
    // No row, or a NULL norm (e.g. none of the tokens is indexed): use the neutral norm.
    if ($result === false || $result->queryNorm == null) {
        return 1;
    }

    return $result->queryNorm;
}
|
230 | 230 | |
/**
 * Applies query coordination to all results: every score is multiplied by
 * (number of matching tokens / number of tokens in the Tokenizer's token vector).
 *
 * @param $flatResults
 *
 * @return mixed the same results with adjusted scores
 */
private function applyQueryCoordination($flatResults)
{
    $tokenCount = count(array_keys($this->tokenizer->getTokenVector()));

    foreach ($flatResults as $key => $result) {
        $coordination = count($result->matchingTokens) / $tokenCount;
        $result->score = $coordination * $result->score;
        $flatResults[$key] = $result;
    }

    return $flatResults;
}
|
249 | 249 | |
250 | - /** |
|
251 | - * Uses the levenshtein algorithm to determine the term that is |
|
252 | - * closest to the token that was input for the search |
|
253 | - * @return array |
|
254 | - * @throws \Exception |
|
255 | - */ |
|
256 | - private function getSearchSuggestions() |
|
257 | - { |
|
258 | - $tokens = $this->getTokens(); |
|
259 | - $allResults = array(); |
|
260 | - foreach ($tokens as $token) { |
|
261 | - $db = $this->getSearchDbHandle(); |
|
262 | - $db->sqliteCreateFunction('levenshtein', 'levenshtein', 2); |
|
263 | - $sql = ' |
|
250 | + /** |
|
251 | + * Uses the levenshtein algorithm to determine the term that is |
|
252 | + * closest to the token that was input for the search |
|
253 | + * @return array |
|
254 | + * @throws \Exception |
|
255 | + */ |
|
256 | + private function getSearchSuggestions() |
|
257 | + { |
|
258 | + $tokens = $this->getTokens(); |
|
259 | + $allResults = array(); |
|
260 | + foreach ($tokens as $token) { |
|
261 | + $db = $this->getSearchDbHandle(); |
|
262 | + $db->sqliteCreateFunction('levenshtein', 'levenshtein', 2); |
|
263 | + $sql = ' |
|
264 | 264 | SELECT * |
265 | 265 | FROM ( |
266 | 266 | SELECT :token as original, term, levenshtein(term, :token) as editDistance |
@@ -270,35 +270,35 @@ discard block |
||
270 | 270 | ) |
271 | 271 | WHERE editDistance > 0 |
272 | 272 | '; |
273 | - $stmt = $db->prepare($sql); |
|
274 | - if ($stmt === false) { |
|
275 | - throw new \Exception('SQLite exception: <pre>' . print_r($db->errorInfo(), true) . '</pre> for SQL:<pre>' . $sql . '</pre>'); |
|
276 | - } |
|
277 | - $stmt->bindValue(':token', $token); |
|
278 | - if (($stmt === false) | (!$stmt->execute())) { |
|
279 | - throw new \Exception('SQLite exception: <pre>' . print_r($db->errorInfo(), true) . '</pre> for SQL:<pre>' . $sql . '</pre>'); |
|
280 | - } |
|
281 | - $result = $stmt->fetchAll(\PDO::FETCH_CLASS, '\CloudControl\Cms\search\results\SearchSuggestion'); |
|
282 | - $allResults = array_merge($result, $allResults); |
|
283 | - } |
|
284 | - return $allResults; |
|
285 | - } |
|
273 | + $stmt = $db->prepare($sql); |
|
274 | + if ($stmt === false) { |
|
275 | + throw new \Exception('SQLite exception: <pre>' . print_r($db->errorInfo(), true) . '</pre> for SQL:<pre>' . $sql . '</pre>'); |
|
276 | + } |
|
277 | + $stmt->bindValue(':token', $token); |
|
278 | + if (($stmt === false) | (!$stmt->execute())) { |
|
279 | + throw new \Exception('SQLite exception: <pre>' . print_r($db->errorInfo(), true) . '</pre> for SQL:<pre>' . $sql . '</pre>'); |
|
280 | + } |
|
281 | + $result = $stmt->fetchAll(\PDO::FETCH_CLASS, '\CloudControl\Cms\search\results\SearchSuggestion'); |
|
282 | + $allResults = array_merge($result, $allResults); |
|
283 | + } |
|
284 | + return $allResults; |
|
285 | + } |
|
286 | 286 | |
287 | - /** |
|
288 | - * Retrieves all tokens from the tokenizer |
|
289 | - * @return array |
|
290 | - */ |
|
291 | - private function getTokens() |
|
292 | - { |
|
293 | - $tokenVector = array( |
|
294 | - 'query' => array(), |
|
295 | - ); |
|
296 | - $tokenVector['query'] = $this->tokenizer->getTokenVector(); |
|
297 | - $tokens = $this->applyFilters($tokenVector); |
|
298 | - if (!empty($tokens)) { |
|
299 | - $tokens = array_keys($tokens['query']); |
|
300 | - } |
|
287 | + /** |
|
288 | + * Retrieves all tokens from the tokenizer |
|
289 | + * @return array |
|
290 | + */ |
|
291 | + private function getTokens() |
|
292 | + { |
|
293 | + $tokenVector = array( |
|
294 | + 'query' => array(), |
|
295 | + ); |
|
296 | + $tokenVector['query'] = $this->tokenizer->getTokenVector(); |
|
297 | + $tokens = $this->applyFilters($tokenVector); |
|
298 | + if (!empty($tokens)) { |
|
299 | + $tokens = array_keys($tokens['query']); |
|
300 | + } |
|
301 | 301 | |
302 | - return $tokens; |
|
303 | - } |
|
302 | + return $tokens; |
|
303 | + } |
|
304 | 304 | } |
305 | 305 | \ No newline at end of file |
@@ -154,7 +154,7 @@ discard block |
||
154 | 154 | GROUP BY term_frequency.documentPath, term_frequency.term |
155 | 155 | ORDER BY score DESC |
156 | 156 | '; |
157 | - if(!$stmt = $db->prepare($sql)) { |
|
157 | + if (!$stmt = $db->prepare($sql)) { |
|
158 | 158 | throw new \Exception('SQLite exception: <pre>' . print_r($db->errorInfo(), true) . '</pre> for SQL:<pre>' . $sql . '</pre>'); |
159 | 159 | } |
160 | 160 | $stmt->bindValue(':query', $token); |
@@ -218,7 +218,7 @@ discard block |
||
218 | 218 | FROM inverse_document_frequency |
219 | 219 | WHERE term IN (' . $terms . ') |
220 | 220 | '; |
221 | - if(!$stmt = $db->prepare($sql)) { |
|
221 | + if (!$stmt = $db->prepare($sql)) { |
|
222 | 222 | throw new \Exception('SQLite exception: <pre>' . print_r($db->errorInfo(), true) . '</pre> for SQL:<pre>' . $sql . '</pre>'); |
223 | 223 | } |
224 | 224 | if (!$stmt->execute()) { |
@@ -13,47 +13,47 @@ |
||
13 | 13 | */ |
14 | 14 | class Tokenizer |
15 | 15 | { |
16 | - protected $inputString; |
|
17 | - protected $tokenVector = array(); |
|
18 | - |
|
19 | - /** |
|
20 | - * Tokenizer constructor. |
|
21 | - * |
|
22 | - * @param string $string Should preferably be parsed wit \CloudControl\Cms\search\CharacterFilter |
|
23 | - * @see \CloudControl\Cms\search\CharacterFilter |
|
24 | - */ |
|
25 | - public function __construct($string) |
|
26 | - { |
|
27 | - $this->inputString = $string; |
|
28 | - $this->tokenize(); |
|
29 | - } |
|
30 | - |
|
31 | - protected function tokenize() |
|
32 | - { |
|
33 | - $tokens = explode(' ', $this->inputString); |
|
34 | - foreach ($tokens as $token) { |
|
35 | - $this->addTokenToVector($token); |
|
36 | - } |
|
37 | - } |
|
38 | - |
|
39 | - protected function addTokenToVector($token) |
|
40 | - { |
|
41 | - if (!empty($token)) { |
|
42 | - if (isset($this->tokenVector[$token])) { |
|
43 | - $this->tokenVector[$token] += 1; |
|
44 | - } else { |
|
45 | - $this->tokenVector[$token] = 1; |
|
46 | - } |
|
47 | - } |
|
48 | - } |
|
49 | - |
|
50 | - /** |
|
51 | - * @return array |
|
52 | - */ |
|
53 | - public function getTokenVector() |
|
54 | - { |
|
55 | - return $this->tokenVector; |
|
56 | - } |
|
16 | + protected $inputString; |
|
17 | + protected $tokenVector = array(); |
|
18 | + |
|
19 | + /** |
|
20 | + * Tokenizer constructor. |
|
21 | + * |
|
22 | + * @param string $string Should preferably be parsed wit \CloudControl\Cms\search\CharacterFilter |
|
23 | + * @see \CloudControl\Cms\search\CharacterFilter |
|
24 | + */ |
|
25 | + public function __construct($string) |
|
26 | + { |
|
27 | + $this->inputString = $string; |
|
28 | + $this->tokenize(); |
|
29 | + } |
|
30 | + |
|
31 | + protected function tokenize() |
|
32 | + { |
|
33 | + $tokens = explode(' ', $this->inputString); |
|
34 | + foreach ($tokens as $token) { |
|
35 | + $this->addTokenToVector($token); |
|
36 | + } |
|
37 | + } |
|
38 | + |
|
39 | + protected function addTokenToVector($token) |
|
40 | + { |
|
41 | + if (!empty($token)) { |
|
42 | + if (isset($this->tokenVector[$token])) { |
|
43 | + $this->tokenVector[$token] += 1; |
|
44 | + } else { |
|
45 | + $this->tokenVector[$token] = 1; |
|
46 | + } |
|
47 | + } |
|
48 | + } |
|
49 | + |
|
50 | + /** |
|
51 | + * @return array |
|
52 | + */ |
|
53 | + public function getTokenVector() |
|
54 | + { |
|
55 | + return $this->tokenVector; |
|
56 | + } |
|
57 | 57 | |
58 | 58 | |
59 | 59 | } |
60 | 60 | \ No newline at end of file |
@@ -11,15 +11,15 @@ |
||
11 | 11 | |
12 | 12 | interface Filter |
13 | 13 | { |
14 | - /** |
|
15 | - * Filter constructor. |
|
16 | - * |
|
17 | - * @param array $tokens |
|
18 | - */ |
|
19 | - public function __construct($tokens); |
|
14 | + /** |
|
15 | + * Filter constructor. |
|
16 | + * |
|
17 | + * @param array $tokens |
|
18 | + */ |
|
19 | + public function __construct($tokens); |
|
20 | 20 | |
21 | - /** |
|
22 | - * @return array |
|
23 | - */ |
|
24 | - public function getFilterResults(); |
|
21 | + /** |
|
22 | + * @return array |
|
23 | + */ |
|
24 | + public function getFilterResults(); |
|
25 | 25 | } |
26 | 26 | \ No newline at end of file |
@@ -13,149 +13,149 @@ |
||
13 | 13 | |
14 | 14 | class DocumentTokenizer |
15 | 15 | { |
16 | - /** |
|
17 | - * @var Document |
|
18 | - */ |
|
19 | - protected $document; |
|
20 | - |
|
21 | - /** |
|
22 | - * @var array |
|
23 | - */ |
|
24 | - protected $tokenVector = array(); |
|
25 | - protected $storage; |
|
26 | - |
|
27 | - /** |
|
28 | - * Tokenizer constructor. |
|
29 | - * |
|
30 | - * @param \CloudControl\Cms\storage\Document $document |
|
31 | - * @param Storage $storage |
|
32 | - */ |
|
33 | - public function __construct(Document $document, Storage $storage) |
|
34 | - { |
|
35 | - $this->document = $document; |
|
36 | - $this->storage = $storage; |
|
37 | - $this->tokenize(); |
|
38 | - } |
|
39 | - |
|
40 | - /** |
|
41 | - * Execute tokenization of all document fields |
|
42 | - */ |
|
43 | - private function tokenize() |
|
44 | - { |
|
45 | - $this->tokenizeTitle(); |
|
46 | - $this->tokenizeFields(); |
|
47 | - $this->tokenizeBricks(); |
|
48 | - $this->tokenizeDynamicBricks(); |
|
49 | - $this->tokenVector = array_filter($this->tokenVector); |
|
50 | - arsort($this->tokenVector); |
|
51 | - } |
|
52 | - |
|
53 | - private function tokenizeTitle() |
|
54 | - { |
|
55 | - $filteredString = new CharacterFilter($this->document->title); |
|
56 | - $tokenizer = new Tokenizer($filteredString); |
|
57 | - $this->addTokenVectorToVector($tokenizer->getTokenVector(), 'title'); |
|
58 | - } |
|
59 | - |
|
60 | - private function tokenizeFields() |
|
61 | - { |
|
62 | - $fields = $this->document->fields; |
|
63 | - $documentDefinition = $this->storage->getDocumentTypes()->getDocumentTypeBySlug($this->document->documentTypeSlug); |
|
64 | - foreach ($fields as $fieldName => $field) { |
|
65 | - $fieldType = $this->getFieldType($fieldName, $documentDefinition); |
|
66 | - $this->tokenizeField($field, $fieldName, $fieldType); |
|
67 | - } |
|
68 | - } |
|
69 | - |
|
70 | - private function tokenizeField($field, $fieldName, $fieldType) |
|
71 | - { |
|
72 | - foreach ($field as $value) { |
|
73 | - // Only index fields that contain text |
|
74 | - if (in_array($fieldType, array('String', 'Text', 'Rich Text'))) { |
|
75 | - $filteredString = new CharacterFilter($value); |
|
76 | - $tokenizer = new Tokenizer($filteredString); |
|
77 | - $this->addTokenVectorToVector($tokenizer->getTokenVector(), $fieldName); |
|
78 | - } |
|
79 | - } |
|
80 | - } |
|
81 | - |
|
82 | - private function tokenizeBricks() |
|
83 | - { |
|
84 | - $bricks = $this->document->bricks; |
|
85 | - foreach ($bricks as $brickSlug => $bricks) { |
|
86 | - foreach ($bricks as $brick) { |
|
87 | - $this->tokenizeBrick($brick, $brickSlug); |
|
88 | - } |
|
89 | - } |
|
90 | - } |
|
91 | - |
|
92 | - private function tokenizeBrick($brick, $brickSlug) |
|
93 | - { |
|
94 | - $fields = $brick->fields; |
|
95 | - $brickDefinition = $this->storage->getBricks()->getBrickBySlug($brick->type); |
|
96 | - foreach ($fields as $fieldName => $field) { |
|
97 | - $fieldType = $this->getFieldType($fieldName, $brickDefinition); |
|
98 | - $this->tokenizeField($field, $brickSlug . '__' . $fieldName, $fieldType); |
|
99 | - } |
|
100 | - } |
|
101 | - |
|
102 | - private function tokenizeDynamicBricks() |
|
103 | - { |
|
104 | - $dynamicBricks = $this->document->dynamicBricks; |
|
105 | - foreach ($dynamicBricks as $key => $brick) { |
|
106 | - $this->tokenizeBrick($brick, 'dynamicBricks__' . $brick->type . $key); |
|
107 | - } |
|
108 | - } |
|
109 | - |
|
110 | - public function getTokens() |
|
111 | - { |
|
112 | - return $this->tokenVector; |
|
113 | - } |
|
114 | - |
|
115 | - /** |
|
116 | - * Add a token to the existing tokenvector |
|
117 | - * @param $token |
|
118 | - * @param string $field |
|
119 | - * @param int $count |
|
120 | - */ |
|
121 | - private function addTokenToVector($token, $field, $count = 1) |
|
122 | - { |
|
123 | - if (!empty($token)) { |
|
124 | - if (isset($this->tokenVector[$field][$token])) { |
|
125 | - $this->tokenVector[$field][$token] += $count; |
|
126 | - } else { |
|
127 | - $this->tokenVector[$field][$token] = $count; |
|
128 | - } |
|
129 | - } |
|
130 | - } |
|
131 | - |
|
132 | - /** |
|
133 | - * Add a complete token vector to the existing one. |
|
134 | - * @param $tokenVector |
|
135 | - * @param $field |
|
136 | - */ |
|
137 | - private function addTokenVectorToVector($tokenVector, $field) |
|
138 | - { |
|
139 | - foreach ($tokenVector as $token => $count) { |
|
140 | - $this->addTokenToVector($token, $field, $count); |
|
141 | - } |
|
142 | - } |
|
143 | - |
|
144 | - /** |
|
145 | - * Get the type for a field |
|
146 | - * @param $fieldName |
|
147 | - * @param $documentDefinition |
|
148 | - * @return mixed |
|
149 | - * @throws \Exception |
|
150 | - */ |
|
151 | - private function getFieldType($fieldName, $documentDefinition) |
|
152 | - { |
|
153 | - foreach ($documentDefinition->fields as $fieldTypeDefinition) { |
|
154 | - if ($fieldTypeDefinition->slug === $fieldName) { |
|
155 | - return $fieldTypeDefinition->type; |
|
156 | - } |
|
157 | - } |
|
158 | - |
|
159 | - throw new \Exception('Unknown field type for field' . $fieldName . ' in document ' . $this->document->path); |
|
160 | - } |
|
16 | + /** |
|
17 | + * @var Document |
|
18 | + */ |
|
19 | + protected $document; |
|
20 | + |
|
21 | + /** |
|
22 | + * @var array |
|
23 | + */ |
|
24 | + protected $tokenVector = array(); |
|
25 | + protected $storage; |
|
26 | + |
|
27 | + /** |
|
28 | + * Tokenizer constructor. |
|
29 | + * |
|
30 | + * @param \CloudControl\Cms\storage\Document $document |
|
31 | + * @param Storage $storage |
|
32 | + */ |
|
33 | + public function __construct(Document $document, Storage $storage) |
|
34 | + { |
|
35 | + $this->document = $document; |
|
36 | + $this->storage = $storage; |
|
37 | + $this->tokenize(); |
|
38 | + } |
|
39 | + |
|
40 | + /** |
|
41 | + * Execute tokenization of all document fields |
|
42 | + */ |
|
43 | + private function tokenize() |
|
44 | + { |
|
45 | + $this->tokenizeTitle(); |
|
46 | + $this->tokenizeFields(); |
|
47 | + $this->tokenizeBricks(); |
|
48 | + $this->tokenizeDynamicBricks(); |
|
49 | + $this->tokenVector = array_filter($this->tokenVector); |
|
50 | + arsort($this->tokenVector); |
|
51 | + } |
|
52 | + |
|
53 | + private function tokenizeTitle() |
|
54 | + { |
|
55 | + $filteredString = new CharacterFilter($this->document->title); |
|
56 | + $tokenizer = new Tokenizer($filteredString); |
|
57 | + $this->addTokenVectorToVector($tokenizer->getTokenVector(), 'title'); |
|
58 | + } |
|
59 | + |
|
60 | + private function tokenizeFields() |
|
61 | + { |
|
62 | + $fields = $this->document->fields; |
|
63 | + $documentDefinition = $this->storage->getDocumentTypes()->getDocumentTypeBySlug($this->document->documentTypeSlug); |
|
64 | + foreach ($fields as $fieldName => $field) { |
|
65 | + $fieldType = $this->getFieldType($fieldName, $documentDefinition); |
|
66 | + $this->tokenizeField($field, $fieldName, $fieldType); |
|
67 | + } |
|
68 | + } |
|
69 | + |
|
70 | + private function tokenizeField($field, $fieldName, $fieldType) |
|
71 | + { |
|
72 | + foreach ($field as $value) { |
|
73 | + // Only index fields that contain text |
|
74 | + if (in_array($fieldType, array('String', 'Text', 'Rich Text'))) { |
|
75 | + $filteredString = new CharacterFilter($value); |
|
76 | + $tokenizer = new Tokenizer($filteredString); |
|
77 | + $this->addTokenVectorToVector($tokenizer->getTokenVector(), $fieldName); |
|
78 | + } |
|
79 | + } |
|
80 | + } |
|
81 | + |
|
82 | + private function tokenizeBricks() |
|
83 | + { |
|
84 | + $bricks = $this->document->bricks; |
|
85 | + foreach ($bricks as $brickSlug => $bricks) { |
|
86 | + foreach ($bricks as $brick) { |
|
87 | + $this->tokenizeBrick($brick, $brickSlug); |
|
88 | + } |
|
89 | + } |
|
90 | + } |
|
91 | + |
|
92 | + private function tokenizeBrick($brick, $brickSlug) |
|
93 | + { |
|
94 | + $fields = $brick->fields; |
|
95 | + $brickDefinition = $this->storage->getBricks()->getBrickBySlug($brick->type); |
|
96 | + foreach ($fields as $fieldName => $field) { |
|
97 | + $fieldType = $this->getFieldType($fieldName, $brickDefinition); |
|
98 | + $this->tokenizeField($field, $brickSlug . '__' . $fieldName, $fieldType); |
|
99 | + } |
|
100 | + } |
|
101 | + |
|
102 | + private function tokenizeDynamicBricks() |
|
103 | + { |
|
104 | + $dynamicBricks = $this->document->dynamicBricks; |
|
105 | + foreach ($dynamicBricks as $key => $brick) { |
|
106 | + $this->tokenizeBrick($brick, 'dynamicBricks__' . $brick->type . $key); |
|
107 | + } |
|
108 | + } |
|
109 | + |
|
110 | + public function getTokens() |
|
111 | + { |
|
112 | + return $this->tokenVector; |
|
113 | + } |
|
114 | + |
|
115 | + /** |
|
116 | + * Add a token to the existing tokenvector |
|
117 | + * @param $token |
|
118 | + * @param string $field |
|
119 | + * @param int $count |
|
120 | + */ |
|
121 | + private function addTokenToVector($token, $field, $count = 1) |
|
122 | + { |
|
123 | + if (!empty($token)) { |
|
124 | + if (isset($this->tokenVector[$field][$token])) { |
|
125 | + $this->tokenVector[$field][$token] += $count; |
|
126 | + } else { |
|
127 | + $this->tokenVector[$field][$token] = $count; |
|
128 | + } |
|
129 | + } |
|
130 | + } |
|
131 | + |
|
132 | + /** |
|
133 | + * Add a complete token vector to the existing one. |
|
134 | + * @param $tokenVector |
|
135 | + * @param $field |
|
136 | + */ |
|
137 | + private function addTokenVectorToVector($tokenVector, $field) |
|
138 | + { |
|
139 | + foreach ($tokenVector as $token => $count) { |
|
140 | + $this->addTokenToVector($token, $field, $count); |
|
141 | + } |
|
142 | + } |
|
143 | + |
|
144 | + /** |
|
145 | + * Get the type for a field |
|
146 | + * @param $fieldName |
|
147 | + * @param $documentDefinition |
|
148 | + * @return mixed |
|
149 | + * @throws \Exception |
|
150 | + */ |
|
151 | + private function getFieldType($fieldName, $documentDefinition) |
|
152 | + { |
|
153 | + foreach ($documentDefinition->fields as $fieldTypeDefinition) { |
|
154 | + if ($fieldTypeDefinition->slug === $fieldName) { |
|
155 | + return $fieldTypeDefinition->type; |
|
156 | + } |
|
157 | + } |
|
158 | + |
|
159 | + throw new \Exception('Unknown field type for field' . $fieldName . ' in document ' . $this->document->path); |
|
160 | + } |
|
161 | 161 | } |
162 | 162 | \ No newline at end of file |
@@ -91,7 +91,7 @@ |
||
91 | 91 | |
92 | 92 | private function tokenizeBrick($brick, $brickSlug) |
93 | 93 | { |
94 | - $fields = $brick->fields; |
|
94 | + $fields = $brick->fields; |
|
95 | 95 | $brickDefinition = $this->storage->getBricks()->getBrickBySlug($brick->type); |
96 | 96 | foreach ($fields as $fieldName => $field) { |
97 | 97 | $fieldType = $this->getFieldType($fieldName, $brickDefinition); |
@@ -10,5 +10,5 @@ |
||
10 | 10 | |
11 | 11 | class EnglishStopWords extends StopWordsFilter |
12 | 12 | { |
13 | - protected $stopWords = array('a', 'about', 'above', 'after', 'again', 'against', 'all', 'am', 'an', 'and', 'any', 'are', 'arent', 'as', 'at', 'be', 'because', 'been', 'before', 'being', 'below', 'between', 'both', 'but', 'by', 'cant', 'cannot', 'could', 'couldnt', 'did', 'didnt', 'do', 'does', 'doesnt', 'doing', 'dont', 'down', 'during', 'each', 'few', 'for', 'from', 'further', 'had', 'hadnt', 'has', 'hasnt', 'have', 'havent', 'having', 'he', 'hed', 'hell', 'hes', 'her', 'here', 'heres', 'hers', 'herself', 'him', 'himself', 'his', 'how', 'hows', 'i', 'id', 'ill', 'im', 'ive', 'if', 'in', 'into', 'is', 'isnt', 'it', 'its', 'its', 'itself', 'lets', 'me', 'more', 'most', 'mustnt', 'my', 'myself', 'no', 'nor', 'not', 'of', 'off', 'on', 'once', 'only', 'or', 'other', 'ought', 'our', 'ours ourselves', 'out', 'over', 'own', 'same', 'shant', 'she', 'shed', 'shell', 'shes', 'should', 'shouldnt', 'so', 'some', 'such', 'than', 'that', 'thats', 'the', 'their', 'theirs', 'them', 'themselves', 'then', 'there', 'theres', 'these', 'they', 'theyd', 'theyll', 'theyre', 'theyve', 'this', 'those', 'through', 'to', 'too', 'under', 'until', 'up', 'very', 'was', 'wasnt', 'we', 'wed', 'well', 'were', 'weve', 'were', 'werent', 'what', 'whats', 'when', 'whens', 'where', 'wheres', 'which', 'while', 'who', 'whos', 'whom', 'why', 'whys', 'with', 'wont', 'would', 'wouldnt', 'you', 'youd', 'youll', 'youre', 'youve', 'your', 'yours', 'yourself', 'yourselves'); |
|
13 | + protected $stopWords = array('a', 'about', 'above', 'after', 'again', 'against', 'all', 'am', 'an', 'and', 'any', 'are', 'arent', 'as', 'at', 'be', 'because', 'been', 'before', 'being', 'below', 'between', 'both', 'but', 'by', 'cant', 'cannot', 'could', 'couldnt', 'did', 'didnt', 'do', 'does', 'doesnt', 'doing', 'dont', 'down', 'during', 'each', 'few', 'for', 'from', 'further', 'had', 'hadnt', 'has', 'hasnt', 'have', 'havent', 'having', 'he', 'hed', 'hell', 'hes', 'her', 'here', 'heres', 'hers', 'herself', 'him', 'himself', 'his', 'how', 'hows', 'i', 'id', 'ill', 'im', 'ive', 'if', 'in', 'into', 'is', 'isnt', 'it', 'its', 'its', 'itself', 'lets', 'me', 'more', 'most', 'mustnt', 'my', 'myself', 'no', 'nor', 'not', 'of', 'off', 'on', 'once', 'only', 'or', 'other', 'ought', 'our', 'ours ourselves', 'out', 'over', 'own', 'same', 'shant', 'she', 'shed', 'shell', 'shes', 'should', 'shouldnt', 'so', 'some', 'such', 'than', 'that', 'thats', 'the', 'their', 'theirs', 'them', 'themselves', 'then', 'there', 'theres', 'these', 'they', 'theyd', 'theyll', 'theyre', 'theyve', 'this', 'those', 'through', 'to', 'too', 'under', 'until', 'up', 'very', 'was', 'wasnt', 'we', 'wed', 'well', 'were', 'weve', 'were', 'werent', 'what', 'whats', 'when', 'whens', 'where', 'wheres', 'which', 'while', 'who', 'whos', 'whom', 'why', 'whys', 'with', 'wont', 'would', 'wouldnt', 'you', 'youd', 'youll', 'youre', 'youve', 'your', 'yours', 'yourself', 'yourselves'); |
|
14 | 14 | } |
15 | 15 | \ No newline at end of file |
@@ -22,87 +22,87 @@ discard block |
||
22 | 22 | */ |
23 | 23 | class Indexer extends SearchDbConnected |
24 | 24 | { |
25 | - const SQLITE_MAX_COMPOUND_SELECT = 100; |
|
26 | - protected $filters = array( |
|
27 | - 'DutchStopWords', |
|
28 | - 'EnglishStopWords' |
|
29 | - ); |
|
30 | - protected $storageDir; |
|
31 | - /** |
|
32 | - * @var double |
|
33 | - */ |
|
34 | - protected $loggingStart; |
|
35 | - /** |
|
36 | - * @var string |
|
37 | - */ |
|
38 | - protected $log; |
|
39 | - /** |
|
40 | - * @var double |
|
41 | - */ |
|
42 | - protected $lastLog; |
|
43 | - |
|
44 | - const SEARCH_TEMP_DB = 'search_tmp.db'; |
|
45 | - |
|
46 | - /** |
|
47 | - * Creates a new temporary search db, cleans it if it exists |
|
48 | - * then calculates and stores the search index in this db |
|
49 | - * and finally if indexing completed replaces the current search |
|
50 | - * db with the temporary one. Returns the log in string format. |
|
51 | - * @return string |
|
52 | - */ |
|
53 | - public function updateIndex() |
|
54 | - { |
|
55 | - $this->startLogging(); |
|
56 | - $this->addLog('Indexing start.'); |
|
57 | - $this->addLog('Clearing index.'); |
|
58 | - $this->resetIndex(); |
|
59 | - $this->addLog('Cleaning Published Deleted Documents'); |
|
60 | - $this->storage->getDocuments()->cleanPublishedDeletedDocuments(); |
|
61 | - $this->addLog('Retrieving documents to be indexed.'); |
|
62 | - $documents = $this->storage->getDocuments()->getPublishedDocumentsNoFolders(); |
|
63 | - $this->addLog('Start Document Term Count for ' . count($documents) . ' documents'); |
|
64 | - $this->createDocumentTermCount($documents); |
|
65 | - $this->addLog('Start Document Term Frequency.'); |
|
66 | - $this->createDocumentTermFrequency(); |
|
67 | - $this->addLog('Start Term Field Length Norm.'); |
|
68 | - $this->createTermFieldLengthNorm(); |
|
69 | - $this->addLog('Start Inverse Document Frequency.'); |
|
70 | - $this->createInverseDocumentFrequency(); |
|
71 | - $this->addLog('Replacing old index.'); |
|
72 | - $this->replaceOldIndex(); |
|
73 | - $this->addLog('Indexing complete.'); |
|
74 | - return $this->log; |
|
75 | - } |
|
76 | - |
|
77 | - /** |
|
78 | - * Count how often a term is used in a document |
|
79 | - * |
|
80 | - * @param $documents |
|
81 | - */ |
|
82 | - public function createDocumentTermCount($documents) |
|
83 | - { |
|
84 | - $termCount = new TermCount($this->getSearchDbHandle(), $documents, $this->filters, $this->storage); |
|
85 | - $termCount->execute(); |
|
86 | - } |
|
87 | - |
|
88 | - /** |
|
89 | - * Calculate the frequency index for a term with |
|
90 | - * a field |
|
91 | - */ |
|
92 | - public function createDocumentTermFrequency() |
|
93 | - { |
|
94 | - $termFrequency = new TermFrequency($this->getSearchDbHandle()); |
|
95 | - $termFrequency->execute(); |
|
96 | - } |
|
97 | - |
|
98 | - |
|
99 | - /** |
|
100 | - * Resets the entire index |
|
101 | - */ |
|
102 | - public function resetIndex() |
|
103 | - { |
|
104 | - $db = $this->getSearchDbHandle(); |
|
105 | - $sql = ' |
|
25 | + const SQLITE_MAX_COMPOUND_SELECT = 100; |
|
26 | + protected $filters = array( |
|
27 | + 'DutchStopWords', |
|
28 | + 'EnglishStopWords' |
|
29 | + ); |
|
30 | + protected $storageDir; |
|
31 | + /** |
|
32 | + * @var double |
|
33 | + */ |
|
34 | + protected $loggingStart; |
|
35 | + /** |
|
36 | + * @var string |
|
37 | + */ |
|
38 | + protected $log; |
|
39 | + /** |
|
40 | + * @var double |
|
41 | + */ |
|
42 | + protected $lastLog; |
|
43 | + |
|
44 | + const SEARCH_TEMP_DB = 'search_tmp.db'; |
|
45 | + |
|
46 | + /** |
|
47 | + * Creates a new temporary search db, cleans it if it exists |
|
48 | + * then calculates and stores the search index in this db |
|
49 | + * and finally if indexing completed replaces the current search |
|
50 | + * db with the temporary one. Returns the log in string format. |
|
51 | + * @return string |
|
52 | + */ |
|
53 | + public function updateIndex() |
|
54 | + { |
|
55 | + $this->startLogging(); |
|
56 | + $this->addLog('Indexing start.'); |
|
57 | + $this->addLog('Clearing index.'); |
|
58 | + $this->resetIndex(); |
|
59 | + $this->addLog('Cleaning Published Deleted Documents'); |
|
60 | + $this->storage->getDocuments()->cleanPublishedDeletedDocuments(); |
|
61 | + $this->addLog('Retrieving documents to be indexed.'); |
|
62 | + $documents = $this->storage->getDocuments()->getPublishedDocumentsNoFolders(); |
|
63 | + $this->addLog('Start Document Term Count for ' . count($documents) . ' documents'); |
|
64 | + $this->createDocumentTermCount($documents); |
|
65 | + $this->addLog('Start Document Term Frequency.'); |
|
66 | + $this->createDocumentTermFrequency(); |
|
67 | + $this->addLog('Start Term Field Length Norm.'); |
|
68 | + $this->createTermFieldLengthNorm(); |
|
69 | + $this->addLog('Start Inverse Document Frequency.'); |
|
70 | + $this->createInverseDocumentFrequency(); |
|
71 | + $this->addLog('Replacing old index.'); |
|
72 | + $this->replaceOldIndex(); |
|
73 | + $this->addLog('Indexing complete.'); |
|
74 | + return $this->log; |
|
75 | + } |
|
76 | + |
|
77 | + /** |
|
78 | + * Count how often a term is used in a document |
|
79 | + * |
|
80 | + * @param $documents |
|
81 | + */ |
|
82 | + public function createDocumentTermCount($documents) |
|
83 | + { |
|
84 | + $termCount = new TermCount($this->getSearchDbHandle(), $documents, $this->filters, $this->storage); |
|
85 | + $termCount->execute(); |
|
86 | + } |
|
87 | + |
|
88 | + /** |
|
89 | + * Calculate the frequency index for a term with |
|
90 | + * a field |
|
91 | + */ |
|
92 | + public function createDocumentTermFrequency() |
|
93 | + { |
|
94 | + $termFrequency = new TermFrequency($this->getSearchDbHandle()); |
|
95 | + $termFrequency->execute(); |
|
96 | + } |
|
97 | + |
|
98 | + |
|
99 | + /** |
|
100 | + * Resets the entire index |
|
101 | + */ |
|
102 | + public function resetIndex() |
|
103 | + { |
|
104 | + $db = $this->getSearchDbHandle(); |
|
105 | + $sql = ' |
|
106 | 106 | DELETE FROM term_count; |
107 | 107 | DELETE FROM term_frequency; |
108 | 108 | DELETE FROM inverse_document_frequency; |
@@ -110,82 +110,82 @@ discard block |
||
110 | 110 | UPDATE `sqlite_sequence` SET `seq`= 0 WHERE `name`=\'term_frequency\'; |
111 | 111 | UPDATE `sqlite_sequence` SET `seq`= 0 WHERE `name`=\'inverse_document_frequency\'; |
112 | 112 | '; |
113 | - $db->exec($sql); |
|
114 | - } |
|
115 | - |
|
116 | - /** |
|
117 | - * Calculates the inverse document frequency for each |
|
118 | - * term. This is a representation of how often a certain |
|
119 | - * term is used in comparison to all terms. |
|
120 | - */ |
|
121 | - public function createInverseDocumentFrequency() |
|
122 | - { |
|
123 | - $documentCount = $this->getTotalDocumentCount(); |
|
124 | - $inverseDocumentFrequency = new InverseDocumentFrequency($this->getSearchDbHandle(), $documentCount); |
|
125 | - $inverseDocumentFrequency->execute(); |
|
126 | - } |
|
127 | - |
|
128 | - /** |
|
129 | - * @return int|mixed |
|
130 | - */ |
|
131 | - private function getTotalDocumentCount() |
|
132 | - { |
|
133 | - return $this->storage->getDocuments()->getTotalDocumentCount(); |
|
134 | - } |
|
135 | - |
|
136 | - /** |
|
137 | - * Calculates the Term Field Length Norm. |
|
138 | - * This is an index determining how important a |
|
139 | - * term is, based on the total length of the field |
|
140 | - * it comes from. |
|
141 | - */ |
|
142 | - public function createTermFieldLengthNorm() |
|
143 | - { |
|
144 | - $termFieldLengthNorm = new TermFieldLengthNorm($this->getSearchDbHandle()); |
|
145 | - $termFieldLengthNorm->execute(); |
|
146 | - } |
|
147 | - |
|
148 | - /** |
|
149 | - * Stores the time the indexing started in memory |
|
150 | - */ |
|
151 | - private function startLogging() |
|
152 | - { |
|
153 | - $this->loggingStart = round(microtime(true) * 1000); |
|
154 | - $this->lastLog = $this->loggingStart; |
|
155 | - } |
|
156 | - |
|
157 | - /** |
|
158 | - * Adds a logline with the time since last log |
|
159 | - * @param $string |
|
160 | - */ |
|
161 | - private function addLog($string) |
|
162 | - { |
|
163 | - $currentTime = round(microtime(true) * 1000); |
|
164 | - $this->log .= date('d-m-Y H:i:s - ') . str_pad($string, 50, " ", STR_PAD_RIGHT) . "\t" . ($currentTime - $this->lastLog) . 'ms since last log. ' . "\t" . ($currentTime - $this->loggingStart) . 'ms since start.' . PHP_EOL; |
|
165 | - $this->lastLog = round(microtime(true) * 1000); |
|
166 | - } |
|
167 | - |
|
168 | - /** |
|
169 | - * Creates the SQLite \PDO object if it doesnt |
|
170 | - * exist and returns it. |
|
171 | - * @return \PDO |
|
172 | - */ |
|
173 | - protected function getSearchDbHandle() |
|
174 | - { |
|
175 | - if ($this->searchDbHandle === null) { |
|
176 | - $path = $this->storageDir . DIRECTORY_SEPARATOR; |
|
177 | - $this->searchDbHandle = new \PDO('sqlite:' . $path . self::SEARCH_TEMP_DB); |
|
178 | - } |
|
179 | - return $this->searchDbHandle; |
|
180 | - } |
|
181 | - |
|
182 | - /** |
|
183 | - * Replaces the old search index database with the new one. |
|
184 | - */ |
|
185 | - public function replaceOldIndex() |
|
186 | - { |
|
187 | - $this->searchDbHandle = null; |
|
188 | - $path = $this->storageDir . DIRECTORY_SEPARATOR; |
|
189 | - rename($path . self::SEARCH_TEMP_DB, $path . 'search.db'); |
|
190 | - } |
|
113 | + $db->exec($sql); |
|
114 | + } |
|
115 | + |
|
116 | + /** |
|
117 | + * Calculates the inverse document frequency for each |
|
118 | + * term. This is a representation of how often a certain |
|
119 | + * term is used in comparison to all terms. |
|
120 | + */ |
|
121 | + public function createInverseDocumentFrequency() |
|
122 | + { |
|
123 | + $documentCount = $this->getTotalDocumentCount(); |
|
124 | + $inverseDocumentFrequency = new InverseDocumentFrequency($this->getSearchDbHandle(), $documentCount); |
|
125 | + $inverseDocumentFrequency->execute(); |
|
126 | + } |
|
127 | + |
|
128 | + /** |
|
129 | + * @return int|mixed |
|
130 | + */ |
|
131 | + private function getTotalDocumentCount() |
|
132 | + { |
|
133 | + return $this->storage->getDocuments()->getTotalDocumentCount(); |
|
134 | + } |
|
135 | + |
|
136 | + /** |
|
137 | + * Calculates the Term Field Length Norm. |
|
138 | + * This is an index determining how important a |
|
139 | + * term is, based on the total length of the field |
|
140 | + * it comes from. |
|
141 | + */ |
|
142 | + public function createTermFieldLengthNorm() |
|
143 | + { |
|
144 | + $termFieldLengthNorm = new TermFieldLengthNorm($this->getSearchDbHandle()); |
|
145 | + $termFieldLengthNorm->execute(); |
|
146 | + } |
|
147 | + |
|
148 | + /** |
|
149 | + * Stores the time the indexing started in memory |
|
150 | + */ |
|
151 | + private function startLogging() |
|
152 | + { |
|
153 | + $this->loggingStart = round(microtime(true) * 1000); |
|
154 | + $this->lastLog = $this->loggingStart; |
|
155 | + } |
|
156 | + |
|
157 | + /** |
|
158 | + * Adds a logline with the time since last log |
|
159 | + * @param $string |
|
160 | + */ |
|
161 | + private function addLog($string) |
|
162 | + { |
|
163 | + $currentTime = round(microtime(true) * 1000); |
|
164 | + $this->log .= date('d-m-Y H:i:s - ') . str_pad($string, 50, " ", STR_PAD_RIGHT) . "\t" . ($currentTime - $this->lastLog) . 'ms since last log. ' . "\t" . ($currentTime - $this->loggingStart) . 'ms since start.' . PHP_EOL; |
|
165 | + $this->lastLog = round(microtime(true) * 1000); |
|
166 | + } |
|
167 | + |
|
168 | + /** |
|
169 | + * Creates the SQLite \PDO object if it doesnt |
|
170 | + * exist and returns it. |
|
171 | + * @return \PDO |
|
172 | + */ |
|
173 | + protected function getSearchDbHandle() |
|
174 | + { |
|
175 | + if ($this->searchDbHandle === null) { |
|
176 | + $path = $this->storageDir . DIRECTORY_SEPARATOR; |
|
177 | + $this->searchDbHandle = new \PDO('sqlite:' . $path . self::SEARCH_TEMP_DB); |
|
178 | + } |
|
179 | + return $this->searchDbHandle; |
|
180 | + } |
|
181 | + |
|
182 | + /** |
|
183 | + * Replaces the old search index database with the new one. |
|
184 | + */ |
|
185 | + public function replaceOldIndex() |
|
186 | + { |
|
187 | + $this->searchDbHandle = null; |
|
188 | + $path = $this->storageDir . DIRECTORY_SEPARATOR; |
|
189 | + rename($path . self::SEARCH_TEMP_DB, $path . 'search.db'); |
|
190 | + } |
|
191 | 191 | } |
192 | 192 | \ No newline at end of file |