Passed
Push — master ( 3f6c85...9c6499 )
by Jens
02:40
created

Indexer::resetIndex()   A

Complexity

Conditions 1
Paths 1

Size

Total Lines 13
Code Lines 4

Duplication

Lines 0
Ratio 0 %

Importance

Changes 1
Bugs 0 Features 0
Metric Value
cc 1
eloc 4
c 1
b 0
f 0
nc 1
nop 0
dl 0
loc 13
rs 9.4285
1
<?php
2
/**
3
 * User: jensk
4
 * Date: 21-2-2017
5
 * Time: 10:29
6
 */
7
8
namespace library\search;
9
10
11
use library\search\indexer\InverseDocumentFrequency;
12
use library\search\indexer\TermCount;
13
use library\search\indexer\TermFieldLengthNorm;
14
use library\search\indexer\TermFrequency;
15
16
/**
17
 * Class Indexer
18
 * Responsible for creating the search index based on the
19
 * existing documents
20
 *
21
 * @package library\search
22
 */
23
class Indexer extends SearchDbConnected
{
	/**
	 * Not referenced inside this class.
	 * NOTE(review): the name matches SQLite's compound-select limit and is
	 * presumably used by the indexer helpers for batching; confirm there.
	 */
	const SQLITE_MAX_COMPOUND_SELECT = 100;

	/**
	 * File name of the temporary database the index is built in before
	 * it replaces the live search database.
	 */
	const SEARCH_TEMP_DB = 'search_tmp.db';

	/**
	 * Filter names handed to TermCount when counting terms.
	 *
	 * @var array
	 */
	protected $filters = array(
		'DutchStopWords',
		'EnglishStopWords'
	);

	/**
	 * Storage directory, resolved relative to two levels above the
	 * directory containing this file.
	 */
	protected $storageDir;

	/**
	 * Time in milliseconds at which the current indexing run started.
	 *
	 * @var double
	 */
	protected $loggingStart;

	/**
	 * Accumulated log lines of the current indexing run.
	 *
	 * @var string
	 */
	protected $log;

	/**
	 * Time in milliseconds at which the previous log line was written.
	 *
	 * @var double
	 */
	protected $lastLog;

	/**
	 * Builds the search index in the temporary search db: clears it,
	 * calculates and stores the term count, term frequency, term field
	 * length norm and inverse document frequency, and finally replaces
	 * the current search db with the temporary one.
	 *
	 * When the replacement fails, a failure line is logged instead of
	 * the completion line.
	 *
	 * @return string The log of this run
	 */
	public function updateIndex()
	{
		$this->startLogging();
		$this->addLog('Indexing start.');
		$this->addLog('Clearing index.');
		$this->resetIndex();
		$this->addLog('Cleaning Published Deleted Documents');
		$this->storage->getDocuments()->cleanPublishedDeletedDocuments();
		$this->addLog('Retrieving documents to be indexed.');
		$documents = $this->storage->getDocuments()->getPublishedDocumentsNoFolders();
		$this->addLog('Start Document Term Count for ' . count($documents) . ' documents');
		$this->createDocumentTermCount($documents);
		$this->addLog('Start Document Term Frequency.');
		$this->createDocumentTermFrequency();
		$this->addLog('Start Term Field Length Norm.');
		$this->createTermFieldLengthNorm();
		$this->addLog('Start Inverse Document Frequency.');
		$this->createInverseDocumentFrequency();
		$this->addLog('Replacing old index.');
		// Strict comparison: an overriding implementation that returns
		// nothing (null) is still treated as success.
		if ($this->replaceOldIndex() === false) {
			$this->addLog('Replacing old index failed.');
			return $this->log;
		}
		$this->addLog('Indexing complete.');

		return $this->log;
	}

	/**
	 * Count how often a term is used in a document
	 *
	 * @param mixed $documents Documents to index; handed to TermCount as-is
	 */
	public function createDocumentTermCount($documents)
	{
		$termCount = new TermCount($this->getSearchDbHandle(), $documents, $this->filters, $this->storage);
		$termCount->execute();
	}

	/**
	 * Calculate the frequency index for a term with
	 * a field
	 */
	public function createDocumentTermFrequency()
	{
		$termFrequency = new TermFrequency($this->getSearchDbHandle());
		$termFrequency->execute();
	}

	/**
	 * Resets the entire index: removes all rows from the term_count,
	 * term_frequency and inverse_document_frequency tables and sets
	 * their sqlite_sequence counters back to 0.
	 *
	 * NOTE(review): the result of exec() is not inspected, so whether a
	 * failing statement is noticed depends on the PDO error mode.
	 */
	public function resetIndex()
	{
		$db = $this->getSearchDbHandle();
		$sql = '
			DELETE FROM term_count;
			DELETE FROM term_frequency;
			DELETE FROM inverse_document_frequency;
			UPDATE `sqlite_sequence` SET `seq`= 0 WHERE `name`=\'term_count\';
			UPDATE `sqlite_sequence` SET `seq`= 0 WHERE `name`=\'term_frequency\';
			UPDATE `sqlite_sequence` SET `seq`= 0 WHERE `name`=\'inverse_document_frequency\';
		';
		$db->exec($sql);
	}

	/**
	 * Calculates the inverse document frequency for each
	 * term. This is a representation of how often a certain
	 * term is used in comparison to all terms.
	 */
	public function createInverseDocumentFrequency()
	{
		$documentCount = $this->getTotalDocumentCount();
		$inverseDocumentFrequency = new InverseDocumentFrequency($this->getSearchDbHandle(), $documentCount);
		$inverseDocumentFrequency->execute();
	}

	/**
	 * Returns the total document count as reported by the storage.
	 *
	 * @return int|mixed
	 */
	private function getTotalDocumentCount()
	{
		return $this->storage->getDocuments()->getTotalDocumentCount();
	}

	/**
	 * Calculates the Term Field Length Norm.
	 * This is an index determining how important a
	 * term is, based on the total length of the field
	 * it comes from.
	 */
	public function createTermFieldLengthNorm()
	{
		$termFieldLengthNorm = new TermFieldLengthNorm($this->getSearchDbHandle());
		$termFieldLengthNorm->execute();
	}

	/**
	 * Stores the time the indexing started in memory and starts
	 * with an empty log, so that a second run on the same instance
	 * does not return the lines of the previous run.
	 */
	private function startLogging()
	{
		$this->log = '';
		$this->loggingStart = round(microtime(true) * 1000);
		$this->lastLog = $this->loggingStart;
	}

	/**
	 * Adds a logline with the time since the last log line and the
	 * time since the start of the run, both in milliseconds.
	 *
	 * @param string $string Message to log
	 */
	private function addLog($string)
	{
		$currentTime = round(microtime(true) * 1000);
		$this->log .= date('d-m-Y H:i:s - ') . str_pad($string, 50, " ", STR_PAD_RIGHT) . "\t" . ($currentTime - $this->lastLog) . 'ms since last log. ' . "\t" . ($currentTime - $this->loggingStart) . 'ms since start.' . PHP_EOL;
		// Reuse the reading taken above so consecutive deltas add up to
		// the "since start" value.
		$this->lastLog = $currentTime;
	}

	/**
	 * Creates the SQLite \PDO object for the temporary search db if it
	 * doesn't exist yet and returns it.
	 *
	 * NOTE(review): $searchDbHandle is not declared in this class; it is
	 * presumably inherited from SearchDbConnected.
	 *
	 * @return \PDO
	 */
	protected function getSearchDbHandle()
	{
		if ($this->searchDbHandle === null) {
			$this->searchDbHandle = new \PDO('sqlite:' . $this->resolveStoragePath() . self::SEARCH_TEMP_DB);
		}

		return $this->searchDbHandle;
	}

	/**
	 * Replaces the old search index database with the new one.
	 *
	 * The handle to the temporary database is dropped first, then the
	 * temporary file is renamed to search.db.
	 *
	 * @return bool The result of rename(): true on success, false on failure
	 */
	public function replaceOldIndex()
	{
		$this->searchDbHandle = null;
		$path = $this->resolveStoragePath();

		return rename($path . self::SEARCH_TEMP_DB, $path . 'search.db');
	}

	/**
	 * Builds the path of the storage directory, including a trailing
	 * directory separator.
	 *
	 * @return string
	 */
	private function resolveStoragePath()
	{
		return __DIR__ . DIRECTORY_SEPARATOR . '..' . DIRECTORY_SEPARATOR . '..' . DIRECTORY_SEPARATOR . $this->storageDir . DIRECTORY_SEPARATOR;
	}
}