Passed
Push — develop ( 169afe...f2bd80 )
by Jens
02:39
created

Indexer   A

Complexity

Total Complexity 12

Size/Duplication

Total Lines 167
Duplicated Lines 4.79 %

Coupling/Cohesion

Components 1
Dependencies 6

Importance

Changes 0
Metric Value
dl 8
loc 167
rs 10
c 0
b 0
f 0
wmc 12
lcom 1
cbo 6

11 Methods

Rating   Name   Duplication   Size   Complexity  
A updateIndex() 0 21 1
A createDocumentTermCount() 0 5 1
A createDocumentTermFrequency() 0 5 1
A resetIndex() 0 13 1
A createInverseDocumentFrequency() 0 6 1
A getTotalDocumentCount() 0 4 1
A createTermFieldLengthNorm() 0 5 1
A startLogging() 0 5 1
A addLog() 0 6 1
A getSearchDbHandle() 8 8 2
A replaceOldIndex() 0 6 1

How to fix   Duplicated Code   

Duplicated Code

Duplicate code is one of the most pungent code smells. A rule that is often used is to re-structure code once it is duplicated in three or more places.

Common duplication problems and their corresponding solutions are:

1
<?php
2
/**
3
 * User: jensk
4
 * Date: 21-2-2017
5
 * Time: 10:29
6
 */
7
8
namespace library\search;
9
10
11
use library\search\indexer\InverseDocumentFrequency;
12
use library\search\indexer\TermCount;
13
use library\search\indexer\TermFieldLengthNorm;
14
use library\search\indexer\TermFrequency;
15
16
/**
17
 * Class Indexer
18
 * Responsible for creating the search index based on the
19
 * existing documents
20
 *
21
 * @package library\search
22
 */
23
class Indexer extends SearchDbConnected
24
{
25
	/**
	 * File name of the temporary SQLite database the index is built in
	 * before it takes the place of the live search database.
	 */
	const SEARCH_TEMP_DB = 'search_tmp.db';

	/**
	 * NOTE(review): not referenced anywhere in this class; presumably a
	 * batch-size limit consumed by the indexer helpers — confirm.
	 */
	const SQLITE_MAX_COMPOUND_SELECT = 100;

	/**
	 * Names of the filters handed to TermCount when terms are counted.
	 *
	 * @var string[]
	 */
	protected $filters = array('DutchStopWords', 'EnglishStopWords');

	/**
	 * Directory segment appended to the path two levels above this file
	 * when the search databases are located.
	 *
	 * @var string
	 */
	protected $storageDir;

	/**
	 * Millisecond timestamp taken when logging started. The value comes
	 * from round(), which returns a float.
	 *
	 * @var float
	 */
	protected $loggingStart;

	/**
	 * Log text accumulated by addLog(), one line per logged step.
	 *
	 * @var string
	 */
	protected $log;

	/**
	 * Millisecond timestamp of the most recent log line. The value comes
	 * from round(), which returns a float.
	 *
	 * @var float
	 */
	protected $lastLog;
45
46
	/**
	 * Rebuilds the complete search index and returns the run log.
	 *
	 * All work is done in the temporary search database: its index tables
	 * are emptied, each indexing stage runs against it, and only after the
	 * last stage does it replace the current search database. A log line
	 * is written before every stage.
	 *
	 * @return string the accumulated log, one line per logged step
	 */
	public function updateIndex()
	{
		$this->startLogging();
		$this->addLog('Indexing start.');

		// Remove whatever a previous run left in the index tables.
		$this->addLog('Clearing index.');
		$this->resetIndex();

		// Fetch the documents and count the terms they contain.
		$this->addLog('Retrieving documents to be indexed.');
		$toIndex = $this->storage->getDocuments();
		$documentTotal = count($toIndex);
		$this->addLog('Start Document Term Count for ' . $documentTotal . ' documents');
		$this->createDocumentTermCount($toIndex);

		// Derive the remaining index tables from the term counts.
		$this->addLog('Start Document Term Frequency.');
		$this->createDocumentTermFrequency();

		$this->addLog('Start Term Field Length Norm.');
		$this->createTermFieldLengthNorm();

		$this->addLog('Start Inverse Document Frequency.');
		$this->createInverseDocumentFrequency();

		// Swap the freshly built database in for the old one.
		$this->addLog('Replacing old index.');
		$this->replaceOldIndex();

		$this->addLog('Indexing complete.');

		return $this->log;
	}
74
75
	/**
	 * Counts how often each term is used in a document by running a
	 * TermCount job over the given documents.
	 *
	 * @param mixed $documents documents to index, e.g. the result of the
	 *                         storage's getDocuments()
	 */
	public function createDocumentTermCount($documents)
	{
		// NOTE(review): static analysis reports that the callee documents its
		// first argument as a resource while a \PDO object is passed here —
		// confirm and correct the TermCount docblock.
		$dbHandle = $this->getSearchDbHandle();

		$counter = new TermCount($dbHandle, $documents, $this->filters, $this->storage);
		$counter->execute();
	}
85
86
	/**
	 * Calculates the frequency index for a term within a field by running
	 * a TermFrequency job on the search database handle.
	 */
	public function createDocumentTermFrequency()
	{
		$dbHandle = $this->getSearchDbHandle();

		$frequencyJob = new TermFrequency($dbHandle);
		$frequencyJob->execute();
	}
95
96
97
	/**
	 * Resets the entire index.
	 *
	 * Deletes every row from term_count, term_frequency and
	 * inverse_document_frequency, and sets the sqlite_sequence counter of
	 * each of those tables back to 0. The statements are sent as one batch
	 * and the result of exec() is not inspected.
	 */
	public function resetIndex()
	{
		$resetStatements = '
			DELETE FROM term_count;
			DELETE FROM term_frequency;
			DELETE FROM inverse_document_frequency;
			UPDATE `sqlite_sequence` SET `seq`= 0 WHERE `name`=\'term_count\';
			UPDATE `sqlite_sequence` SET `seq`= 0 WHERE `name`=\'term_frequency\';
			UPDATE `sqlite_sequence` SET `seq`= 0 WHERE `name`=\'inverse_document_frequency\';
		';

		$this->getSearchDbHandle()->exec($resetStatements);
	}
113
114
	/**
	 * Calculates the inverse document frequency for each term: a
	 * representation of how often a certain term is used in comparison
	 * to all terms.
	 *
	 * The total document count is read first and passed, together with
	 * the search database handle, to an InverseDocumentFrequency job.
	 */
	public function createInverseDocumentFrequency()
	{
		$totalDocuments = $this->getTotalDocumentCount();

		// NOTE(review): static analysis reports that the callee documents its
		// first argument as a resource while a \PDO object is passed here —
		// confirm and correct the InverseDocumentFrequency docblock.
		$dbHandle = $this->getSearchDbHandle();

		$idfJob = new InverseDocumentFrequency($dbHandle, $totalDocuments);
		$idfJob->execute();
	}
125
126
	/**
	 * Returns the total number of documents as reported by the storage.
	 *
	 * @return int|mixed whatever the storage's getTotalDocumentCount() yields
	 */
	private function getTotalDocumentCount()
	{
		$total = $this->storage->getTotalDocumentCount();

		return $total;
	}
133
134
	/**
	 * Calculates the Term Field Length Norm: an index of how important a
	 * term is, based on the total length of the field it comes from.
	 *
	 * Runs a TermFieldLengthNorm job on the search database handle.
	 */
	public function createTermFieldLengthNorm()
	{
		$dbHandle = $this->getSearchDbHandle();

		$lengthNormJob = new TermFieldLengthNorm($dbHandle);
		$lengthNormJob->execute();
	}
145
146
	/**
	 * Records the moment indexing started and uses the same moment as the
	 * reference point for the first log line.
	 */
	private function startLogging()
	{
		// Current time in whole milliseconds. round() returns a float, so
		// both properties end up holding floats rather than integers.
		$nowInMs = round(microtime(true) * 1000);

		$this->loggingStart = $nowInMs;
		$this->lastLog = $nowInMs;
	}
154
155
	/**
	 * Appends one line to the log.
	 *
	 * The line holds the current date and time, the message right-padded
	 * to 50 characters, the milliseconds elapsed since the previous log
	 * line and the milliseconds elapsed since logging started.
	 *
	 * @param string $string message to log
	 */
	private function addLog($string)
	{
		// Take exactly one timestamp. It is used for both deltas and becomes
		// the reference for the next line, so the "since last log" values of
		// consecutive lines add up to the "since start" value. Reading the
		// clock a second time here would drop the time spent formatting.
		$currentTime = round(microtime(true) * 1000);

		$sinceLastLog = $currentTime - $this->lastLog;
		$sinceStart = $currentTime - $this->loggingStart;

		$this->log .= date('d-m-Y H:i:s - ') . str_pad($string, 50, " ", STR_PAD_RIGHT) . "\t" . $sinceLastLog . 'ms since last log. ' . "\t" . $sinceStart . 'ms since start.' . PHP_EOL;

		// Stored as a float because round() returns a float.
		$this->lastLog = $currentTime;
	}
165
166
	/**
	 * Returns the SQLite \PDO handle for the temporary search database,
	 * creating it on first use.
	 *
	 * The database file is SEARCH_TEMP_DB inside the storage directory,
	 * resolved relative to two levels above this file's directory.
	 *
	 * NOTE(review): static analysis reports this method as duplicated
	 * elsewhere in the project, and reports that the $searchDbHandle
	 * property is declared as a resource while a \PDO object is stored in
	 * it — confirm and update the property's documentation.
	 *
	 * @return \PDO
	 */
	protected function getSearchDbHandle()
	{
		// Reuse the handle once it has been opened.
		if ($this->searchDbHandle !== null) {
			return $this->searchDbHandle;
		}

		$segments = array(__DIR__, '..', '..', $this->storageDir, self::SEARCH_TEMP_DB);
		$databaseFile = implode(DIRECTORY_SEPARATOR, $segments);

		$this->searchDbHandle = new \PDO('sqlite:' . $databaseFile);

		return $this->searchDbHandle;
	}
179
180
	/**
	 * Replaces the old search index database with the new one by renaming
	 * the temporary database file to search.db in the storage directory.
	 *
	 * The result of rename() is not inspected.
	 */
	public function replaceOldIndex()
	{
		// Drop this object's reference to the database handle before the
		// file is moved — presumably so the connection is released first.
		$this->searchDbHandle = null;

		// The trailing empty segment yields the closing directory separator.
		$storagePath = implode(DIRECTORY_SEPARATOR, array(__DIR__, '..', '..', $this->storageDir, ''));

		$temporaryDb = $storagePath . self::SEARCH_TEMP_DB;
		$liveDb = $storagePath . 'search.db';

		rename($temporaryDb, $liveDb);
	}
189
}