Completed
Push — master ( 105406...22177f )
by T
02:02
created

Processor::setLogger()   A

Complexity

Conditions 1
Paths 1

Size

Total Lines 4
Code Lines 2

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 0
CRAP Score 2

Importance

Changes 1
Bugs 0 Features 1
Metric Value
c 1
b 0
f 1
dl 0
loc 4
ccs 0
cts 3
cp 0
rs 10
cc 1
eloc 2
nc 1
nop 1
crap 2
1
<?php
2
3
namespace tomzx\IRCStats;
4
5
use Illuminate\Database\Connection;
6
use Psr\Log\LoggerAwareInterface;
7
use Psr\Log\LoggerInterface;
8
use Psr\Log\NullLogger;
9
10
class Processor implements LoggerAwareInterface {
11
	/**
12
	 * @var \tomzx\IRCStats\DatabaseProxy
13
	 */
14
	protected $databaseProxy;
15
	/**
16
	 * @var \Psr\Log\LoggerInterface
17
	 */
18
	protected $logger;
19
20
	/**
	 * Builds a processor around the given database proxy.
	 *
	 * The logger starts out as a NullLogger, so logging calls are always safe;
	 * a real logger can be supplied afterwards through setLogger().
	 *
	 * @param \tomzx\IRCStats\DatabaseProxy $databaseProxy Proxy the database connection is obtained from.
	 */
	public function __construct(DatabaseProxy $databaseProxy)
	{
		$this->logger = new NullLogger();
		$this->databaseProxy = $databaseProxy;
	}
28
29
	/**
	 * Fetches the connection currently exposed by the database proxy.
	 *
	 * @return \Illuminate\Database\Connection
	 */
	protected function getDatabase()
	{
		$connection = $this->databaseProxy->getConnection();

		return $connection;
	}
36
37
	/**
	 * Replaces the logger that receives this processor's progress and timing messages.
	 *
	 * @param \Psr\Log\LoggerInterface $logger Logger to use from now on.
	 * @return void
	 */
	public function setLogger(LoggerInterface $logger)
	{
		$this->logger = $logger;
	}
45
46
	/**
	 * Runs the whole processing pipeline on the proxied database connection.
	 *
	 * @return void
	 */
	public function run()
	{
		$connection = $this->getDatabase();

		// The words table is seeded first because the indexing step reads its
		// dictionary from that table.
		$this->initializeDictionary($connection);
		$this->generateLogsWords($connection);
	}
55
56
	/**
	 * Indexes the dictionary words found in the logs that were not processed yet.
	 *
	 * Processing resumes after the highest logs_id already present in the
	 * logs_words table, then walks the logs table in batches ordered by id.
	 * Each message is split on single spaces; every token that is a key of the
	 * dictionary produces one logs_words row (logs_id, word_id), and tokens
	 * that are not in the dictionary are skipped.
	 *
	 * @param \Illuminate\Database\Connection $db
	 * @return void
	 * @throws \Exception
	 */
	protected function generateLogsWords(Connection $db)
	{
		// Find last processed logs id
		$lastLogId = (int)$db->table('logs_words')->max('logs_id');

		$dictionary = null;
		$batchSize = 250;
		$currentId = $lastLogId;
		while (true) {
			$this->logger->debug('Processing id > '.$currentId.' (batch of '.$batchSize.')');
			$fetchStart = microtime(true);

			$logs = $this->getLogs($db, $currentId, $batchSize);

			$fetchDuration = microtime(true) - $fetchStart;

			// Explicit emptiness test instead of a boolean cast: it is correct for
			// a plain array and also for a countable result object, which is always
			// truthy and would otherwise keep this loop running forever.
			if (empty($logs) || count($logs) === 0) {
				// No more data available
				$this->logger->debug('End of data');
				break;
			}

			// Load the dictionary lazily, and only once even when it is empty.
			if ($dictionary === null) {
				$dictionary = $this->loadDictionary($db);
			}

			$insertStart = microtime(true);
			$data = [];
			foreach ($logs as $log) {
				// Advance the cursor inside the loop so it never depends on the
				// loop variable still being defined after the loop. Logs are
				// fetched in ascending id order, so the last assignment wins.
				$currentId = $log->id;

				// TODO: Replace this with preg_split
				$words = explode(' ', $log->message);
				foreach ($words as $word) {
					// TODO: Support case insensitive
					if ( ! isset($dictionary[$word])) {
						continue;
					}

					$data[] = [
						'logs_id' => $log->id,
						'word_id' => $dictionary[$word],
					];
				}
			}

			$this->batchInsert($db->table('logs_words'), $data);
			$insertDuration = microtime(true) - $insertStart;
			$this->logger->debug('fetch: '.round($fetchDuration, 6).'s, insert: '.round($insertDuration, 6).'s');
		}
	}
115
116 2
	/**
	 * Seeds the words table from the dictionary file when the table is empty.
	 *
	 * Returns immediately when the table already holds at least one row.
	 * Otherwise every line of data/dictionary.txt (resolved relative to this
	 * source file) is trimmed and inserted as one word; lines that are empty
	 * after trimming are skipped so no empty-string word is stored.
	 *
	 * @param \Illuminate\Database\Connection $db
	 * @return void
	 * @throws \RuntimeException When the dictionary file cannot be read.
	 */
	protected function initializeDictionary(Connection $db)
	{
		$dictionarySize = $db->table('words')->count();

		if ($dictionarySize > 0) {
			return;
		}

		$this->logger->info('Seeding words table...');
		$dictionarySeedStart = microtime(true);

		$dictionaryPath = __DIR__ . '/../../../data/dictionary.txt';
		// file() returns false (and emits a warning) on failure; fail loudly
		// instead of silently seeding nothing.
		$dictionary = is_readable($dictionaryPath) ? file($dictionaryPath) : false;
		if ($dictionary === false) {
			throw new \RuntimeException('Unable to read dictionary file: '.$dictionaryPath);
		}

		$data = [];
		foreach ($dictionary as $line) {
			$word = trim($line);
			if ($word === '') {
				// Blank line: not a word.
				continue;
			}

			$data[] = [
				'word' => $word,
			];
		}
		$this->batchInsert($db->table('words'), $data);

		$dictionarySeedDuration = microtime(true) - $dictionarySeedStart;
		$this->logger->info('Finished seeding words table in '.round($dictionarySeedDuration, 6).'s');
	}
138
139
	/**
	 * Reads the words table into a lookup array.
	 *
	 * @param \Illuminate\Database\Connection $db
	 * @return array Word ids keyed by the word text, as consumed by generateLogsWords().
	 */
	protected function getDictionary(Connection $db)
	{
		$wordsQuery = $db->table('words')->select('id', 'word');

		return $wordsQuery->lists('id', 'word');
	}
149
150
	/**
	 * Loads the dictionary through getDictionary() and logs how long the load took.
	 *
	 * @param \Illuminate\Database\Connection $db
	 * @return array The dictionary exactly as returned by getDictionary().
	 */
	protected function loadDictionary(Connection $db)
	{
		$startedAt = microtime(true);
		$words = $this->getDictionary($db);
		$elapsed = microtime(true) - $startedAt;

		$this->logger->info('Dictionary loaded in ' . round($elapsed, 6) . 's');

		return $words;
	}
162
163
	/**
	 * Fetches the next batch of logs whose id is greater than the given id.
	 *
	 * Rows carry only the id and message columns and are ordered by ascending id.
	 *
	 * @param \Illuminate\Database\Connection $db
	 * @param int                             $currentId Only logs with an id strictly above this value are returned.
	 * @param int                             $batchSize Maximum number of rows to fetch.
	 * @return array
	 */
	protected function getLogs(Connection $db, $currentId, $batchSize)
	{
		$query = $db->table('logs');
		$query = $query->select('id', 'message');
		$query = $query->where('id', '>', $currentId);
		$query = $query->orderBy('id', 'asc');
		$query = $query->limit($batchSize);

		return $query->get();
	}
178
179
	/**
	 * Inserts the given rows through the query builder, all within one transaction
	 * opened on the builder's connection.
	 *
	 * @param \Illuminate\Database\Query\Builder $builder Builder already pointed at the target table.
	 * @param  array                             $data    Rows to insert; an empty array results in no insert call.
	 */
	protected function batchInsert(\Illuminate\Database\Query\Builder $builder, array $data)
	{
		$connection = $builder->getConnection();

		$connection->transaction(function () use ($builder, $data) {
			// Batch in group of 250 entries to prevent "Too many SQL variables" SQL error
			foreach (array_chunk($data, 250) as $rows) {
				$builder->insert($rows);
			}
		});
	}
196
}
197