1
|
|
|
<?php |
2
|
|
|
|
3
|
|
|
/*************************************************************** |
4
|
|
|
* Copyright notice |
5
|
|
|
* |
6
|
|
|
* (c) 2009 AOE media ([email protected]) |
7
|
|
|
* All rights reserved |
8
|
|
|
* |
9
|
|
|
* This script is part of the TYPO3 project. The TYPO3 project is |
10
|
|
|
* free software; you can redistribute it and/or modify |
11
|
|
|
* it under the terms of the GNU General Public License as published by |
12
|
|
|
* the Free Software Foundation; either version 2 of the License, or |
13
|
|
|
* (at your option) any later version. |
14
|
|
|
* |
15
|
|
|
* The GNU General Public License can be found at |
16
|
|
|
* http://www.gnu.org/copyleft/gpl.html. |
17
|
|
|
* |
18
|
|
|
* This script is distributed in the hope that it will be useful, |
19
|
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of |
20
|
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
21
|
|
|
* GNU General Public License for more details. |
22
|
|
|
* |
23
|
|
|
* This copyright notice MUST APPEAR in all copies of the script! |
24
|
|
|
***************************************************************/ |
25
|
|
|
class tx_crawler_domain_queue_repository extends AOE\Crawler\Domain\Repository\AbstractRepository
{
    /**
     * Class name of the queue entry objects handled by this repository.
     *
     * @var string
     */
    protected $objectClassname = 'tx_crawler_domain_queue_entry';

    /**
     * Database table that all queries of this repository run against.
     *
     * @var string
     */
    protected $tableName = 'tx_crawler_queue';

    /**
     * Finds the executed entry of the given process with the smallest exec_time
     * (the query is ordered by "exec_time ASC" and limited to one row).
     *
     * @param tx_crawler_domain_process $process
     *
     * @return tx_crawler_domain_queue_entry entry built from the matching row, or from an empty array if none matches
     */
    public function findYoungestEntryForProcess(tx_crawler_domain_process $process)
    {
        return $this->getFirstOrLastObjectByProcess($process, 'exec_time ASC');
    }

    /**
     * Finds the executed entry of the given process with the largest exec_time
     * (the query is ordered by "exec_time DESC" and limited to one row).
     *
     * @param tx_crawler_domain_process $process
     *
     * @return tx_crawler_domain_queue_entry entry built from the matching row, or from an empty array if none matches
     */
    public function findOldestEntryForProcess(tx_crawler_domain_process $process)
    {
        return $this->getFirstOrLastObjectByProcess($process, 'exec_time DESC');
    }

    /**
     * Internal helper that loads the first row completed by the given process
     * (exec_time > 0) in the requested order and wraps it in an entry object.
     *
     * @param tx_crawler_domain_process $process
     * @param string $orderby ORDER BY clause; the first matching row is returned as object
     *
     * @return tx_crawler_domain_queue_entry
     */
    protected function getFirstOrLastObjectByProcess($process, $orderby)
    {
        $db = $this->getDB();
        $where = 'process_id_completed=' . $db->fullQuoteStr($process->getProcess_id(), $this->tableName)
            . ' AND exec_time > 0 ';

        // Use the configured table name, as every other query of this repository does.
        $rows = $db->exec_SELECTgetRows('*', $this->tableName, $where, '', $orderby, 1);

        // A falsy result (no matching row) yields an entry object built from an empty array.
        $first = $rows ? $rows[0] : [];

        return new tx_crawler_domain_queue_entry($first);
    }

    /**
     * Counts all executed items (exec_time > 0) that were completed by the given process.
     *
     * @param tx_crawler_domain_process $process
     *
     * @return int
     */
    public function countExecutedItemsByProcess($process)
    {
        $quotedProcessId = $this->getDB()->fullQuoteStr($process->getProcess_id(), $this->tableName);

        return $this->countItemsByWhereClause('exec_time > 0 AND process_id_completed = ' . $quotedProcessId);
    }

    /**
     * Counts the items assigned to the given process which have not been executed yet (exec_time = 0).
     *
     * @param tx_crawler_domain_process $process
     *
     * @return int
     */
    public function countNonExecutedItemsByProcess($process)
    {
        $quotedProcessId = $this->getDB()->fullQuoteStr($process->getProcess_id(), $this->tableName);

        return $this->countItemsByWhereClause('exec_time = 0 AND process_id = ' . $quotedProcessId);
    }

    /**
     * Counts all queue entries which are not executed yet and were
     * scheduled before the current time.
     *
     * @return int
     */
    public function countAllPendingItems()
    {
        return $this->countItemsByWhereClause('exec_time = 0 AND scheduled < ' . time());
    }

    /**
     * Counts all queue entries which are not executed yet, were scheduled
     * before the current time and are assigned to a process.
     *
     * @return int
     */
    public function countAllAssignedPendingItems()
    {
        return $this->countItemsByWhereClause("exec_time = 0 AND scheduled < " . time() . " AND process_id != ''");
    }

    /**
     * Counts all queue entries which are not executed yet, were scheduled
     * before the current time and are not assigned to a process.
     *
     * @return int
     */
    public function countAllUnassignedPendingItems()
    {
        return $this->countItemsByWhereClause("exec_time = 0 AND scheduled < " . time() . " AND process_id = ''");
    }

    /**
     * Internal method to count the rows of the queue table matching a given where clause.
     *
     * The where clause is inserted into the query unchanged, so callers must
     * quote any dynamic values themselves.
     *
     * @param string $where
     *
     * @return int number of matching rows; 0 if no result row could be fetched
     */
    protected function countItemsByWhereClause($where)
    {
        $db = $this->getDB();
        $rs = $db->exec_SELECTquery('count(*) as anz', $this->tableName, $where);
        $res = $db->sql_fetch_assoc($rs);

        // Without a result row there is nothing to read; the cast keeps the
        // documented integer contract of the public count*() methods.
        return is_array($res) ? (int)$res['anz'] : 0;
    }

    /**
     * Counts pending queue entries grouped by configuration key.
     *
     * @return array list of rows with the keys "configuration", "unprocessed" and "assignedButUnprocessed"
     */
    public function countPendingItemsGroupedByConfigurationKey()
    {
        $db = $this->getDB();
        $res = $db->exec_SELECTquery(
            "configuration, count(*) as unprocessed, sum(process_id != '') as assignedButUnprocessed",
            $this->tableName,
            'exec_time = 0 AND scheduled < ' . time(),
            'configuration'
        );

        $rows = [];
        while ($row = $db->sql_fetch_assoc($res)) {
            $rows[] = $row;
        }

        return $rows;
    }

    /**
     * Gets the ids of all sets which still contain unprocessed, already scheduled entries.
     *
     * @return array array of set ids (integers)
     */
    public function getSetIdWithUnprocessedEntries()
    {
        $db = $this->getDB();
        $res = $db->exec_SELECTquery(
            'set_id',
            $this->tableName,
            'exec_time = 0 AND scheduled < ' . time(),
            'set_id'
        );

        $setIds = [];
        while ($row = $db->sql_fetch_assoc($res)) {
            $setIds[] = intval($row['set_id']);
        }

        return $setIds;
    }

    /**
     * Gets the total number of already scheduled queue entries per configuration
     * for the given sets.
     *
     * @param array $setIds set ids; every value is cast to an integer before it is used in the query
     *
     * @return array totals by configuration (keys)
     */
    public function getTotalQueueEntriesByConfiguration(array $setIds)
    {
        $totals = [];
        if (count($setIds) === 0) {
            return $totals;
        }

        // Force every id to an integer so that no arbitrary SQL can reach the IN (...) list.
        $idList = implode(',', array_map('intval', $setIds));

        $db = $this->getDB();
        $res = $db->exec_SELECTquery(
            'configuration, count(*) as c',
            $this->tableName,
            'set_id in (' . $idList . ') AND scheduled < ' . time(),
            'configuration'
        );
        while ($row = $db->sql_fetch_assoc($res)) {
            $totals[$row['configuration']] = $row['c'];
        }

        return $totals;
    }

    /**
     * Gets the exec_time values of the queue entries with the highest exec_time.
     *
     * No where clause is applied, so entries with exec_time = 0 can be part of
     * the result when fewer than $limit entries have been executed.
     *
     * @param int $limit maximum number of timestamps to return
     *
     * @return array exec_time values, highest first
     */
    public function getLastProcessedEntriesTimestamps($limit = 100)
    {
        $db = $this->getDB();
        $res = $db->exec_SELECTquery(
            'exec_time',
            $this->tableName,
            '',
            '',
            'exec_time desc',
            $limit
        );

        $timestamps = [];
        while ($row = $db->sql_fetch_assoc($res)) {
            $timestamps[] = $row['exec_time'];
        }

        return $timestamps;
    }

    /**
     * Gets the queue entries with the highest exec_time.
     *
     * No where clause is applied, so entries with exec_time = 0 can be part of
     * the result when fewer than $limit entries have been executed.
     *
     * @param string $selectFields field list, passed to the query unchanged
     * @param int $limit maximum number of rows to return
     *
     * @return array list of rows, highest exec_time first
     */
    public function getLastProcessedEntries($selectFields = '*', $limit = 100)
    {
        $db = $this->getDB();
        $res = $db->exec_SELECTquery(
            $selectFields,
            $this->tableName,
            '',
            '',
            'exec_time desc',
            $limit
        );

        $rows = [];
        while ($row = $db->sql_fetch_assoc($res)) {
            $rows[] = $row;
        }

        return $rows;
    }

    /**
     * Gets performance statistics for all entries executed within a time span.
     *
     * @param int $start start timestamp (inclusive)
     * @param int $end end timestamp (inclusive)
     *
     * @return array rows with the keys "process_id_completed", "start", "end" and "urlcount",
     *               indexed by process_id_completed
     */
    public function getPerformanceData($start, $end)
    {
        $db = $this->getDB();
        $res = $db->exec_SELECTquery(
            'process_id_completed, min(exec_time) as start, max(exec_time) as end, count(*) as urlcount',
            $this->tableName,
            'exec_time != 0 and exec_time >= ' . intval($start) . ' and exec_time <= ' . intval($end),
            'process_id_completed'
        );

        $rows = [];
        while ($row = $db->sql_fetch_assoc($res)) {
            $rows[$row['process_id_completed']] = $row;
        }

        return $rows;
    }

    /**
     * Counts the records matching the given where clause.
     *
     * Delegates to countByWhere(), which is not defined in this class
     * (presumably inherited from the abstract repository and operating on
     * $tableName - verify against the parent class).
     *
     * @param string $where Where clause
     *
     * @return integer
     */
    public function countAll($where = '1 = 1')
    {
        return $this->countByWhere($where);
    }
}
332
|
|
|
|
The issue could also be caused by a filter entry in the build configuration. If the path has been excluded in your configuration, e.g.
excluded_paths: ["lib/*"]
you can move it to the dependency path list as follows:
dependency_paths: ["lib/*"]
For further information see https://scrutinizer-ci.com/docs/tools/php/php-scrutinizer/#list-dependency-paths