Completed
Push — TYPO3_7 ( 85059e...d37b91 )
by Tomas Norre
10:37
created

QueueRepository::getSetIdWithUnprocessedEntries()   A

Complexity

Conditions 2
Paths 2

Size

Total Lines 16

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 0
CRAP Score 6

Importance

Changes 0
Metric Value
cc 2
nc 2
nop 0
dl 0
loc 16
ccs 0
cts 15
cp 0
crap 6
rs 9.7333
c 0
b 0
f 0
1
<?php
2
namespace AOE\Crawler\Domain\Repository;
3
4
/***************************************************************
5
 *  Copyright notice
6
 *
7
 *  (c) 2017 AOE GmbH <[email protected]>
8
 *
9
 *  All rights reserved
10
 *
11
 *  This script is part of the TYPO3 project. The TYPO3 project is
12
 *  free software; you can redistribute it and/or modify
13
 *  it under the terms of the GNU General Public License as published by
14
 *  the Free Software Foundation; either version 3 of the License, or
15
 *  (at your option) any later version.
16
 *
17
 *  The GNU General Public License can be found at
18
 *  http://www.gnu.org/copyleft/gpl.html.
19
 *
20
 *  This script is distributed in the hope that it will be useful,
21
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
22
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
23
 *  GNU General Public License for more details.
24
 *
25
 *  This copyright notice MUST APPEAR in all copies of the script!
26
 ***************************************************************/
27
28
use AOE\Crawler\Domain\Model\Process;
29
use AOE\Crawler\Domain\Model\Queue;
30
use TYPO3\CMS\Core\Utility\MathUtility;
31
32
/**
33
 * Class QueueRepository
34
 *
35
 * @package AOE\Crawler\Domain\Repository
36
 */
37
class QueueRepository extends AbstractRepository
38
{
39
    /**
40
     * @var string
41
     */
42
    protected $tableName = 'tx_crawler_queue';
43
44
    /**
     * Returns the entry of the given process that comes first when the
     * process' executed entries are ordered by exec_time ascending.
     *
     * The lookup itself is done by getFirstOrLastObjectByProcess().
     *
     * @param Process $process
     *
     * @return Queue
     */
    public function findYoungestEntryForProcess(Process $process)
    {
        $ordering = 'exec_time ASC';

        return $this->getFirstOrLastObjectByProcess($process, $ordering);
    }
55
56
    /**
     * Returns the entry of the given process that comes first when the
     * process' executed entries are ordered by exec_time descending.
     *
     * The lookup itself is done by getFirstOrLastObjectByProcess().
     *
     * @param Process $process
     *
     * @return Queue
     */
    public function findOldestEntryForProcess(Process $process)
    {
        $ordering = 'exec_time DESC';

        return $this->getFirstOrLastObjectByProcess($process, $ordering);
    }
67
68
    /**
     * Internal helper that builds a Queue object from a single queue row.
     *
     * Selects the rows whose process_id_completed equals the id of the given
     * process and whose exec_time is greater than zero, ordered by $orderby
     * and limited to one row. When no row is found, a Queue object whose
     * fields are all set to null is returned.
     *
     * @param Process $process
     * @param string $orderby ORDER BY clause; the first matching row is used
     *
     * @return Queue
     */
    protected function getFirstOrLastObjectByProcess($process, $orderby)
    {
        // NOTE: getDB() is deprecated since crawler v6.5.1 and will be removed in crawler v9.0.0.
        $db = $this->getDB();
        $where = 'process_id_completed=' . $db->fullQuoteStr($process->getProcessId(), $this->tableName) .
            ' AND exec_time > 0 ';

        $rows = $db->exec_SELECTgetRows('*', $this->tableName, $where, '', $orderby, 1);
        $row = $rows ? $rows[0] : [];

        // Resolve absent columns to null explicitly, so an empty result does
        // not trigger one "undefined index" notice per field.
        $value = function ($column) use ($row) {
            return isset($row[$column]) ? $row[$column] : null;
        };

        $queueObject = new Queue();
        $queueObject->setQid($value('qid'));
        $queueObject->setPageId($value('page_id'));
        $queueObject->setParameters($value('parameters'));
        $queueObject->setScheduled($value('scheduled'));
        $queueObject->setExecTime($value('exec_time'));
        $queueObject->setSetId($value('set_id'));
        $queueObject->setResultData($value('result_data'));
        $queueObject->setProcessScheduled($value('process_scheduled'));
        $queueObject->setProcessId($value('process_id'));
        $queueObject->setProcessIdCompleted($value('process_id_completed'));
        $queueObject->setParametersHash($value('parameters_hash'));
        $queueObject->setConfigurationHash($value('configuration_hash'));
        $queueObject->setConfiguration($value('configuration'));

        return $queueObject;
    }
107
108
    /**
     * Counts the queue entries completed by the given process, i.e. rows with
     * exec_time > 0 whose process_id_completed equals the process id.
     *
     * @param Process $process
     *
     * @return int
     */
    public function countExecutedItemsByProcess($process)
    {
        // NOTE: getDB() is deprecated since crawler v6.5.1 and will be removed in crawler v9.0.0.
        $quotedProcessId = $this->getDB()->fullQuoteStr($process->getProcessId(), $this->tableName);

        return $this->countItemsByWhereClause('exec_time > 0 AND process_id_completed = ' . $quotedProcessId);
    }
122
123
    /**
     * Counts the queue entries assigned to the given process that have not
     * been executed yet, i.e. rows with exec_time = 0 whose process_id equals
     * the process id.
     *
     * @param Process $process
     *
     * @return int
     */
    public function countNonExecutedItemsByProcess($process)
    {
        // NOTE: getDB() is deprecated since crawler v6.5.1 and will be removed in crawler v9.0.0.
        $quotedProcessId = $this->getDB()->fullQuoteStr($process->getProcessId(), $this->tableName);

        return $this->countItemsByWhereClause('exec_time = 0 AND process_id = ' . $quotedProcessId);
    }
137
138
    /**
     * Fetches all unprocessed rows (exec_time = 0) of the crawler queue,
     * ordered by page_id and scheduled.
     *
     * @return array list of queue rows; empty when nothing matches or the query fails
     */
    public function getUnprocessedItems()
    {
        $rows = $GLOBALS['TYPO3_DB']->exec_SELECTgetRows(
            '*',
            'tx_crawler_queue',
            'exec_time = 0',
            '',
            'page_id, scheduled'
        );

        // exec_SELECTgetRows() hands back NULL on SQL errors; normalise so
        // callers (e.g. count()) can always rely on an array.
        return is_array($rows) ? $rows : [];
    }
155
156
    /**
     * Counts the queue entries which have not been processed yet (exec_time = 0).
     *
     * The count is done by the database instead of loading every row into
     * PHP just to count the result.
     *
     * @return int number of unprocessed entries; 0 when the query fails
     */
    public function countUnprocessedItems()
    {
        $count = $GLOBALS['TYPO3_DB']->exec_SELECTcountRows('*', 'tx_crawler_queue', 'exec_time = 0');

        // exec_SELECTcountRows() signals an SQL error with FALSE.
        return $count === false ? 0 : (int)$count;
    }
165
166
    /**
     * Counts all queue entries that are not executed yet (exec_time = 0) and
     * whose scheduled time lies before the current time.
     *
     * @return int
     */
    public function countAllPendingItems()
    {
        $now = time();
        $where = 'exec_time = 0 AND scheduled < ' . $now;

        return $this->countItemsByWhereClause($where);
    }
176
177
    /**
     * Counts all queue entries that are not executed yet (exec_time = 0),
     * whose scheduled time lies before the current time and which are
     * assigned to a process (non-empty process_id).
     *
     * @return int
     */
    public function countAllAssignedPendingItems()
    {
        $conditions = [
            'exec_time = 0',
            'scheduled < ' . time(),
            "process_id != ''",
        ];

        return $this->countItemsByWhereClause(implode(' AND ', $conditions));
    }
187
188
    /**
     * Counts all queue entries that are not executed yet (exec_time = 0),
     * whose scheduled time lies before the current time and which are not
     * assigned to a process (empty process_id).
     *
     * @return int
     */
    public function countAllUnassignedPendingItems()
    {
        $conditions = [
            'exec_time = 0',
            'scheduled < ' . time(),
            "process_id = ''",
        ];

        return $this->countItemsByWhereClause(implode(' AND ', $conditions));
    }
198
199
    /**
     * Internal helper that runs a "count(*)" query on the repository table
     * with the given where clause and returns the fetched count column.
     *
     * @param string $where SQL where clause, used as-is
     *
     * @return mixed the "anz" column of the fetched result row
     */
    protected function countItemsByWhereClause($where)
    {
        // NOTE: getDB() is deprecated since crawler v6.5.1 and will be removed in crawler v9.0.0.
        $connection = $this->getDB();
        $record = $connection->sql_fetch_assoc(
            $connection->exec_SELECTquery('count(*) as anz', $this->tableName, $where)
        );

        return $record['anz'];
    }
214
215
    /**
     * Counts pending queue entries (exec_time = 0, scheduled before the
     * current time) grouped by configuration.
     *
     * Each returned row carries the columns "configuration", "unprocessed"
     * and "assignedButUnprocessed" as selected by the query.
     *
     * @return array list of result rows
     */
    public function countPendingItemsGroupedByConfigurationKey()
    {
        // NOTE: getDB() is deprecated since crawler v6.5.1 and will be removed in crawler v9.0.0.
        $db = $this->getDB();
        $fields = "configuration, count(*) as unprocessed, sum(process_id != '') as assignedButUnprocessed";
        $where = 'exec_time = 0 AND scheduled < ' . time();
        $result = $db->exec_SELECTquery($fields, $this->tableName, $where, 'configuration');

        $grouped = [];
        for ($record = $db->sql_fetch_assoc($result); $record; $record = $db->sql_fetch_assoc($result)) {
            $grouped[] = $record;
        }

        return $grouped;
    }
236
237
    /**
     * Collects the set ids that still have entries with exec_time = 0 whose
     * scheduled time lies before the current time.
     *
     * @return array list of set ids, each cast to integer
     */
    public function getSetIdWithUnprocessedEntries()
    {
        // NOTE: getDB() is deprecated since crawler v6.5.1 and will be removed in crawler v9.0.0.
        $db = $this->getDB();
        $where = 'exec_time = 0 AND scheduled < ' . time();
        $result = $db->exec_SELECTquery('set_id', $this->tableName, $where, 'set_id');

        $ids = [];
        while ($record = $db->sql_fetch_assoc($result)) {
            $ids[] = (int)$record['set_id'];
        }

        return $ids;
    }
260
261
    /**
     * Gets the total number of queue entries per configuration for the given
     * set ids, restricted to entries whose scheduled time lies before the
     * current time.
     *
     * @param array $setIds set ids to look at; every value is cast to integer
     *
     * @return array totals by configuration (keys); empty when no set ids are given
     */
    public function getTotalQueueEntriesByConfiguration(array $setIds)
    {
        // Force every id to an integer before it is concatenated into the
        // IN (...) list, so no caller-supplied string can reach the SQL.
        $setIds = array_map('intval', $setIds);
        if (count($setIds) === 0) {
            return [];
        }

        // NOTE: getDB() is deprecated since crawler v6.5.1 and will be removed in crawler v9.0.0.
        $db = $this->getDB();
        $res = $db->exec_SELECTquery(
            'configuration, count(*) as c',
            $this->tableName,
            'set_id in (' . implode(',', $setIds) . ') AND scheduled < ' . time(),
            'configuration'
        );

        $totals = [];
        while ($row = $db->sql_fetch_assoc($res)) {
            $totals[$row['configuration']] = $row['c'];
        }

        return $totals;
    }
286
287
    /**
     * Gets the exec_time values of the queue entries with the highest
     * exec_time. The query has no where restriction; rows are ordered by
     * exec_time descending and cut off at $limit.
     *
     * @param int $limit passed on as the LIMIT of the query
     *
     * @return array list of timestamps, each cast to integer
     */
    public function getLastProcessedEntriesTimestamps($limit = 100)
    {
        // NOTE: getDB() is deprecated since crawler v6.5.1 and will be removed in crawler v9.0.0.
        $db = $this->getDB();
        $result = $db->exec_SELECTquery('exec_time', $this->tableName, '', '', 'exec_time desc', $limit);

        $timestamps = [];
        while (false !== ($record = $db->sql_fetch_assoc($result))) {
            $timestamps[] = (int)$record['exec_time'];
        }

        return $timestamps;
    }
313
314
    /**
     * Gets the queue entries with the highest exec_time. The query has no
     * where restriction; rows are ordered by exec_time descending and cut
     * off at $limit.
     *
     * @param string $selectFields field list, placed into the query as-is
     * @param int $limit passed on as the LIMIT of the query
     *
     * @return array list of result rows
     */
    public function getLastProcessedEntries($selectFields = '*', $limit = 100)
    {
        // NOTE: getDB() is deprecated since crawler v6.5.1 and will be removed in crawler v9.0.0.
        $db = $this->getDB();
        $result = $db->exec_SELECTquery($selectFields, $this->tableName, '', '', 'exec_time desc', $limit);

        $entries = [];
        while (false !== ($record = $db->sql_fetch_assoc($result))) {
            $entries[] = $record;
        }

        return $entries;
    }
341
342
    /**
     * Gets performance statistics per completed process for entries executed
     * within the given time window (both bounds inclusive).
     *
     * Each row holds process_id_completed, the minimum exec_time ("start"),
     * the maximum exec_time ("end") and the number of entries ("urlcount").
     *
     * @param int $start timestamp, lower bound
     * @param int $end timestamp, upper bound
     *
     * @return array result rows keyed by process_id_completed
     */
    public function getPerformanceData($start, $end)
    {
        // NOTE: getDB() is deprecated since crawler v6.5.1 and will be removed in crawler v9.0.0.
        $db = $this->getDB();
        $from = (int)$start;
        $until = (int)$end;
        $where = 'exec_time != 0 and exec_time >= ' . $from . ' and exec_time <= ' . $until;
        $result = $db->exec_SELECTquery(
            'process_id_completed, min(exec_time) as start, max(exec_time) as end, count(*) as urlcount',
            $this->tableName,
            $where,
            'process_id_completed'
        );

        $statistics = [];
        while (false !== ($record = $db->sql_fetch_assoc($result))) {
            $statistics[$record['process_id_completed']] = $record;
        }

        return $statistics;
    }
367
368
    /**
     * Determines whether at least one queue entry exists for a page.
     *
     * @param int|string $uid page uid; must be interpretable as an integer
     * @param bool $unprocessed_only unless false, only entries with exec_time = 0 count
     * @param bool $timed_only unless false, only entries with scheduled != 0 count
     * @param int|bool $timestamp unless false, only entries scheduled at exactly this timestamp count
     *
     * @return bool
     *
     * @throws \InvalidArgumentException when $uid cannot be interpreted as an integer
     */
    public function isPageInQueue($uid, $unprocessed_only = true, $timed_only = false, $timestamp = false)
    {
        if (!MathUtility::canBeInterpretedAsInteger($uid)) {
            throw new \InvalidArgumentException('Invalid parameter type', 1468931945);
        }

        // Collect the restrictions first and join them into one where clause.
        $conditions = ['page_id = ' . (int)$uid];
        if ($unprocessed_only !== false) {
            $conditions[] = 'exec_time = 0';
        }
        if ($timed_only !== false) {
            $conditions[] = 'scheduled != 0';
        }
        if ($timestamp !== false) {
            $conditions[] = 'scheduled = ' . (int)$timestamp;
        }

        $count = $GLOBALS['TYPO3_DB']->exec_SELECTcountRows(
            '*',
            'tx_crawler_queue',
            implode(' AND ', $conditions)
        );

        return $count !== false && $count > 0;
    }
412
413
    /**
     * Checks whether a page has an unprocessed queue entry which is timed,
     * i.e. carries a scheduled date at which it should be crawled.
     *
     * @param int $uid uid of the page
     *
     * @return bool
     */
    public function isPageInQueueTimed($uid)
    {
        // The third argument switches on the "scheduled != 0" restriction;
        // without it this method would not look at the schedule at all and
        // would behave exactly like isPageInQueue($uid).
        return $this->isPageInQueue(intval($uid), true, true);
    }
427
428
    /**
     * Counts records matching the given where clause by delegating to
     * countByWhere(). With the default clause "1 = 1" no restriction applies.
     *
     * NOTE(review): countByWhere() is not defined in this class; it is
     * presumably inherited from AbstractRepository and counts rows of this
     * repository's table - confirm.
     *
     * @param string $where Where clause
     *
     * @return int
     */
    public function countAll($where = '1 = 1')
    {
        $total = $this->countByWhere($where);

        return $total;
    }
439
}
440