Completed
Push — issue/473 ( 4d9f89...4df17b )
by Tomas Norre
05:03
created

QueueRepository::findOldestEntryForProcess()   A

Complexity

Conditions 1
Paths 1

Size

Total Lines 4

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 2
CRAP Score 1.125

Importance

Changes 0
Metric Value
cc 1
nc 1
nop 1
dl 0
loc 4
rs 10
c 0
b 0
f 0
ccs 2
cts 4
cp 0.5
crap 1.125
1
<?php
2
namespace AOE\Crawler\Domain\Repository;
3
4
/***************************************************************
5
 *  Copyright notice
6
 *
7
 *  (c) 2017 AOE GmbH <[email protected]>
8
 *
9
 *  All rights reserved
10
 *
11
 *  This script is part of the TYPO3 project. The TYPO3 project is
12
 *  free software; you can redistribute it and/or modify
13
 *  it under the terms of the GNU General Public License as published by
14
 *  the Free Software Foundation; either version 3 of the License, or
15
 *  (at your option) any later version.
16
 *
17
 *  The GNU General Public License can be found at
18
 *  http://www.gnu.org/copyleft/gpl.html.
19
 *
20
 *  This script is distributed in the hope that it will be useful,
21
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
22
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
23
 *  GNU General Public License for more details.
24
 *
25
 *  This copyright notice MUST APPEAR in all copies of the script!
26
 ***************************************************************/
27
28
use AOE\Crawler\Domain\Model\Process;
29
use AOE\Crawler\Domain\Model\Queue;
30
use TYPO3\CMS\Core\Utility\MathUtility;
31
32
/**
33
 * Class QueueRepository
34
 *
35
 * @package AOE\Crawler\Domain\Repository
36
 */
37
class QueueRepository extends AbstractRepository
38
{
39
    /**
40
     * @var string
41
     */
42
    protected $tableName = 'tx_crawler_queue';
43
44
    /**
     * Returns the queue entry of the given process that comes first when the
     * completed entries are sorted by ascending exec_time, i.e. the entry
     * with the lowest execution timestamp.
     *
     * @param Process $process process whose completed entries are searched
     *
     * @return Queue
     */
    public function findYoungestEntryForProcess(Process $process)
    {
        $ordering = 'exec_time ASC';

        return $this->getFirstOrLastObjectByProcess($process, $ordering);
    }
55
56
    /**
     * Returns the queue entry of the given process that comes first when the
     * completed entries are sorted by descending exec_time, i.e. the entry
     * with the highest execution timestamp.
     *
     * @param Process $process process whose completed entries are searched
     *
     * @return Queue
     */
    public function findOldestEntryForProcess(Process $process)
    {
        $ordering = 'exec_time DESC';

        return $this->getFirstOrLastObjectByProcess($process, $ordering);
    }
67
68
    /**
     * Internal helper that builds a Queue object from the first row matching
     * the given process.
     *
     * Only rows whose process_id_completed equals the id of the process and
     * whose exec_time is greater than zero are considered. When no row
     * matches, a Queue object is still returned; every setter is then called
     * with null.
     *
     * @param Process $process process whose completed entries are searched
     * @param string $orderby SQL ORDER BY expression; the first row in this order is used
     *
     * @return Queue
     */
    protected function getFirstOrLastObjectByProcess($process, $orderby)
    {
        // getDB() is deprecated since crawler v6.5.1 and will be removed in crawler v9.0.0.
        $db = $this->getDB();
        $where = 'process_id_completed=' . $db->fullQuoteStr($process->getProcessId(), $this->tableName) .
            ' AND exec_time > 0 ';

        // No GROUP BY, and only a single row is needed.
        $rows = $db->exec_SELECTgetRows('*', $this->tableName, $where, '', $orderby, 1);
        $row = $rows ? $rows[0] : [];

        // Guarantee that every column read below exists, so an empty result
        // does not trigger "undefined index" notices. Array union only adds
        // keys that are missing and never overwrites fetched values.
        $row += array_fill_keys(
            [
                'qid',
                'page_id',
                'parameters',
                'scheduled',
                'exec_time',
                'set_id',
                'result_data',
                'process_scheduled',
                'process_id',
                'process_id_completed',
                'parameters_hash',
                'configuration_hash',
                'configuration',
            ],
            null
        );

        $queueObject = new Queue();
        $queueObject->setQid($row['qid']);
        $queueObject->setPageId($row['page_id']);
        $queueObject->setParameters($row['parameters']);
        $queueObject->setScheduled($row['scheduled']);
        $queueObject->setExecTime($row['exec_time']);
        $queueObject->setSetId($row['set_id']);
        $queueObject->setResultData($row['result_data']);
        $queueObject->setProcessScheduled($row['process_scheduled']);
        $queueObject->setProcessId($row['process_id']);
        $queueObject->setProcessIdCompleted($row['process_id_completed']);
        $queueObject->setParametersHash($row['parameters_hash']);
        $queueObject->setConfigurationHash($row['configuration_hash']);
        $queueObject->setConfiguration($row['configuration']);

        return $queueObject;
    }
107
108
    /**
     * Counts the queue items that were executed (exec_time > 0) and whose
     * process_id_completed matches the id of the given process.
     *
     * @param Process $process process whose executed items are counted
     *
     * @return int
     */
    public function countExecutedItemsByProcess($process)
    {
        // getDB() is deprecated since crawler v6.5.1 and will be removed in crawler v9.0.0.
        $quotedProcessId = $this->getDB()->fullQuoteStr($process->getProcessId(), $this->tableName);
        $where = 'exec_time > 0 AND process_id_completed = ' . $quotedProcessId;

        return $this->countItemsByWhereClause($where);
    }
122
123
    /**
     * Counts the queue items that are assigned to the given process
     * (process_id matches) but have not been executed yet (exec_time = 0).
     *
     * @param Process $process process whose pending items are counted
     *
     * @return int
     */
    public function countNonExecutedItemsByProcess($process)
    {
        // getDB() is deprecated since crawler v6.5.1 and will be removed in crawler v9.0.0.
        $quotedProcessId = $this->getDB()->fullQuoteStr($process->getProcessId(), $this->tableName);
        $where = 'exec_time = 0 AND process_id = ' . $quotedProcessId;

        return $this->countItemsByWhereClause($where);
    }
137
138
    /**
     * Fetches all rows of the crawler queue that have not been executed yet
     * (exec_time = 0), ordered by page_id and then by scheduled.
     *
     * @return array rows as returned by exec_SELECTgetRows()
     */
    public function getUnprocessedItems()
    {
        // No GROUP BY (empty fourth argument) and no LIMIT: every matching row is returned.
        return $GLOBALS['TYPO3_DB']->exec_SELECTgetRows(
            '*',
            'tx_crawler_queue',
            'exec_time = 0',
            '',
            'page_id, scheduled'
        );
    }
155
156
    /**
     * Counts the queue items which have not been processed yet (exec_time = 0).
     *
     * The count is computed by the database instead of loading every
     * unprocessed row with all of its columns into PHP just to count them.
     * Table and condition mirror those used by getUnprocessedItems().
     *
     * @return int number of unprocessed items; 0 if the count query fails
     */
    public function countUnprocessedItems()
    {
        $count = $GLOBALS['TYPO3_DB']->exec_SELECTcountRows(
            '*',
            'tx_crawler_queue',
            'exec_time = 0'
        );

        // exec_SELECTcountRows() yields false on error; report that as zero items.
        return (int)$count;
    }
165
166
    /**
     * Counts all queue entries that have not been executed yet and whose
     * scheduled timestamp lies before the current time.
     *
     * @return int
     */
    public function countAllPendingItems()
    {
        $now = time();
        $where = 'exec_time = 0 AND scheduled < ' . $now;

        return $this->countItemsByWhereClause($where);
    }
176
177
    /**
     * Counts all queue entries that have not been executed yet, whose
     * scheduled timestamp lies before the current time and which are
     * assigned to a process (non-empty process_id).
     *
     * @return int
     */
    public function countAllAssignedPendingItems()
    {
        $now = time();
        $where = "exec_time = 0 AND scheduled < " . $now . " AND process_id != ''";

        return $this->countItemsByWhereClause($where);
    }
187
188
    /**
     * Counts all queue entries that have not been executed yet, whose
     * scheduled timestamp lies before the current time and which are not
     * assigned to any process (empty process_id).
     *
     * @return int
     */
    public function countAllUnassignedPendingItems()
    {
        $now = time();
        $where = "exec_time = 0 AND scheduled < " . $now . " AND process_id = ''";

        return $this->countItemsByWhereClause($where);
    }
198
199
    /**
     * Internal method to count the rows of the queue table that match a
     * given where clause.
     *
     * The database driver delivers the count as a string; it is cast so that
     * the public count*() methods really return the int they document.
     *
     * @param string $where SQL condition, inserted into the query as given
     *
     * @return int number of matching rows; 0 if no count row could be fetched
     */
    protected function countItemsByWhereClause($where)
    {
        // getDB() is deprecated since crawler v6.5.1 and will be removed in crawler v9.0.0.
        $db = $this->getDB();
        $result = $db->exec_SELECTquery('count(*) as anz', $this->tableName, $where);
        $row = $db->sql_fetch_assoc($result);

        // A failed query yields no row; avoid reading an index on a non-array.
        if (!is_array($row) || !isset($row['anz'])) {
            return 0;
        }

        return (int)$row['anz'];
    }
214
215
    /**
     * Counts the pending queue entries (exec_time = 0 and scheduled before
     * the current time), grouped by configuration.
     *
     * @return array one row per configuration with the keys "configuration",
     *               "unprocessed" and "assignedButUnprocessed"
     */
    public function countPendingItemsGroupedByConfigurationKey()
    {
        // getDB() is deprecated since crawler v6.5.1 and will be removed in crawler v9.0.0.
        $db = $this->getDB();
        $selectFields = "configuration, count(*) as unprocessed, sum(process_id != '') as assignedButUnprocessed";
        $where = 'exec_time = 0 AND scheduled < ' . time();
        $res = $db->exec_SELECTquery($selectFields, $this->tableName, $where, 'configuration');

        $rows = [];
        while (true) {
            $row = $db->sql_fetch_assoc($res);
            if (!$row) {
                break;
            }
            $rows[] = $row;
        }

        return $rows;
    }
236
237
    /**
     * Collects the ids of all sets that still contain pending entries
     * (exec_time = 0 and scheduled before the current time).
     *
     * @return array list of set ids as integers, one per distinct set_id
     */
    public function getSetIdWithUnprocessedEntries()
    {
        // getDB() is deprecated since crawler v6.5.1 and will be removed in crawler v9.0.0.
        $db = $this->getDB();
        $where = 'exec_time = 0 AND scheduled < ' . time();
        // Grouping by set_id yields each set id only once.
        $res = $db->exec_SELECTquery('set_id', $this->tableName, $where, 'set_id');

        $setIds = [];
        while (true) {
            $row = $db->sql_fetch_assoc($res);
            if (!$row) {
                break;
            }
            $setIds[] = (int)$row['set_id'];
        }

        return $setIds;
    }
260
261
    /**
     * Get the total number of queue entries per configuration for the given
     * sets, restricted to entries scheduled before the current time.
     *
     * @param array $setIds set ids to include; every value is cast to int
     *
     * @return array totals by configuration (configuration => count);
     *               empty when no set ids are given
     */
    public function getTotalQueueEntriesByConfiguration(array $setIds)
    {
        $totals = [];
        if (count($setIds) === 0) {
            return $totals;
        }

        // Force every id to an integer so the list is safe to embed into the
        // SQL IN (...) expression, whatever the caller passed in.
        $setIdList = implode(',', array_map('intval', $setIds));

        // getDB() is deprecated since crawler v6.5.1 and will be removed in crawler v9.0.0.
        $db = $this->getDB();
        $res = $db->exec_SELECTquery(
            'configuration, count(*) as c',
            $this->tableName,
            'set_id in (' . $setIdList . ') AND scheduled < ' . time(),
            'configuration'
        );
        while ($row = $db->sql_fetch_assoc($res)) {
            $totals[$row['configuration']] = $row['c'];
        }

        return $totals;
    }
286
287
    /**
     * Returns the exec_time values of the queue entries with the highest
     * exec_time, in descending order.
     *
     * No WHERE condition is applied; the selection relies solely on the
     * ordering and the limit.
     *
     * @param int $limit maximum number of timestamps to return
     *
     * @return array list of exec_time values as integers
     */
    public function getLastProcessedEntriesTimestamps($limit = 100)
    {
        // getDB() is deprecated since crawler v6.5.1 and will be removed in crawler v9.0.0.
        $db = $this->getDB();
        $res = $db->exec_SELECTquery('exec_time', $this->tableName, '', '', 'exec_time desc', $limit);

        $timestamps = [];
        while (true) {
            $row = $db->sql_fetch_assoc($res);
            if ($row === false) {
                break;
            }
            $timestamps[] = (int)$row['exec_time'];
        }

        return $timestamps;
    }
313
314
    /**
     * Returns the queue entries with the highest exec_time, in descending
     * order of exec_time.
     *
     * No WHERE condition is applied; the selection relies solely on the
     * ordering and the limit.
     *
     * @param string $selectFields SQL field list, inserted into the query as given
     * @param int $limit maximum number of rows to return
     *
     * @return array list of rows
     */
    public function getLastProcessedEntries($selectFields = '*', $limit = 100)
    {
        // getDB() is deprecated since crawler v6.5.1 and will be removed in crawler v9.0.0.
        $db = $this->getDB();
        $res = $db->exec_SELECTquery($selectFields, $this->tableName, '', '', 'exec_time desc', $limit);

        $entries = [];
        while (true) {
            $row = $db->sql_fetch_assoc($res);
            if ($row === false) {
                break;
            }
            $entries[] = $row;
        }

        return $entries;
    }
341
342
    /**
     * Get performance statistics for all entries executed within a time span.
     *
     * Entries are grouped by process_id_completed. For each group the
     * earliest and latest exec_time and the number of entries are selected.
     *
     * @param int $start timestamp, lower bound (inclusive)
     * @param int $end timestamp, upper bound (inclusive)
     *
     * @return array rows keyed by process_id_completed, each holding
     *               "process_id_completed", "start", "end" and "urlcount"
     */
    public function getPerformanceData($start, $end)
    {
        // getDB() is deprecated since crawler v6.5.1 and will be removed in crawler v9.0.0.
        $db = $this->getDB();
        $selectFields = 'process_id_completed, min(exec_time) as start, max(exec_time) as end, count(*) as urlcount';
        $where = 'exec_time != 0 and exec_time >= ' . (int)$start . ' and exec_time <= ' . (int)$end;
        $res = $db->exec_SELECTquery($selectFields, $this->tableName, $where, 'process_id_completed');

        $performanceData = [];
        while (true) {
            $row = $db->sql_fetch_assoc($res);
            if ($row === false) {
                break;
            }
            $performanceData[$row['process_id_completed']] = $row;
        }

        return $performanceData;
    }
367
368
    /**
     * Determines whether at least one queue entry exists for a page.
     *
     * Each optional argument adds a further condition unless it is exactly
     * false.
     *
     * @param int|string $uid page id; must be interpretable as an integer
     * @param bool $unprocessed_only only consider entries with exec_time = 0
     * @param bool $timed_only only consider entries with scheduled != 0
     * @param int|bool $timestamp only consider entries scheduled at exactly this
     *                            timestamp; false disables the condition
     *
     * @return bool true if a matching entry exists
     *
     * @throws \InvalidArgumentException if $uid cannot be interpreted as an integer
     */
    public function isPageInQueue($uid, $unprocessed_only = true, $timed_only = false, $timestamp = false)
    {
        if (!MathUtility::canBeInterpretedAsInteger($uid)) {
            throw new \InvalidArgumentException('Invalid parameter type', 1468931945);
        }

        $whereClause = 'page_id = ' . (int)$uid;

        if ($unprocessed_only !== false) {
            $whereClause .= ' AND exec_time = 0';
        }

        if ($timed_only !== false) {
            $whereClause .= ' AND scheduled != 0';
        }

        if ($timestamp !== false) {
            $whereClause .= ' AND scheduled = ' . (int)$timestamp;
        }

        $count = $GLOBALS['TYPO3_DB']->exec_SELECTcountRows('*', 'tx_crawler_queue', $whereClause);

        // A count of false is treated the same as "no entry found".
        return $count !== false && $count > 0;
    }
412
413
    /**
     * Checks whether an unprocessed queue entry exists for the given page.
     *
     * The uid is cast to an integer first, so the integer validation in
     * isPageInQueue() never rejects it.
     *
     * NOTE(review): despite the method name, the $timed_only flag of
     * isPageInQueue() is not set here, so this call is equivalent to
     * isPageInQueue($uid, true). Confirm whether callers expect a
     * "scheduled != 0" filter before changing it.
     *
     * @param int $uid uid of the page
     *
     * @return bool
     */
    public function isPageInQueueTimed($uid)
    {
        return $this->isPageInQueue((int)$uid, true);
    }
427
428
    /**
     * Counts records matching a where clause by delegating to countByWhere().
     *
     * countByWhere() is not defined in this class; presumably it is inherited
     * from AbstractRepository and counts rows of this repository's table.
     * The default clause "1 = 1" matches every row.
     *
     * @param string $where Where clause
     *
     * @return int
     */
    public function countAll($where = '1 = 1')
    {
        $total = $this->countByWhere($where);

        return $total;
    }
439
}
440