Completed
Push — issue/355 ( 6f96f2...e2dfcd )
by Tomas Norre
40:06 queued 25:06
created

QueueRepository::isPageInQueue()   B

Complexity

Conditions 7
Paths 17

Size

Total Lines 34

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 0
CRAP Score 56

Importance

Changes 0
Metric Value
cc 7
nc 17
nop 4
dl 0
loc 34
ccs 0
cts 0
cp 0
crap 56
rs 8.4426
c 0
b 0
f 0
1
<?php
2
namespace AOE\Crawler\Domain\Repository;
3
4
/***************************************************************
5
 *  Copyright notice
6
 *
7
 *  (c) 2017 AOE GmbH <[email protected]>
8
 *
9
 *  All rights reserved
10
 *
11
 *  This script is part of the TYPO3 project. The TYPO3 project is
12
 *  free software; you can redistribute it and/or modify
13
 *  it under the terms of the GNU General Public License as published by
14
 *  the Free Software Foundation; either version 3 of the License, or
15
 *  (at your option) any later version.
16
 *
17
 *  The GNU General Public License can be found at
18
 *  http://www.gnu.org/copyleft/gpl.html.
19
 *
20
 *  This script is distributed in the hope that it will be useful,
21
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
22
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
23
 *  GNU General Public License for more details.
24
 *
25
 *  This copyright notice MUST APPEAR in all copies of the script!
26
 ***************************************************************/
27
28
use AOE\Crawler\Domain\Model\Process;
29
use AOE\Crawler\Domain\Model\Queue;
30
use TYPO3\CMS\Core\Utility\MathUtility;
31
32
/**
33
 * Class QueueRepository
34
 *
35
 * @package AOE\Crawler\Domain\Repository
36
 */
37
class QueueRepository extends AbstractRepository
38
{
39
    /**
40
     * @var string
41
     */
42
    protected $tableName = 'tx_crawler_queue';
43
44
    /**
     * Fetches the executed queue entry of a process that comes first when the
     * entries are ordered by execution time ascending.
     *
     * @param Process $process process whose completed entries are inspected
     *
     * @return Queue
     */
    public function findYoungestEntryForProcess(Process $process)
    {
        $ordering = 'exec_time ASC';

        return $this->getFirstOrLastObjectByProcess($process, $ordering);
    }
55
56
    /**
     * Fetches the executed queue entry of a process that comes first when the
     * entries are ordered by execution time descending.
     *
     * @param Process $process process whose completed entries are inspected
     *
     * @return Queue
     */
    public function findOldestEntryForProcess(Process $process)
    {
        $ordering = 'exec_time DESC';

        return $this->getFirstOrLastObjectByProcess($process, $ordering);
    }
67
68
    /**
     * Internal helper that loads a single executed queue row of a process and
     * wraps it in a Queue object.
     *
     * Only rows with exec_time > 0 whose process_id_completed matches the
     * process id are considered; the first row in the requested order wins.
     *
     * @param Process $process process whose completed entries are searched
     * @param string $orderby SQL ORDER BY expression deciding which row comes first
     *
     * @return Queue built from the matching row, or from an empty array when no row was found
     */
    protected function getFirstOrLastObjectByProcess($process, $orderby)
    {
        $db = $this->getDB();
        $where = 'process_id_completed=' . $db->fullQuoteStr($process->getProcessId(), $this->tableName) .
            ' AND exec_time > 0 ';

        // Select from the same table the value was quoted for, instead of a
        // second hard-coded copy of the table name. No GROUP BY, LIMIT 1.
        $rows = $db->exec_SELECTgetRows('*', $this->tableName, $where, '', $orderby, 1);

        // A falsy result (no rows; presumably also a failed query) yields an empty entry.
        return new Queue($rows ? $rows[0] : []);
    }
94
95
    /**
     * Returns how many queue items the given process has already executed,
     * i.e. rows with exec_time > 0 that carry its id in process_id_completed.
     *
     * @param Process $process
     *
     * @return int
     */
    public function countExecutedItemsByProcess($process)
    {
        $quotedProcessId = $this->getDB()->fullQuoteStr($process->getProcessId(), $this->tableName);
        $where = 'exec_time > 0 AND process_id_completed = ' . $quotedProcessId;

        return $this->countItemsByWhereClause($where);
    }
109
110
    /**
     * Returns how many queue items are assigned to the given process but have
     * not been executed yet (exec_time = 0 and matching process_id).
     *
     * @param Process $process
     *
     * @return int
     */
    public function countNonExecutedItemsByProcess($process)
    {
        $quotedProcessId = $this->getDB()->fullQuoteStr($process->getProcessId(), $this->tableName);
        $where = 'exec_time = 0 AND process_id = ' . $quotedProcessId;

        return $this->countItemsByWhereClause($where);
    }
124
125
    /**
     * Loads all queue rows that have not been executed yet (exec_time = 0),
     * ordered by page_id and scheduled.
     *
     * @return array list of queue rows; empty when nothing is pending or the query result is not an array
     */
    public function getUnprocessedItems()
    {
        $rows = $GLOBALS['TYPO3_DB']->exec_SELECTgetRows(
            '*',
            $this->tableName,
            'exec_time = 0',
            '',
            'page_id, scheduled'
        );

        // Honour the documented array return type even if the database layer
        // hands back a non-array value (presumably null on a failed query).
        return is_array($rows) ? $rows : [];
    }
142 2
143
    /**
     * Counts the queue items which have not been processed yet.
     *
     * @return int number of rows returned by getUnprocessedItems(), 0 if that result is not an array
     */
    public function countUnprocessedItems()
    {
        $items = $this->getUnprocessedItems();

        // count() on a non-array warns on PHP 7.2+ and throws on PHP 8, so
        // guard explicitly and report zero pending items instead.
        return is_array($items) ? count($items) : 0;
    }
152
153 2
    /**
     * Counts all queue entries that are not executed yet and whose scheduled
     * time lies before the current time.
     *
     * @return int
     */
    public function countAllPendingItems()
    {
        $now = time();
        $where = 'exec_time = 0 AND scheduled < ' . $now;

        return $this->countItemsByWhereClause($where);
    }
163
164 1
    /**
     * Counts all queue entries that are not executed yet, are scheduled before
     * the current time and already carry a non-empty process_id.
     *
     * @return int
     */
    public function countAllAssignedPendingItems()
    {
        $pending = 'exec_time = 0 AND scheduled < ' . time();
        $assigned = " AND process_id != ''";

        return $this->countItemsByWhereClause($pending . $assigned);
    }
174 9
175
    /**
     * Counts all queue entries that are not executed yet, are scheduled before
     * the current time and have an empty process_id.
     *
     * @return int
     */
    public function countAllUnassignedPendingItems()
    {
        $pending = 'exec_time = 0 AND scheduled < ' . time();
        $unassigned = " AND process_id = ''";

        return $this->countItemsByWhereClause($pending . $unassigned);
    }
185
186
    /**
     * Internal method to count the rows of the queue table matching a where clause.
     *
     * The clause is passed to the query unchanged, so callers must quote any
     * values they embed.
     *
     * @param string $where SQL where clause (without the WHERE keyword)
     *
     * @return int number of matching rows, 0 when no count row could be fetched
     */
    protected function countItemsByWhereClause($where)
    {
        $db = $this->getDB();
        $result = $db->exec_SELECTquery('count(*) as anz', $this->tableName, $where);
        $row = $db->sql_fetch_assoc($result);

        // Without a fetched row there is no 'anz' offset to read; report zero
        // instead of indexing a non-array.
        if (!is_array($row) || !isset($row['anz'])) {
            return 0;
        }

        // Cast so callers receive the integer their docblocks promise rather
        // than whatever scalar the database driver delivers.
        return (int)$row['anz'];
    }
201
202 1
    /**
     * Counts pending queue entries (not executed, scheduled before now) per
     * configuration key.
     *
     * Each returned row holds the keys "configuration", "unprocessed" and
     * "assignedButUnprocessed" as selected by the query.
     *
     * @return array list of rows, one per configuration
     */
    public function countPendingItemsGroupedByConfigurationKey()
    {
        $database = $this->getDB();
        $fields = "configuration, count(*) as unprocessed, sum(process_id != '') as assignedButUnprocessed";
        $pendingOnly = 'exec_time = 0 AND scheduled < ' . time();

        $result = $database->exec_SELECTquery($fields, $this->tableName, $pendingOnly, 'configuration');

        $grouped = [];
        $record = $database->sql_fetch_assoc($result);
        while ($record) {
            $grouped[] = $record;
            $record = $database->sql_fetch_assoc($result);
        }

        return $grouped;
    }
223 1
224
    /**
     * Collects the ids of all sets that still contain pending entries
     * (not executed and scheduled before now).
     *
     * @return array list of set ids as integers
     */
    public function getSetIdWithUnprocessedEntries()
    {
        $database = $this->getDB();
        $pendingOnly = 'exec_time = 0 AND scheduled < ' . time();

        $result = $database->exec_SELECTquery('set_id', $this->tableName, $pendingOnly, 'set_id');

        $ids = [];
        $record = $database->sql_fetch_assoc($result);
        while ($record) {
            $ids[] = (int)$record['set_id'];
            $record = $database->sql_fetch_assoc($result);
        }

        return $ids;
    }
247 1
248 1
    /**
     * Gets the total number of queue entries per configuration for the given
     * sets, restricted to entries scheduled before now.
     *
     * @param array $setIds set ids to include; every value is forced to an integer
     *
     * @return array totals by configuration (configuration key => count), empty when no set ids are given
     */
    public function getTotalQueueEntriesByConfiguration(array $setIds)
    {
        // The ids are embedded into the SQL "IN (...)" list, so make sure
        // nothing but integers can reach the query.
        $ids = array_map('intval', $setIds);
        if (count($ids) === 0) {
            return [];
        }

        $db = $this->getDB();
        $res = $db->exec_SELECTquery(
            'configuration, count(*) as c',
            $this->tableName,
            'set_id in (' . implode(',', $ids) . ') AND scheduled < ' . time(),
            'configuration'
        );

        $totals = [];
        while ($row = $db->sql_fetch_assoc($res)) {
            $totals[$row['configuration']] = $row['c'];
        }

        return $totals;
    }
273
274 1
    /**
     * Returns the execution timestamps of the queue entries with the highest
     * exec_time values, newest first.
     *
     * @param int $limit maximum number of timestamps to fetch
     *
     * @return array list of exec_time values as integers
     */
    public function getLastProcessedEntriesTimestamps($limit = 100)
    {
        $database = $this->getDB();
        // No where clause and no grouping: order the whole table by exec_time.
        $result = $database->exec_SELECTquery('exec_time', $this->tableName, '', '', 'exec_time desc', $limit);

        $timestamps = [];
        for (
            $record = $database->sql_fetch_assoc($result);
            $record !== false;
            $record = $database->sql_fetch_assoc($result)
        ) {
            $timestamps[] = (int)$record['exec_time'];
        }

        return $timestamps;
    }
300
301
    /**
     * Returns the queue entries with the highest exec_time values, newest first.
     *
     * @param string $selectFields SQL field list, passed to the query unchanged
     * @param int $limit maximum number of rows to fetch
     *
     * @return array list of rows containing the selected fields
     */
    public function getLastProcessedEntries($selectFields = '*', $limit = 100)
    {
        $database = $this->getDB();
        // No where clause and no grouping: order the whole table by exec_time.
        $result = $database->exec_SELECTquery($selectFields, $this->tableName, '', '', 'exec_time desc', $limit);

        $entries = [];
        for (
            $record = $database->sql_fetch_assoc($result);
            $record !== false;
            $record = $database->sql_fetch_assoc($result)
        ) {
            $entries[] = $record;
        }

        return $entries;
    }
328 1
329 1
    /**
     * Gathers performance statistics per completed process for entries
     * executed within the given time window (both bounds inclusive).
     *
     * Each row holds process_id_completed, start (lowest exec_time),
     * end (highest exec_time) and urlcount (number of entries).
     *
     * @param int $start timestamp, lower bound
     * @param int $end timestamp, upper bound
     *
     * @return array rows keyed by process_id_completed
     */
    public function getPerformanceData($start, $end)
    {
        $database = $this->getDB();

        $from = (int)$start;
        $until = (int)$end;
        $fields = 'process_id_completed, min(exec_time) as start, max(exec_time) as end, count(*) as urlcount';
        $window = 'exec_time != 0 and exec_time >= ' . $from . ' and exec_time <= ' . $until;

        $result = $database->exec_SELECTquery($fields, $this->tableName, $window, 'process_id_completed');

        $statistics = [];
        for (
            $record = $database->sql_fetch_assoc($result);
            $record !== false;
            $record = $database->sql_fetch_assoc($result)
        ) {
            $statistics[$record['process_id_completed']] = $record;
        }

        return $statistics;
    }
354
355
    /**
     * Determines if a page is queued.
     *
     * Each optional filter is applied unless the literal value false is passed
     * for it (the checks are strict, so e.g. 0 still activates the filter).
     *
     * @param int|string $uid uid of the page; must be interpretable as an integer
     * @param bool $unprocessed_only only consider entries that have not been executed (exec_time = 0)
     * @param bool $timed_only only consider entries with a scheduled time (scheduled != 0)
     * @param int|bool $timestamp only consider entries scheduled at exactly this timestamp; false disables the filter
     *
     * @return bool true if at least one matching queue entry exists
     *
     * @throws \InvalidArgumentException if $uid cannot be interpreted as an integer
     */
    public function isPageInQueue($uid, $unprocessed_only = true, $timed_only = false, $timestamp = false)
    {
        if (!MathUtility::canBeInterpretedAsInteger($uid)) {
            throw new \InvalidArgumentException('Invalid parameter type', 1468931945);
        }

        // Canonical (int) casts: the (integer) spelling is a non-canonical alias.
        $conditions = ['page_id = ' . (int)$uid];

        if (false !== $unprocessed_only) {
            $conditions[] = 'exec_time = 0';
        }

        if (false !== $timed_only) {
            $conditions[] = 'scheduled != 0';
        }

        if (false !== $timestamp) {
            $conditions[] = 'scheduled = ' . (int)$timestamp;
        }

        $count = $GLOBALS['TYPO3_DB']->exec_SELECTcountRows(
            '*',
            $this->tableName,
            implode(' AND ', $conditions)
        );

        // A false count is treated as "not in queue".
        return false !== $count && $count > 0;
    }
399
400
    /**
     * Checks if a page is in the queue with an unprocessed entry that is timed
     * for a date when it should be crawled (scheduled != 0).
     *
     * @param int $uid uid of the page; converted to an integer before the lookup
     *
     * @return bool
     */
    public function isPageInQueueTimed($uid)
    {
        // Pass $timed_only = true: without it this method is indistinguishable
        // from isPageInQueue($uid) and ignores whether the entry is scheduled.
        return $this->isPageInQueue((int)$uid, true, true);
    }
414
415
    /**
     * Counts the records matching the given where clause by delegating to
     * countByWhere(), which is not defined in this class (presumably inherited
     * from AbstractRepository — verify there which table it counts).
     *
     * @param string $where where clause; the default matches every record
     *
     * @return int
     */
    public function countAll($where = '1 = 1')
    {
        $total = $this->countByWhere($where);

        return $total;
    }
426
}
427