Completed
Push — master ( 6a3171...e461ad )
by Tomas Norre
07:14
created

QueueRepository::isPageInQueueTimed()   A

Complexity

Conditions 1
Paths 1

Size

Total Lines 5

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 3
CRAP Score 1.064

Importance

Changes 0
Metric Value
cc 1
nc 1
nop 1
dl 0
loc 5
ccs 3
cts 5
cp 0.6
crap 1.064
rs 10
c 0
b 0
f 0
1
<?php
2
namespace AOE\Crawler\Domain\Repository;
3
4
/***************************************************************
5
 *  Copyright notice
6
 *
7
 *  (c) 2017 AOE GmbH <[email protected]>
8
 *
9
 *  All rights reserved
10
 *
11
 *  This script is part of the TYPO3 project. The TYPO3 project is
12
 *  free software; you can redistribute it and/or modify
13
 *  it under the terms of the GNU General Public License as published by
14
 *  the Free Software Foundation; either version 3 of the License, or
15
 *  (at your option) any later version.
16
 *
17
 *  The GNU General Public License can be found at
18
 *  http://www.gnu.org/copyleft/gpl.html.
19
 *
20
 *  This script is distributed in the hope that it will be useful,
21
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
22
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
23
 *  GNU General Public License for more details.
24
 *
25
 *  This copyright notice MUST APPEAR in all copies of the script!
26
 ***************************************************************/
27
28
use AOE\Crawler\Domain\Model\Process;
29
use AOE\Crawler\Domain\Model\Queue;
30
use TYPO3\CMS\Core\Utility\MathUtility;
31
32
/**
33
 * Class QueueRepository
34
 *
35
 * @package AOE\Crawler\Domain\Repository
36
 */
37
class QueueRepository extends AbstractRepository
38
{
39
    /**
40
     * @var string
41
     */
42
    // Name of the queue table; handed to the quoting and query calls in this class that reference $this->tableName.
    protected $tableName = 'tx_crawler_queue';
43
44
    /**
     * Returns the queue entry of a process that comes first when sorting by
     * ascending exec_time (i.e. the entry with the lowest exec_time).
     *
     * @param Process $process process whose entries are searched
     *
     * @return Queue
     */
    public function findYoungestEntryForProcess(Process $process)
    {
        $sorting = 'exec_time ASC';
        $entry = $this->getFirstOrLastObjectByProcess($process, $sorting);

        return $entry;
    }
55
56
    /**
     * Returns the queue entry of a process that comes first when sorting by
     * descending exec_time (i.e. the entry with the highest exec_time).
     *
     * @param Process $process process whose entries are searched
     *
     * @return Queue
     */
    public function findOldestEntryForProcess(Process $process)
    {
        $sorting = 'exec_time DESC';
        $entry = $this->getFirstOrLastObjectByProcess($process, $sorting);

        return $entry;
    }
67
68
    /**
     * Internal helper that loads a single queue row of a process and wraps it
     * in a Queue object.
     *
     * Only rows whose process_id_completed equals the id of the process and
     * whose exec_time is greater than 0 are considered. The first row in the
     * requested sort order is used.
     *
     * @param Process $process process whose entries are searched
     * @param string $orderby ORDER BY expression; the first matching row is returned as object
     *
     * @return Queue built from the matching row, or from an empty array when no row was found
     */
    protected function getFirstOrLastObjectByProcess($process, $orderby)
    {
        // NOTE: getDB() is deprecated since crawler v6.5.1 and will be removed in crawler v9.0.0.
        $db = $this->getDB();
        $where = 'process_id_completed=' . $db->fullQuoteStr($process->getProcessId(), $this->tableName) .
            ' AND exec_time > 0 ';

        // Query the same table the value was quoted for instead of a hard-coded table name.
        $rows = $db->exec_SELECTgetRows('*', $this->tableName, $where, '', $orderby, 1);

        // A falsy result (no match, or no result at all) falls back to an empty row.
        $firstRow = $rows ? $rows[0] : [];

        return new Queue($firstRow);
    }
94
95
    /**
     * Counts the queue items a process has executed, i.e. rows with
     * exec_time > 0 whose process_id_completed equals the id of the process.
     *
     * @param Process $process
     *
     * @return int
     */
    public function countExecutedItemsByProcess($process)
    {
        // NOTE: getDB() is deprecated since crawler v6.5.1 and will be removed in crawler v9.0.0.
        $quotedProcessId = $this->getDB()->fullQuoteStr($process->getProcessId(), $this->tableName);
        $condition = 'exec_time > 0 AND process_id_completed = ' . $quotedProcessId;

        return $this->countItemsByWhereClause($condition);
    }
109
110
    /**
     * Counts the queue items assigned to a process that have not been executed
     * yet, i.e. rows with exec_time = 0 whose process_id equals the id of the process.
     *
     * @param Process $process
     *
     * @return int
     */
    public function countNonExecutedItemsByProcess($process)
    {
        // NOTE: getDB() is deprecated since crawler v6.5.1 and will be removed in crawler v9.0.0.
        $quotedProcessId = $this->getDB()->fullQuoteStr($process->getProcessId(), $this->tableName);
        $condition = 'exec_time = 0 AND process_id = ' . $quotedProcessId;

        return $this->countItemsByWhereClause($condition);
    }
124
125
    /**
     * Fetches all rows of the crawler queue that have not been processed yet
     * (exec_time = 0), ordered by page_id and scheduled.
     *
     * @return array list of queue rows; empty when there are none or the query did not yield rows
     */
    public function getUnprocessedItems()
    {
        $rows = $GLOBALS['TYPO3_DB']->exec_SELECTgetRows(
            '*',
            'tx_crawler_queue',
            'exec_time = 0',
            '',
            'page_id, scheduled'
        );

        // exec_SELECTgetRows() can hand back NULL instead of an array (SQL error);
        // always return an array so callers such as count() keep working.
        return is_array($rows) ? $rows : [];
    }
142
143
    /**
     * Counts the queue items which have not been processed yet (exec_time = 0).
     *
     * @return int
     */
    public function countUnprocessedItems()
    {
        // Let the database do the counting instead of loading every unprocessed
        // row into PHP; this also avoids calling count() on a non-array result.
        return (int)$this->countItemsByWhereClause('exec_time = 0');
    }
152
153
    /**
     * Counts all queue entries that are not executed yet (exec_time = 0) and
     * whose scheduled time lies before the current time.
     *
     * @return int
     */
    public function countAllPendingItems()
    {
        $pendingCondition = 'exec_time = 0 AND scheduled < ' . time();

        return $this->countItemsByWhereClause($pendingCondition);
    }
163
164
    /**
     * Counts all queue entries that are not executed yet (exec_time = 0), whose
     * scheduled time lies before the current time and which carry a non-empty
     * process_id, i.e. are assigned to a process.
     *
     * @return int
     */
    public function countAllAssignedPendingItems()
    {
        $now = time();
        $assignedCondition = "exec_time = 0 AND scheduled < " . $now . " AND process_id != ''";

        return $this->countItemsByWhereClause($assignedCondition);
    }
174
175
    /**
     * Counts all queue entries that are not executed yet (exec_time = 0), whose
     * scheduled time lies before the current time and which have an empty
     * process_id, i.e. are not assigned to a process.
     *
     * @return int
     */
    public function countAllUnassignedPendingItems()
    {
        $now = time();
        $unassignedCondition = "exec_time = 0 AND scheduled < " . $now . " AND process_id = ''";

        return $this->countItemsByWhereClause($unassignedCondition);
    }
185
186
    /**
     * Internal method to count the rows of the queue table that match a given
     * where clause.
     *
     * @param string $where SQL condition, inserted into the query as given
     *
     * @return int number of matching rows; 0 when no count row could be fetched
     */
    protected function countItemsByWhereClause($where)
    {
        // NOTE: getDB() is deprecated since crawler v6.5.1 and will be removed in crawler v9.0.0.
        $db = $this->getDB();
        $result = $db->exec_SELECTquery('count(*) as anz', $this->tableName, $where);
        $row = $db->sql_fetch_assoc($result);

        // Without a fetched row there is nothing to read 'anz' from (presumably
        // the query failed); report zero instead of indexing a non-array.
        if (!is_array($row)) {
            return 0;
        }

        // The callers document an integer, so normalise the value from the database
        return (int)$row['anz'];
    }
201
202
    /**
     * Counts the pending queue entries (exec_time = 0 and scheduled before the
     * current time), grouped by configuration.
     *
     * @return array one row per configuration with the keys "configuration",
     *               "unprocessed" and "assignedButUnprocessed"
     */
    public function countPendingItemsGroupedByConfigurationKey()
    {
        // NOTE: getDB() is deprecated since crawler v6.5.1 and will be removed in crawler v9.0.0.
        $db = $this->getDB();
        $selectFields = "configuration, count(*) as unprocessed, sum(process_id != '') as assignedButUnprocessed";
        $pendingCondition = 'exec_time = 0 AND scheduled < ' . time();
        $result = $db->exec_SELECTquery($selectFields, $this->tableName, $pendingCondition, 'configuration');

        $groupedCounts = [];
        while ($row = $db->sql_fetch_assoc($result)) {
            $groupedCounts[] = $row;
        }

        return $groupedCounts;
    }
223
224
    /**
     * Collects the set ids that still have pending entries (exec_time = 0 and
     * scheduled before the current time). The query groups by set_id, so each
     * id is listed once.
     *
     * @return array list of set ids as integers
     */
    public function getSetIdWithUnprocessedEntries()
    {
        // NOTE: getDB() is deprecated since crawler v6.5.1 and will be removed in crawler v9.0.0.
        $db = $this->getDB();
        $pendingCondition = 'exec_time = 0 AND scheduled < ' . time();
        $result = $db->exec_SELECTquery('set_id', $this->tableName, $pendingCondition, 'set_id');

        $ids = [];
        while ($row = $db->sql_fetch_assoc($result)) {
            $ids[] = (int)$row['set_id'];
        }

        return $ids;
    }
247
248
    /**
     * Counts the queue entries of the given sets whose scheduled time lies
     * before the current time, grouped by configuration.
     *
     * @param array $setIds set ids to include; every value is cast to an integer
     *
     * @return array totals indexed by configuration; empty when no set ids are given
     */
    public function getTotalQueueEntriesByConfiguration(array $setIds)
    {
        $totals = [];
        if (count($setIds) === 0) {
            return $totals;
        }

        // Force every id to an integer so that nothing but numbers can end up
        // in the IN (...) list that is concatenated into the SQL statement.
        $setIdList = implode(',', array_map('intval', $setIds));

        // NOTE: getDB() is deprecated since crawler v6.5.1 and will be removed in crawler v9.0.0.
        $db = $this->getDB();
        $res = $db->exec_SELECTquery(
            'configuration, count(*) as c',
            $this->tableName,
            'set_id in (' . $setIdList . ') AND scheduled < ' . time(),
            'configuration'
        );
        while ($row = $db->sql_fetch_assoc($res)) {
            $totals[$row['configuration']] = $row['c'];
        }

        return $totals;
    }
273
274
    /**
     * Reads the exec_time values of queue rows, ordered by exec_time descending.
     * No where condition is applied; the row count is bounded by $limit.
     *
     * @param int $limit passed on as the limit of the query
     *
     * @return array list of exec_time values as integers
     */
    public function getLastProcessedEntriesTimestamps($limit = 100)
    {
        // NOTE: getDB() is deprecated since crawler v6.5.1 and will be removed in crawler v9.0.0.
        $db = $this->getDB();
        $result = $db->exec_SELECTquery('exec_time', $this->tableName, '', '', 'exec_time desc', $limit);

        $timestamps = [];
        while (($row = $db->sql_fetch_assoc($result)) !== false) {
            $timestamps[] = (int)$row['exec_time'];
        }

        return $timestamps;
    }
300
301
    /**
     * Reads queue rows ordered by exec_time descending. No where condition is
     * applied; the row count is bounded by $limit.
     *
     * @param string $selectFields field list of the query, inserted as given
     * @param int $limit passed on as the limit of the query
     *
     * @return array list of rows
     */
    public function getLastProcessedEntries($selectFields = '*', $limit = 100)
    {
        // NOTE: getDB() is deprecated since crawler v6.5.1 and will be removed in crawler v9.0.0.
        $db = $this->getDB();
        $result = $db->exec_SELECTquery($selectFields, $this->tableName, '', '', 'exec_time desc', $limit);

        $entries = [];
        while (($row = $db->sql_fetch_assoc($result)) !== false) {
            $entries[] = $row;
        }

        return $entries;
    }
328
329
    /**
     * Collects performance statistics per completed process: the lowest and
     * highest exec_time and the number of rows, for all rows whose exec_time
     * is not 0 and lies within [$start, $end].
     *
     * @param int $start timestamp (cast to integer)
     * @param int $end timestamp (cast to integer)
     *
     * @return array rows with the keys "process_id_completed", "start", "end"
     *               and "urlcount", indexed by process_id_completed
     */
    public function getPerformanceData($start, $end)
    {
        // NOTE: getDB() is deprecated since crawler v6.5.1 and will be removed in crawler v9.0.0.
        $db = $this->getDB();
        $selectFields = 'process_id_completed, min(exec_time) as start, max(exec_time) as end, count(*) as urlcount';
        $timeFrame = 'exec_time != 0 and exec_time >= ' . (int)$start . ' and exec_time <= ' . (int)$end;
        $result = $db->exec_SELECTquery($selectFields, $this->tableName, $timeFrame, 'process_id_completed');

        $statistics = [];
        while (($row = $db->sql_fetch_assoc($result)) !== false) {
            $statistics[$row['process_id_completed']] = $row;
        }

        return $statistics;
    }
354
355
    /**
     * Determines whether at least one queue row exists for a page.
     *
     * Each optional filter is applied unless its argument is exactly false.
     *
     * @param mixed $uid page id; must pass MathUtility::canBeInterpretedAsInteger()
     * @param bool $unprocessed_only unless false, only rows with exec_time = 0 match
     * @param bool $timed_only unless false, only rows with scheduled != 0 match
     * @param bool|int $timestamp unless false, only rows whose scheduled equals this value (cast to integer) match
     *
     * @return bool true when the count query reports more than zero rows
     *
     * @throws \InvalidArgumentException when $uid cannot be interpreted as an integer
     */
    public function isPageInQueue($uid, $unprocessed_only = true, $timed_only = false, $timestamp = false)
    {
        if (!MathUtility::canBeInterpretedAsInteger($uid)) {
            throw new \InvalidArgumentException('Invalid parameter type', 1468931945);
        }

        $where = 'page_id = ' . (int)$uid;
        if ($unprocessed_only !== false) {
            $where .= ' AND exec_time = 0';
        }
        if ($timed_only !== false) {
            $where .= ' AND scheduled != 0';
        }
        if ($timestamp !== false) {
            $where .= ' AND scheduled = ' . (int)$timestamp;
        }

        $matches = $GLOBALS['TYPO3_DB']->exec_SELECTcountRows('*', 'tx_crawler_queue', $where);

        // A result of exactly false is never treated as "in queue"
        return $matches !== false && $matches > 0;
    }
399
400
    /**
     * Checks whether a page has an unprocessed queue entry that is timed,
     * i.e. carries a scheduled value other than 0.
     *
     * @param int $uid uid of the page; converted with intval() before the lookup
     *
     * @return boolean
     */
    public function isPageInQueueTimed($uid)
    {
        // Pass $timed_only = true as well: without it this method did exactly the
        // same as isPageInQueue($uid) and never looked at the scheduled column.
        return $this->isPageInQueue(intval($uid), true, true);
    }
414
415
    /**
     * Counts records by delegating to countByWhere(), which is not defined in
     * this class (presumably inherited from AbstractRepository - verify there
     * which table is counted).
     *
     * @param string $where where clause; the default '1 = 1' matches every record
     *
     * @return integer
     */
    public function countAll($where = '1 = 1')
    {
        $total = $this->countByWhere($where);

        return $total;
    }
426
}
427