Completed
Push — deprecate-functions ( 6a3171...00abd0 )
by Tomas Norre
05:55
created

QueueRepository::isPageInQueue()   B

Complexity

Conditions 7
Paths 17

Size

Total Lines 34

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 18
CRAP Score 8.4275

Importance

Changes 0
Metric Value
cc 7
nc 17
nop 4
dl 0
loc 34
ccs 18
cts 26
cp 0.6923
crap 8.4275
rs 8.4426
c 0
b 0
f 0
1
<?php
2
namespace AOE\Crawler\Domain\Repository;
3
4
/***************************************************************
5
 *  Copyright notice
6
 *
7
 *  (c) 2017 AOE GmbH <[email protected]>
8
 *
9
 *  All rights reserved
10
 *
11
 *  This script is part of the TYPO3 project. The TYPO3 project is
12
 *  free software; you can redistribute it and/or modify
13
 *  it under the terms of the GNU General Public License as published by
14
 *  the Free Software Foundation; either version 3 of the License, or
15
 *  (at your option) any later version.
16
 *
17
 *  The GNU General Public License can be found at
18
 *  http://www.gnu.org/copyleft/gpl.html.
19
 *
20
 *  This script is distributed in the hope that it will be useful,
21
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
22
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
23
 *  GNU General Public License for more details.
24
 *
25
 *  This copyright notice MUST APPEAR in all copies of the script!
26
 ***************************************************************/
27
28
use AOE\Crawler\Domain\Model\Process;
29
use AOE\Crawler\Domain\Model\Queue;
30
use TYPO3\CMS\Core\Utility\MathUtility;
31
32
/**
33
 * Class QueueRepository
34
 *
35
 * @package AOE\Crawler\Domain\Repository
36
 */
37
class QueueRepository extends AbstractRepository
38
{
39
    /**
     * Name of the database table used by this repository's queries.
     *
     * @var string
     */
42
    protected $tableName = 'tx_crawler_queue';
43
44
    /**
     * Returns the executed queue entry of a process that comes first when the
     * entries are sorted by ascending exec_time.
     *
     * @param Process $process process whose completed entries are inspected
     *
     * @return Queue entry built from the first matching row (from an empty row if nothing matched)
     */
    public function findYoungestEntryForProcess(Process $process)
    {
        $sorting = 'exec_time ASC';
        $entry = $this->getFirstOrLastObjectByProcess($process, $sorting);

        return $entry;
    }
55
56
    /**
     * Returns the executed queue entry of a process that comes first when the
     * entries are sorted by descending exec_time.
     *
     * @param Process $process process whose completed entries are inspected
     *
     * @return Queue entry built from the first matching row (from an empty row if nothing matched)
     */
    public function findOldestEntryForProcess(Process $process)
    {
        $sorting = 'exec_time DESC';
        $entry = $this->getFirstOrLastObjectByProcess($process, $sorting);

        return $entry;
    }
67
68
    /**
     * Internal helper that loads a single executed queue row of a process and
     * wraps it in a Queue object.
     *
     * Only rows with exec_time > 0 whose process_id_completed equals the id of
     * the given process are considered. The row that comes first for the given
     * ordering is used.
     *
     * @param Process $process process whose completed entries are searched
     * @param string $orderby ORDER BY expression; the first matching row is returned as object
     *
     * @return Queue built from the matching row, or from an empty array when no row was found
     */
    protected function getFirstOrLastObjectByProcess($process, $orderby)
    {
        $db = $this->getDB();
        $where = 'process_id_completed=' . $db->fullQuoteStr($process->getProcessId(), $this->tableName)
            . ' AND exec_time > 0 ';

        // Select from the same table name that is used for quoting above, no GROUP BY, at most one row.
        $rows = $db->exec_SELECTgetRows('*', $this->tableName, $where, '', $orderby, 1);

        // A failed query (null) and an empty result both end up as an empty Queue row.
        return new Queue(empty($rows) ? [] : $rows[0]);
    }
94
95
    /**
     * Counts the queue items a process has already executed, i.e. rows with
     * exec_time > 0 whose process_id_completed matches the process id.
     *
     * @param Process $process
     *
     * @return int
     */
    public function countExecutedItemsByProcess($process)
    {
        $quotedProcessId = $this->getDB()->fullQuoteStr($process->getProcessId(), $this->tableName);

        return $this->countItemsByWhereClause('exec_time > 0 AND process_id_completed = ' . $quotedProcessId);
    }
109
110
    /**
     * Counts the queue items assigned to a process that are still waiting for
     * execution, i.e. rows with exec_time = 0 whose process_id matches the process id.
     *
     * @param Process $process
     *
     * @return int
     */
    public function countNonExecutedItemsByProcess($process)
    {
        $quotedProcessId = $this->getDB()->fullQuoteStr($process->getProcessId(), $this->tableName);

        return $this->countItemsByWhereClause('exec_time = 0 AND process_id = ' . $quotedProcessId);
    }
124
125
    /**
     * Fetches all queue rows that have not been executed yet (exec_time = 0),
     * ordered by page_id and scheduled.
     *
     * @return array list of queue rows; empty when nothing matched or the query failed
     */
    public function getUnprocessedItems()
    {
        $rows = $GLOBALS['TYPO3_DB']->exec_SELECTgetRows(
            '*',
            'tx_crawler_queue',
            'exec_time = 0',
            '',
            'page_id, scheduled'
        );

        // exec_SELECTgetRows() hands back null on an SQL error; normalise that to an
        // empty list so callers (e.g. count()) always receive the documented array.
        return is_array($rows) ? $rows : [];
    }
142
143
    /**
     * Returns the number of queue items that have not been processed yet,
     * determined by counting the rows delivered by getUnprocessedItems().
     *
     * @return int
     */
    public function countUnprocessedItems()
    {
        $unprocessedItems = $this->getUnprocessedItems();

        return count($unprocessedItems);
    }
152
153
    /**
     * Counts all queue entries that are not executed yet (exec_time = 0) and
     * whose scheduled time lies before the current time.
     *
     * @return int
     */
    public function countAllPendingItems()
    {
        $now = time();
        $where = 'exec_time = 0 AND scheduled < ' . $now;

        return $this->countItemsByWhereClause($where);
    }
163
164
    /**
     * Counts all queue entries that are not executed yet (exec_time = 0), whose
     * scheduled time lies before the current time and which already carry a
     * non-empty process_id.
     *
     * @return int
     */
    public function countAllAssignedPendingItems()
    {
        $now = time();
        $where = "exec_time = 0 AND scheduled < " . $now . " AND process_id != ''";

        return $this->countItemsByWhereClause($where);
    }
174
175
    /**
     * Counts all queue entries that are not executed yet (exec_time = 0), whose
     * scheduled time lies before the current time and whose process_id is
     * still empty.
     *
     * @return int
     */
    public function countAllUnassignedPendingItems()
    {
        $now = time();
        $where = "exec_time = 0 AND scheduled < " . $now . " AND process_id = ''";

        return $this->countItemsByWhereClause($where);
    }
185
186
    /**
     * Internal method to count the rows of the repository table that match a
     * given where clause.
     *
     * The clause is embedded into the query as-is, so callers must pass
     * already quoted/escaped SQL.
     *
     * @param string $where SQL where clause (without the WHERE keyword)
     *
     * @return mixed value of the count column as delivered by the database layer
     *               (NOTE(review): presumably a numeric string or int - confirm),
     *               or 0 when the query failed or returned no row
     */
    protected function countItemsByWhereClause($where)
    {
        $db = $this->getDB();
        $result = $db->exec_SELECTquery('count(*) as anz', $this->tableName, $where);

        // A failed query yields no usable result set; report "nothing counted".
        if (!$result) {
            return 0;
        }

        $row = $db->sql_fetch_assoc($result);
        // Release the result set instead of leaving it open until the request ends.
        $db->sql_free_result($result);

        if (!is_array($row) || !isset($row['anz'])) {
            return 0;
        }

        return $row['anz'];
    }
201
202
    /**
     * Counts pending queue entries (exec_time = 0 and scheduled before the
     * current time) grouped by their configuration key.
     *
     * @return array one row per configuration with the keys "configuration",
     *               "unprocessed" (number of pending rows) and
     *               "assignedButUnprocessed" (pending rows with a non-empty process_id)
     */
    public function countPendingItemsGroupedByConfigurationKey()
    {
        $db = $this->getDB();
        $fields = "configuration, count(*) as unprocessed, sum(process_id != '') as assignedButUnprocessed";
        $where = 'exec_time = 0 AND scheduled < ' . time();
        $result = $db->exec_SELECTquery($fields, $this->tableName, $where, 'configuration');

        $grouped = [];
        $record = $db->sql_fetch_assoc($result);
        while ($record) {
            $grouped[] = $record;
            $record = $db->sql_fetch_assoc($result);
        }

        return $grouped;
    }
223
224
    /**
     * Collects the ids of all sets that still contain pending entries, i.e.
     * entries with exec_time = 0 that are scheduled before the current time.
     *
     * @return array list of set ids, each cast to int
     */
    public function getSetIdWithUnprocessedEntries()
    {
        $db = $this->getDB();
        $where = 'exec_time = 0 AND scheduled < ' . time();
        $result = $db->exec_SELECTquery('set_id', $this->tableName, $where, 'set_id');

        $ids = [];
        $record = $db->sql_fetch_assoc($result);
        while ($record) {
            $ids[] = (int)$record['set_id'];
            $record = $db->sql_fetch_assoc($result);
        }

        return $ids;
    }
247
248
    /**
     * Get the total number of queue entries per configuration for the given sets.
     *
     * Only entries scheduled before the current time are counted. No query is
     * issued when $setIds is empty.
     *
     * @param array $setIds set ids to take into account; every value is cast to int
     *
     * @return array totals by configuration (configuration key => count)
     */
    public function getTotalQueueEntriesByConfiguration(array $setIds)
    {
        if (count($setIds) === 0) {
            return [];
        }

        // Force every id to an integer before it is embedded in the IN() list, so
        // a non-numeric value can neither break nor manipulate the statement.
        $setIdList = implode(',', array_map('intval', $setIds));

        $db = $this->getDB();
        $res = $db->exec_SELECTquery(
            'configuration, count(*) as c',
            $this->tableName,
            'set_id in (' . $setIdList . ') AND scheduled < ' . time(),
            'configuration'
        );

        $totals = [];
        while ($row = $db->sql_fetch_assoc($res)) {
            $totals[$row['configuration']] = $row['c'];
        }

        return $totals;
    }
273
274
    /**
     * Returns the exec_time values of the queue rows with the highest exec_time.
     *
     * The query has no where clause; rows are ordered by exec_time descending
     * and cut off after $limit rows.
     *
     * @param int $limit maximum number of timestamps to return
     *
     * @return array list of exec_time values, each cast to int
     */
    public function getLastProcessedEntriesTimestamps($limit = 100)
    {
        $db = $this->getDB();
        $result = $db->exec_SELECTquery('exec_time', $this->tableName, '', '', 'exec_time desc', $limit);

        $timestamps = [];
        $record = $db->sql_fetch_assoc($result);
        while ($record !== false) {
            $timestamps[] = (int)$record['exec_time'];
            $record = $db->sql_fetch_assoc($result);
        }

        return $timestamps;
    }
300
301
    /**
     * Returns the queue rows with the highest exec_time.
     *
     * The query has no where clause; rows are ordered by exec_time descending
     * and cut off after $limit rows.
     *
     * @param string $selectFields field list placed into the SELECT as-is
     * @param int $limit maximum number of rows to return
     *
     * @return array list of associative rows
     */
    public function getLastProcessedEntries($selectFields = '*', $limit = 100)
    {
        $db = $this->getDB();
        $result = $db->exec_SELECTquery($selectFields, $this->tableName, '', '', 'exec_time desc', $limit);

        $entries = [];
        $record = $db->sql_fetch_assoc($result);
        while ($record !== false) {
            $entries[] = $record;
            $record = $db->sql_fetch_assoc($result);
        }

        return $entries;
    }
328
329
    /**
     * Collects performance statistics per completing process for all entries
     * executed within the given time span (both bounds inclusive).
     *
     * @param int $start timestamp, lower bound for exec_time
     * @param int $end timestamp, upper bound for exec_time
     *
     * @return array rows keyed by process_id_completed, each holding
     *               "process_id_completed", "start" (min exec_time),
     *               "end" (max exec_time) and "urlcount" (number of entries)
     */
    public function getPerformanceData($start, $end)
    {
        $db = $this->getDB();
        $fields = 'process_id_completed, min(exec_time) as start, max(exec_time) as end, count(*) as urlcount';
        $where = 'exec_time != 0 and exec_time >= ' . (int)$start . ' and exec_time <= ' . (int)$end;
        $result = $db->exec_SELECTquery($fields, $this->tableName, $where, 'process_id_completed');

        $performance = [];
        $record = $db->sql_fetch_assoc($result);
        while ($record !== false) {
            $performance[$record['process_id_completed']] = $record;
            $record = $db->sql_fetch_assoc($result);
        }

        return $performance;
    }
354
355
    /**
     * Determines whether at least one queue entry exists for a page.
     *
     * Each optional filter is active for every value other than boolean false,
     * because the arguments are compared strictly against false.
     *
     * @param int|string $uid page uid; must be interpretable as integer
     * @param bool $unprocessed_only restrict to entries with exec_time = 0
     * @param bool $timed_only restrict to entries with scheduled != 0
     * @param int|bool $timestamp restrict to entries scheduled at exactly this time, false to skip
     *
     * @return bool true if a matching entry exists
     *
     * @throws \InvalidArgumentException if $uid cannot be interpreted as integer
     */
    public function isPageInQueue($uid, $unprocessed_only = true, $timed_only = false, $timestamp = false)
    {
        if (!MathUtility::canBeInterpretedAsInteger($uid)) {
            throw new \InvalidArgumentException('Invalid parameter type', 1468931945);
        }

        $conditions = 'page_id = ' . (int)$uid;
        if ($unprocessed_only !== false) {
            $conditions .= ' AND exec_time = 0';
        }
        if ($timed_only !== false) {
            $conditions .= ' AND scheduled != 0';
        }
        if ($timestamp !== false) {
            $conditions .= ' AND scheduled = ' . (int)$timestamp;
        }

        $matches = $GLOBALS['TYPO3_DB']->exec_SELECTcountRows('*', 'tx_crawler_queue', $conditions);

        // A false count (failed query) is reported as "not in queue".
        return $matches !== false && $matches > 0;
    }
399
400
    /**
     * Method to check if a page is in the queue with an unprocessed entry that
     * is timed, i.e. carries a date when it should be crawled (scheduled != 0).
     *
     * The uid is cast to int first, so a non-numeric value is looked up as
     * page 0 instead of raising an exception.
     *
     * @param int $uid uid of the page
     *
     * @return boolean
     */
    public function isPageInQueueTimed($uid)
    {
        // The third argument enables the "timed only" filter; without it this
        // method would be indistinguishable from isPageInQueue($uid, true).
        return $this->isPageInQueue((int)$uid, true, true);
    }
414
415
    /**
     * Counts the records matching the given where clause by delegating to
     * countByWhere(); with the default clause every record matches.
     *
     * NOTE(review): countByWhere() is not defined in this file - presumably it
     * counts rows of this repository's table; confirm against the parent class.
     *
     * @param  string $where Where clause
     *
     * @return integer
     */
    public function countAll($where = '1 = 1')
    {
        $total = $this->countByWhere($where);

        return $total;
    }
426
}
427