Passed
Push — master ( e20b48...b0a48d )
by Timo
04:39
created

getTopKeyWordsWithOrWithoutHits()   A

Complexity

Conditions 2
Paths 2

Size

Total Lines 22
Code Lines 14

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 8
CRAP Score 2.0054

Importance

Changes 0
Metric Value
dl 0
loc 22
ccs 8
cts 9
cp 0.8889
rs 9.2
c 0
b 0
f 0
cc 2
eloc 14
nc 2
nop 4
crap 2.0054
1
<?php declare(strict_types = 1);
2
namespace ApacheSolrForTypo3\Solr\Domain\Search\Statistics;
3
4
/***************************************************************
5
 *  Copyright notice
6
 *
7
 *  (c) 2016 Thomas Hohn <[email protected]>
8
 *  All rights reserved
9
 *
10
 *  This script is part of the TYPO3 project. The TYPO3 project is
11
 *  free software; you can redistribute it and/or modify
12
 *  it under the terms of the GNU General Public License as published by
13
 *  the Free Software Foundation; either version 2 of the License, or
14
 *  (at your option) any later version.
15
 *
16
 *  The GNU General Public License can be found at
17
 *  http://www.gnu.org/copyleft/gpl.html.
18
 *
19
 *  This script is distributed in the hope that it will be useful,
20
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
21
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
22
 *  GNU General Public License for more details.
23
 *
24
 *  This copyright notice MUST APPEAR in all copies of the script!
25
 ***************************************************************/
26
27
use ApacheSolrForTypo3\Solr\System\Records\AbstractRepository;
28
use TYPO3\CMS\Core\Database\Query\QueryBuilder;
29
30
/**
31
 * Calculates the SearchQueryStatistics
32
 *
33
 * @author Thomas Hohn <[email protected]>
34
 */
35
class StatisticsRepository extends AbstractRepository
36
{
37
    /**
38
     * @var string
39
     */
40
    protected $table = 'tx_solr_statistics';
41
42
    /**
43 1
     * Fetches must popular search keys words from the table tx_solr_statistics
44
     *
45 1
     * @param int $rootPageId
46 1
     * @param int $days number of days of history to query
47 1
     * @param int $limit
48 1
     * @return mixed
49
     */
50 1
    public function getSearchStatistics(int $rootPageId, int $days = 30, $limit = 10)
51 1
    {
52 1
        $now = time();
53 1
        $timeStart = (int)($now - 86400 * $days); // 86400 seconds/day
54 1
        $limit = (int)$limit;
55 1
56
        $statisticsRows = $this->getPreparedQueryBuilderForSearchStatisticsAndTopKeywords($rootPageId, $timeStart, $limit)
57
            ->execute()->fetchAll();
58
59 1
        $statisticsRows = $this->mergeRowsWithSameKeyword($statisticsRows);
60
61 1
        $sumCount = $statisticsRows['sumCount'];
62 1
        foreach ($statisticsRows as $statisticsRow) {
63
            $sumCount += $statisticsRow['count'];
64
        }
65
66 1
        $statisticsRows = array_map(function($row) use ($sumCount) {
67
            $row['percent'] = $row['count'] * 100 / $sumCount;
68
            return $row;
69 1
        }, $statisticsRows);
70
71 1
        return $statisticsRows;
72
    }
73
74
    /**
75
     * Returns prepared QueryBuilder for two purposes:
76
     * for getSearchStatistics() and getTopKeyWordsWithOrWithoutHits() methods
77
     *
78
     * @param int $rootPageId
79
     * @param int $timeStart
80
     * @param int $limit
81
     * @return QueryBuilder
82 1
     */
83
    protected function getPreparedQueryBuilderForSearchStatisticsAndTopKeywords(int $rootPageId, int $timeStart, int $limit) : QueryBuilder
84 1
    {
85
        $queryBuilder = $this->getQueryBuilder();
86
        $statisticsQueryBuilder = $queryBuilder
87
            ->select('keywords', 'num_found as hits')
88
            ->add('select', $queryBuilder->expr()->count('keywords', 'count'), true)
89
            ->from($this->table)
90
            ->andWhere(
91
                $queryBuilder->expr()->gt('tstamp', $timeStart),
92
                $queryBuilder->expr()->eq('root_pid', $rootPageId)
93
            )
94
            ->groupBy('keywords', 'num_found')
95 2
            ->orderBy('count', 'DESC')
96
            ->addOrderBy('hits', 'DESC')
97 2
            ->addOrderBy('keywords', 'ASC')
98
            ->setMaxResults($limit);
99
100
        return $statisticsQueryBuilder;
101
    }
102
103
    /**
104
     * Find Top search keywords with results
105
     *
106
     * @param int $rootPageId
107
     * @param int $days number of days of history to query
108
     * @param int $limit
109 3
     * @return array
110
     */
111 3
    public function getTopKeyWordsWithHits($rootPageId, $days = 30, $limit = 10)
112 3
    {
113 3
        return $this->getTopKeyWordsWithOrWithoutHits($rootPageId, $days, $limit, false);
114
    }
115 3
116 3
    /**
117
     * Find Top search keywords without results
118
     *
119 3
     * @param int $rootPageId
120 2
     * @param int $days number of days of history to query
121
     * @param int $limit
122 1
     * @return array
123
     */
124
    public function getTopKeyWordsWithoutHits($rootPageId, $days = 30, $limit = 10)
125 3
    {
126 3
        return $this->getTopKeyWordsWithOrWithoutHits($rootPageId, $days, $limit, true);
127 3
    }
128 3
129 3
    /**
130 3
     * Find Top search keywords with or without results
131
     *
132
     * @param int $rootPageId
133
     * @param int $days number of days of history to query
134 3
     * @param int $limit
135
     * @param bool $withoutHits
136 3
     * @return array
137
     */
138
    protected function getTopKeyWordsWithOrWithoutHits($rootPageId, $days = 30, $limit = 10, $withoutHits = false)
139
    {
140
        $rootPageId = (int)$rootPageId;
141
        $limit = (int)$limit;
142
        $withoutHits = (bool)$withoutHits;
143
144
        $now = time();
145
        $timeStart = $now - 86400 * intval($days); // 86400 seconds/day
146 4
147
        $queryBuilder = $this->getPreparedQueryBuilderForSearchStatisticsAndTopKeywords($rootPageId, $timeStart, $limit);
148 4
        // Check if we want without or with hits
149 4
        if ($withoutHits === true) {
150 2
            $queryBuilder->andWhere($queryBuilder->expr()->eq('num_found', 0));
151
        } else {
152 2
            $queryBuilder->andWhere($queryBuilder->expr()->gt('num_found', 0));
153 2
        }
154
155
        $statisticsRows = $queryBuilder->execute()->fetchAll();
156 2
        $statisticsRows = $this->mergeRowsWithSameKeyword($statisticsRows);
157 2
158
        return $statisticsRows;
159
    }
160 2
161
    /**
162 2
     * This method groups rows with the same term and different count and hits
163 2
     * and calculates the average.
164
     *
165
     * @param array $statisticsRows
166 4
     * @return array
167
     */
168
    protected function mergeRowsWithSameKeyword(array $statisticsRows)
169
    {
170
        $result = [];
171
        foreach ($statisticsRows as $statisticsRow) {
172
            $term = html_entity_decode($statisticsRow['keywords'], ENT_QUOTES);
173
174
            $mergedRow = isset($result[$term]) ? $result[$term] : ['mergedrows' => 0, 'count' => 0];
175
            $mergedRow['mergedrows']++;
176
177
            // for the hits we need to take the average
178
            $avgHits = $this->getAverageFromField($mergedRow, $statisticsRow, 'hits');
179
            $mergedRow['hits'] = (int)$avgHits;
180
181
            // for the count we need to take the sum, because it's the sum of searches
182
            $mergedRow['count'] = $mergedRow['count'] + $statisticsRow['count'];
183
184
            $mergedRow['keywords'] = $term;
185
            $result[$term] = $mergedRow;
186
        }
187
188
        return array_values($result);
189
    }
190
191
    /**
192
     * Get number of queries over time
193
     *
194
     * @param int $rootPageId
195
     * @param int $days number of days of history to query
196
     * @param int $bucketSeconds Seconds per bucket
197
     * @return array [labels, data]
198
     */
199
    public function getQueriesOverTime($rootPageId, $days = 30, $bucketSeconds = 3600)
200
    {
201 2
        $now = time();
202
        $timeStart = $now - 86400 * intval($days); // 86400 seconds/day
203
204 2
        $queryBuilder = $this->getQueryBuilder();
205 2
        $result = $queryBuilder
206 2
            ->addSelectLiteral(
207
                'FLOOR(`tstamp`/' . $bucketSeconds . ') AS `bucket`',
208
                // @todo: Works only with MySQL. Add own column with Date type to prevent converting DateTime to Date
209 1
                'unix_timestamp(from_unixtime(`tstamp`, "%y-%m-%d")) AS `timestamp`',
210 1
                $queryBuilder->expr()->count('*', 'numQueries')
211 1
            )
212 1
            ->from($this->table)
213 1
            ->andWhere(
214
                $queryBuilder->expr()->gt('tstamp', $timeStart),
215
                $queryBuilder->expr()->eq('root_pid', $rootPageId)
216
            )
217
            ->groupBy('bucket', 'timestamp')
218
            ->orderBy('bucket', 'ASC')
219 4
            ->execute()->fetchAll();
220
221 4
        return $result;
222
    }
223
224
    /**
225
     * This method is used to get an average value from merged statistic rows.
226
     *
227
     * @param array $mergedRow
228
     * @param array $statisticsRow
229
     * @param string $fieldName
230
     * @return float|int
231
     */
232
    protected function getAverageFromField(array &$mergedRow, array $statisticsRow, $fieldName)
233
    {
234
        // when this is the first row we can take it.
235
        if ($mergedRow['mergedrows'] === 1) {
236
            $avgCount = $statisticsRow[$fieldName];
237
            return $avgCount;
238
        }
239
240
        $oldAverage = $mergedRow[$fieldName];
241
        $oldMergeRows = $mergedRow['mergedrows'] - 1;
242
        $oldCount = $oldAverage * $oldMergeRows;
243
        $avgCount = (($oldCount + $statisticsRow[$fieldName]) / $mergedRow['mergedrows']);
244
        return $avgCount;
245
    }
246
}
247