Passed
Push — master ( 93b1b5...cf5d08 )
by Timo
04:01
created

StatisticsRepository::countByRootPageId()   A

Complexity

Conditions 1
Paths 1

Size

Total Lines 9
Code Lines 7

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 0
CRAP Score 2

Importance

Changes 0
Metric Value
dl 0
loc 9
ccs 0
cts 0
cp 0
rs 9.6666
c 0
b 0
f 0
cc 1
eloc 7
nc 1
nop 1
crap 2
1
<?php declare(strict_types = 1);
2
namespace ApacheSolrForTypo3\Solr\Domain\Search\Statistics;
3
4
/***************************************************************
5
 *  Copyright notice
6
 *
7
 *  (c) 2016 Thomas Hohn <[email protected]>
8
 *  All rights reserved
9
 *
10
 *  This script is part of the TYPO3 project. The TYPO3 project is
11
 *  free software; you can redistribute it and/or modify
12
 *  it under the terms of the GNU General Public License as published by
13
 *  the Free Software Foundation; either version 2 of the License, or
14
 *  (at your option) any later version.
15
 *
16
 *  The GNU General Public License can be found at
17
 *  http://www.gnu.org/copyleft/gpl.html.
18
 *
19
 *  This script is distributed in the hope that it will be useful,
20
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
21
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
22
 *  GNU General Public License for more details.
23
 *
24
 *  This copyright notice MUST APPEAR in all copies of the script!
25
 ***************************************************************/
26
27
use ApacheSolrForTypo3\Solr\System\Records\AbstractRepository;
28
use TYPO3\CMS\Core\Database\Query\QueryBuilder;
29
30
/**
31
 * Calculates the SearchQueryStatistics
32
 *
33
 * @author Thomas Hohn <[email protected]>
34
 */
35
class StatisticsRepository extends AbstractRepository
36
{
37
    /**
38
     * @var string
39
     */
40
    protected $table = 'tx_solr_statistics';
41
42
    /**
43 1
     * Fetches must popular search keys words from the table tx_solr_statistics
44
     *
45 1
     * @param int $rootPageId
46 1
     * @param int $days number of days of history to query
47 1
     * @param int $limit
48 1
     * @return mixed
49
     */
50 1
    public function getSearchStatistics(int $rootPageId, int $days = 30, $limit = 10)
51 1
    {
52 1
        $now = time();
53 1
        $timeStart = (int)($now - 86400 * $days); // 86400 seconds/day
54 1
        $limit = (int)$limit;
55 1
56
        $statisticsRows = $this->getPreparedQueryBuilderForSearchStatisticsAndTopKeywords($rootPageId, $timeStart, $limit)
57
            ->execute()->fetchAll();
58
59 1
        $statisticsRows = $this->mergeRowsWithSameKeyword($statisticsRows);
60
61 1
        $sumCount = $statisticsRows['sumCount'];
62 1
        foreach ($statisticsRows as $statisticsRow) {
63
            $sumCount += $statisticsRow['count'];
64
        }
65
66 1
        $statisticsRows = array_map(function($row) use ($sumCount) {
67
            $row['percent'] = $row['count'] * 100 / $sumCount;
68
            return $row;
69 1
        }, $statisticsRows);
70
71 1
        return $statisticsRows;
72
    }
73
74
    /**
75
     * Returns prepared QueryBuilder for two purposes:
76
     * for getSearchStatistics() and getTopKeyWordsWithOrWithoutHits() methods
77
     *
78
     * @param int $rootPageId
79
     * @param int $timeStart
80
     * @param int $limit
81
     * @return QueryBuilder
82 1
     */
83
    protected function getPreparedQueryBuilderForSearchStatisticsAndTopKeywords(int $rootPageId, int $timeStart, int $limit) : QueryBuilder
84 1
    {
85
        $queryBuilder = $this->getQueryBuilder();
86
        $statisticsQueryBuilder = $queryBuilder
87
            ->select('keywords', 'num_found AS hits')
88
            ->add('select', $queryBuilder->expr()->count('keywords', 'count'), true)
89
            ->from($this->table)
90
            ->andWhere(
91
                $queryBuilder->expr()->gt('tstamp', $timeStart),
92
                $queryBuilder->expr()->eq('root_pid', $rootPageId)
93
            )
94
            ->groupBy('keywords', 'num_found')
95 2
            ->orderBy('count', 'DESC')
96
            ->addOrderBy('hits', 'DESC')
97 2
            ->addOrderBy('keywords', 'ASC')
98
            ->setMaxResults($limit);
99
100
        return $statisticsQueryBuilder;
101
    }
102
103
    /**
104
     * Find Top search keywords with results
105
     *
106
     * @param int $rootPageId
107
     * @param int $days number of days of history to query
108
     * @param int $limit
109 3
     * @return array
110
     */
111 3
    public function getTopKeyWordsWithHits(int $rootPageId, int $days = 30, int $limit = 10) : array
112 3
    {
113 3
        return $this->getTopKeyWordsWithOrWithoutHits($rootPageId, $days, $limit, false);
114
    }
115 3
116 3
    /**
117
     * Find Top search keywords without results
118
     *
119 3
     * @param int $rootPageId
120 2
     * @param int $days number of days of history to query
121
     * @param int $limit
122 1
     * @return array
123
     */
124
    public function getTopKeyWordsWithoutHits(int $rootPageId, int $days = 30, int $limit = 10) : array
125 3
    {
126 3
        return $this->getTopKeyWordsWithOrWithoutHits($rootPageId, $days, $limit, true);
127 3
    }
128 3
129 3
    /**
130 3
     * Find Top search keywords with or without results
131
     *
132
     * @param int $rootPageId
133
     * @param int $days number of days of history to query
134 3
     * @param int $limit
135
     * @param bool $withoutHits
136 3
     * @return array
137
     */
138
    protected function getTopKeyWordsWithOrWithoutHits(int $rootPageId, int $days = 30, int $limit = 10, bool $withoutHits = false) : array
139
    {
140
        $now = time();
141
        $timeStart = $now - 86400 * $days; // 86400 seconds/day
142
143
        $queryBuilder = $this->getPreparedQueryBuilderForSearchStatisticsAndTopKeywords($rootPageId, $timeStart, $limit);
144
        // Check if we want without or with hits
145
        if ($withoutHits === true) {
146 4
            $queryBuilder->andWhere($queryBuilder->expr()->eq('num_found', 0));
147
        } else {
148 4
            $queryBuilder->andWhere($queryBuilder->expr()->gt('num_found', 0));
149 4
        }
150 2
151
        $statisticsRows = $queryBuilder->execute()->fetchAll();
152 2
        $statisticsRows = $this->mergeRowsWithSameKeyword($statisticsRows);
153 2
154
        return $statisticsRows;
155
    }
156 2
157 2
    /**
158
     * This method groups rows with the same term and different count and hits
159
     * and calculates the average.
160 2
     *
161
     * @param array $statisticsRows
162 2
     * @return array
163 2
     */
164
    protected function mergeRowsWithSameKeyword(array $statisticsRows) : array
165
    {
166 4
        $result = [];
167
        foreach ($statisticsRows as $statisticsRow) {
168
            $term = html_entity_decode($statisticsRow['keywords'], ENT_QUOTES);
169
170
            $mergedRow = isset($result[$term]) ? $result[$term] : ['mergedrows' => 0, 'count' => 0];
171
            $mergedRow['mergedrows']++;
172
173
            // for the hits we need to take the average
174
            $avgHits = $this->getAverageFromField($mergedRow, $statisticsRow, 'hits');
175
            $mergedRow['hits'] = (int)$avgHits;
176
177
            // for the count we need to take the sum, because it's the sum of searches
178
            $mergedRow['count'] = $mergedRow['count'] + $statisticsRow['count'];
179
180
            $mergedRow['keywords'] = $term;
181
            $result[$term] = $mergedRow;
182
        }
183
184
        return array_values($result);
185
    }
186
187
    /**
188
     * Get number of queries over time
189
     *
190
     * @param int $rootPageId
191
     * @param int $days number of days of history to query
192
     * @param int $bucketSeconds Seconds per bucket
193
     * @return array [labels, data]
194
     */
195
    public function getQueriesOverTime(int $rootPageId, int $days = 30, int $bucketSeconds = 3600) : array
196
    {
197
        $now = time();
198
        $timeStart = $now - 86400 * intval($days); // 86400 seconds/day
199
200
        $queryBuilder = $this->getQueryBuilder();
201 2
        $result = $queryBuilder
202
            ->addSelectLiteral(
203
                'FLOOR(`tstamp`/' . $bucketSeconds . ') AS `bucket`',
204 2
                // @todo: Works only with MySQL. Add own column with Date type to prevent converting DateTime to Date
205 2
                'unix_timestamp(from_unixtime(`tstamp`, "%y-%m-%d")) AS `timestamp`',
206 2
                $queryBuilder->expr()->count('*', 'numQueries')
207
            )
208
            ->from($this->table)
209 1
            ->andWhere(
210 1
                $queryBuilder->expr()->gt('tstamp', $timeStart),
211 1
                $queryBuilder->expr()->eq('root_pid', $rootPageId)
212 1
            )
213 1
            ->groupBy('bucket', 'timestamp')
214
            ->orderBy('bucket', 'ASC')
215
            ->execute()->fetchAll();
216
217
        return $result;
218
    }
219 4
220
    /**
221 4
     * Regurns a result set by given plugin.tx_solr.search.frequentSearches.select configuration.
222
     *
223
     * @param array $frequentSearchConfiguration
224
     * @return array Array of frequent search terms, keys are the terms, values are hits
225
     */
226
    public function getFrequentSearchTermsFromStatisticsByFrequentSearchConfiguration(array $frequentSearchConfiguration) : array
227
    {
228
        $queryBuilder = $this->getQueryBuilder();
229
        $resultSet = $queryBuilder
230
            ->addSelectLiteral(
231
                $frequentSearchConfiguration['select.']['SELECT']
232
            )
233
            ->from($frequentSearchConfiguration['select.']['FROM'])
234
            ->add('where', $frequentSearchConfiguration['select.']['ADD_WHERE'], true)
235
            ->add('groupBy', $frequentSearchConfiguration['select.']['GROUP_BY'], true)
236
            ->add('orderBy', $frequentSearchConfiguration['select.']['ORDER_BY'])
237
            ->setMaxResults((int)$frequentSearchConfiguration['limit'])
238
            ->execute()->fetchAll();
239
240
        return $resultSet;
241
    }
242
243
    /**
244
     * This method is used to get an average value from merged statistic rows.
245
     *
246
     * @param array $mergedRow
247
     * @param array $statisticsRow
248
     * @param string $fieldName
249
     * @return float|int
250
     */
251
    protected function getAverageFromField(array &$mergedRow, array $statisticsRow, string $fieldName)
252
    {
253
        // when this is the first row we can take it.
254
        if ($mergedRow['mergedrows'] === 1) {
255
            $avgCount = $statisticsRow[$fieldName];
256
            return $avgCount;
257
        }
258
259
        $oldAverage = $mergedRow[$fieldName];
260
        $oldMergeRows = $mergedRow['mergedrows'] - 1;
261
        $oldCount = $oldAverage * $oldMergeRows;
262
        $avgCount = (($oldCount + $statisticsRow[$fieldName]) / $mergedRow['mergedrows']);
263
        return $avgCount;
264
    }
265
266
    /**
267
     * Persists statistics record
268
     *
269
     * @param array $statisticsRecord
270
     * @return void
271
     */
272
    public function saveStatisticsRecord(array $statisticsRecord)
273
    {
274
        $queryBuilder = $this->getQueryBuilder();
275
        $queryBuilder->insert($this->table)->values($statisticsRecord)->execute();
276
    }
277
278
    /**
279
     * Counts rows for specified site
280
     *
281
     * @param int $rootPageId
282
     * @return int
283
     */
284
    public function countByRootPageId(int $rootPageId): int
285
    {
286
        $queryBuilder = $this->getQueryBuilder();
287
        return (int)$this->getQueryBuilder()
288
            ->count('*')
289
            ->from($this->table)
290
            ->andWhere($queryBuilder->expr()->eq('root_pid', $rootPageId))
291
            ->execute()->fetchColumn(0);
292
    }
293
}
294