Passed
Push — master ( 3cd8c6...9e747f )
by Timo
38:54 queued 16:17
created

StatisticsRepository::getSearchStatistics()   B

Complexity

Conditions 2
Paths 2

Size

Total Lines 30
Code Lines 21

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 17
CRAP Score 2.1309

Importance

Changes 0
Metric Value
dl 0
loc 30
ccs 17
cts 25
cp 0.68
rs 8.8571
c 0
b 0
f 0
cc 2
eloc 21
nc 2
nop 3
crap 2.1309
1
<?php
2
3
namespace ApacheSolrForTypo3\Solr\Domain\Search\Statistics;
4
5
/***************************************************************
6
 *  Copyright notice
7
 *
8
 *  (c) 2016 Thomas Hohn <[email protected]>
9
 *  All rights reserved
10
 *
11
 *  This script is part of the TYPO3 project. The TYPO3 project is
12
 *  free software; you can redistribute it and/or modify
13
 *  it under the terms of the GNU General Public License as published by
14
 *  the Free Software Foundation; either version 2 of the License, or
15
 *  (at your option) any later version.
16
 *
17
 *  The GNU General Public License can be found at
18
 *  http://www.gnu.org/copyleft/gpl.html.
19
 *
20
 *  This script is distributed in the hope that it will be useful,
21
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
22
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
23
 *  GNU General Public License for more details.
24
 *
25
 *  This copyright notice MUST APPEAR in all copies of the script!
26
 ***************************************************************/
27
28
/**
29
 * Calculates the SearchQueryStatistics
30
 *
31
 * @author Thomas Hohn <[email protected]>
32
 */
33
class StatisticsRepository
34
{
35
    /**
36
     * Fetches must popular search keys words from the table tx_solr_statistics
37
     *
38
     * @param int $rootPageId
39
     * @param int $days number of days of history to query
40
     * @param int $limit
41
     * @return mixed
42
     */
43 1
    public function getSearchStatistics($rootPageId, $days = 30, $limit = 10)
44
    {
45 1
        $now = time();
46 1
        $timeStart = (int)($now - 86400 * intval($days)); // 86400 seconds/day
47 1
        $rootPageId = (int)$rootPageId;
48 1
        $limit = (int)$limit;
49
50 1
        $statisticsRows = (array)$this->getDatabase()->exec_SELECTgetRows(
51 1
            'keywords, count(keywords) as count, num_found as hits',
52 1
            'tx_solr_statistics',
53 1
            'tstamp > ' . $timeStart . ' AND root_pid = ' . $rootPageId,
54 1
            'keywords, num_found',
55 1
            'count DESC, hits DESC, keywords ASC',
56
            $limit
57
        );
58
59 1
        $statisticsRows = $this->mergeRowsWithSameKeyword($statisticsRows);
60
61 1
        $sumCount = $statisticsRows['sumCount'];
62 1
        foreach ($statisticsRows as $statisticsRow) {
63
            $sumCount += $statisticsRow['count'];
64
        }
65
66 1
        $statisticsRows = array_map(function($row) use ($sumCount) {
67
            $row['percent'] = $row['count'] * 100 / $sumCount;
68
            return $row;
69 1
        }, $statisticsRows);
70
71 1
        return $statisticsRows;
72
    }
73
74
    /**
75
     * Find Top search keywords with results
76
     *
77
     * @param int $rootPageId
78
     * @param int $days number of days of history to query
79
     * @param int $limit
80
     * @return array
81
     */
82 1
    public function getTopKeyWordsWithHits($rootPageId, $days = 30, $limit = 10)
83
    {
84 1
        return $this->getTopKeyWordsWithOrWithoutHits($rootPageId, $days, $limit, false);
85
    }
86
87
    /**
88
     * Find Top search keywords without results
89
     *
90
     * @param int $rootPageId
91
     * @param int $days number of days of history to query
92
     * @param int $limit
93
     * @return array
94
     */
95 2
    public function getTopKeyWordsWithoutHits($rootPageId, $days = 30, $limit = 10)
96
    {
97 2
        return $this->getTopKeyWordsWithOrWithoutHits($rootPageId, $days, $limit, true);
98
    }
99
100
    /**
101
     * Find Top search keywords with or without results
102
     *
103
     * @param int $rootPageId
104
     * @param int $days number of days of history to query
105
     * @param int $limit
106
     * @param bool $withoutHits
107
     * @return array
108
     */
109 3
    protected function getTopKeyWordsWithOrWithoutHits($rootPageId, $days = 30, $limit = 10, $withoutHits = false)
110
    {
111 3
        $rootPageId = (int)$rootPageId;
112 3
        $limit = (int)$limit;
113 3
        $withoutHits = (bool)$withoutHits;
114
115 3
        $now = time();
116 3
        $timeStart = $now - 86400 * intval($days); // 86400 seconds/day
117
118
        // Check if we want without or with hits
119 3
        if ($withoutHits === true) {
120 2
            $comparisonOperator = '=';
121
        } else {
122 1
            $comparisonOperator = '>';
123
        }
124
125 3
        $statisticsRows = (array)$this->getDatabase()->exec_SELECTgetRows(
126 3
            'keywords, count(keywords) as count, num_found as hits',
127 3
            'tx_solr_statistics',
128 3
            'tstamp > ' . $timeStart . ' AND root_pid = ' . $rootPageId . ' AND num_found ' . $comparisonOperator . ' 0',
129 3
            'keywords, num_found',
130 3
            'count DESC, hits DESC, keywords ASC',
131
            $limit
132
        );
133
134 3
        $statisticsRows = $this->mergeRowsWithSameKeyword($statisticsRows);
135
136 3
        return $statisticsRows;
137
    }
138
139
    /**
140
     * This method groups rows with the same term and different count and hits
141
     * and calculates the average.
142
     *
143
     * @param array $statisticsRows
144
     * @return array
145
     */
146 4
    protected function mergeRowsWithSameKeyword(array $statisticsRows)
147
    {
148 4
        $result = [];
149 4
        foreach ($statisticsRows as $statisticsRow) {
150 2
            $term = html_entity_decode($statisticsRow['keywords'], ENT_QUOTES);
151
152 2
            $mergedRow = isset($result[$term]) ? $result[$term] : ['mergedrows' => 0, 'count' => 0];
153 2
            $mergedRow['mergedrows']++;
154
155
            // for the hits we need to take the average
156 2
            $avgHits = $this->getAverageFromField($mergedRow, $statisticsRow, 'hits');
157 2
            $mergedRow['hits'] = (int)$avgHits;
158
159
            // for the count we need to take the sum, because it's the sum of searches
160 2
            $mergedRow['count'] = $mergedRow['count'] + $statisticsRow['count'];
161
162 2
            $mergedRow['keywords'] = $term;
163 2
            $result[$term] = $mergedRow;
164
        }
165
166 4
        return array_values($result);
167
    }
168
169
    /**
170
     * Get number of queries over time
171
     *
172
     * @param int $rootPageId
173
     * @param int $days number of days of history to query
174
     * @param int $bucketSeconds Seconds per bucket
175
     * @return array [labels, data]
176
     */
177
    public function getQueriesOverTime($rootPageId, $days = 30, $bucketSeconds = 3600)
178
    {
179
        $now = time();
180
        $timeStart = $now - 86400 * intval($days); // 86400 seconds/day
181
182
        $queries = $this->getDatabase()->exec_SELECTgetRows(
183
            'FLOOR(tstamp/' . $bucketSeconds . ') AS bucket, unix_timestamp(from_unixtime(tstamp, "%y-%m-%d")) as timestamp, COUNT(*) AS numQueries',
184
            'tx_solr_statistics',
185
            'tstamp > ' . $timeStart . ' AND root_pid = ' . $rootPageId,
186
            'bucket, timestamp',
187
            'bucket ASC'
188
        );
189
190
        return (array)$queries;
191
    }
192
193
    /**
194
     * This method is used to get an average value from merged statistic rows.
195
     *
196
     * @param array $mergedRow
197
     * @param array $statisticsRow
198
     * @param string $fieldName
199
     * @return float|int
200
     */
201 2
    protected function getAverageFromField(array &$mergedRow, array $statisticsRow, $fieldName)
202
    {
203
        // when this is the first row we can take it.
204 2
        if ($mergedRow['mergedrows'] === 1) {
205 2
            $avgCount = $statisticsRow[$fieldName];
206 2
            return $avgCount;
207
        }
208
209 1
        $oldAverage = $mergedRow[$fieldName];
210 1
        $oldMergeRows = $mergedRow['mergedrows'] - 1;
211 1
        $oldCount = $oldAverage * $oldMergeRows;
212 1
        $avgCount = (($oldCount + $statisticsRow[$fieldName]) / $mergedRow['mergedrows']);
213 1
        return $avgCount;
214
    }
215
216
    /**
217
     * @return \TYPO3\CMS\Core\Database\DatabaseConnection
218
     */
219 4
    protected function getDatabase()
220
    {
221 4
        return $GLOBALS['TYPO3_DB'];
222
    }
223
}
224