Completed
Push — master ( fb242c...3f99e9 )
by Timo
26:23 queued 04:18
created

StatisticsRepository::getSearchStatistics()   B

Complexity

Conditions 2
Paths 2

Size

Total Lines 31
Code Lines 21

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 19
CRAP Score 2.0552

Importance

Changes 0
Metric Value
c 0
b 0
f 0
dl 0
loc 31
ccs 19
cts 25
cp 0.76
rs 8.8571
cc 2
eloc 21
nc 2
nop 3
crap 2.0552
1
<?php
2
3
namespace ApacheSolrForTypo3\Solr\Domain\Search\Statistics;
4
5
/***************************************************************
6
 *  Copyright notice
7
 *
8
 *  (c) 2016 Thomas Hohn <[email protected]>
9
 *  All rights reserved
10
 *
11
 *  This script is part of the TYPO3 project. The TYPO3 project is
12
 *  free software; you can redistribute it and/or modify
13
 *  it under the terms of the GNU General Public License as published by
14
 *  the Free Software Foundation; either version 2 of the License, or
15
 *  (at your option) any later version.
16
 *
17
 *  The GNU General Public License can be found at
18
 *  http://www.gnu.org/copyleft/gpl.html.
19
 *
20
 *  This script is distributed in the hope that it will be useful,
21
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
22
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
23
 *  GNU General Public License for more details.
24
 *
25
 *  This copyright notice MUST APPEAR in all copies of the script!
26
 ***************************************************************/
27
28
/**
 * Calculates the SearchQueryStatistics
 *
 * Reads aggregated search statistics from the table tx_solr_statistics
 * through the global TYPO3 database connection ($GLOBALS['TYPO3_DB']).
 *
 * @author Thomas Hohn <[email protected]>
 */
class StatisticsRepository
{
    /**
     * Fetches the most popular search keywords from the table tx_solr_statistics
     *
     * The limit is applied by the database query; rows that share the same
     * (entity decoded) keyword are merged afterwards, so fewer than $limit
     * rows may be returned. Every returned row gets a "percent" entry holding
     * the share of its "count" in the sum of all returned counts.
     *
     * @param int $rootPageId
     * @param int $days number of days of history to query
     * @param int $limit
     * @return array list of rows with the keys keywords, count, hits, mergedrows and percent
     */
    public function getSearchStatistics($rootPageId, $days = 30, $limit = 10)
    {
        $now = time();
        $timeStart = (int) ($now - 86400 * intval($days)); // 86400 seconds/day
        $rootPageId = (int) $rootPageId;
        $limit = (int) $limit;

        $statisticsRows = (array)$this->getDatabase()->exec_SELECTgetRows(
            'keywords, count(keywords) as count, num_found as hits',
            'tx_solr_statistics',
            'tstamp > ' . $timeStart . ' AND root_pid = ' . $rootPageId,
            'keywords, num_found',
            'count DESC, hits DESC, keywords ASC',
            $limit
        );

        $statisticsRows = $this->mergeRowsWithSameKeyword($statisticsRows);

        // The merged rows are a plain list, so the total has to start at zero
        // (there is no precalculated "sumCount" entry to read from).
        $sumCount = 0;
        foreach ($statisticsRows as $statisticsRow) {
            $sumCount += $statisticsRow['count'];
        }

        $statisticsRows = array_map(function ($row) use ($sumCount) {
            // Guard against a division by zero when all counts are zero.
            $row['percent'] = $sumCount > 0 ? $row['count'] * 100 / $sumCount : 0;

            return $row;
        }, $statisticsRows);

        return $statisticsRows;
    }

    /**
     * Find Top search keywords with results
     *
     * @param int $rootPageId
     * @param int $days number of days of history to query
     * @param int $limit
     * @return array
     */
    public function getTopKeyWordsWithHits($rootPageId, $days = 30, $limit = 10)
    {
        return $this->getTopKeyWordsWithOrWithoutHits($rootPageId, $days, $limit, false);
    }

    /**
     * Find Top search keywords without results
     *
     * @param int $rootPageId
     * @param int $days number of days of history to query
     * @param int $limit
     * @return array
     */
    public function getTopKeyWordsWithoutHits($rootPageId, $days = 30, $limit = 10)
    {
        return $this->getTopKeyWordsWithOrWithoutHits($rootPageId, $days, $limit, true);
    }

    /**
     * Find Top search keywords with or without results
     *
     * All four arguments are required: $days used to carry a default value,
     * but as it is followed by required parameters that default could never
     * be used (and such a declaration is deprecated since PHP 8.0).
     *
     * @param int $rootPageId
     * @param int $days number of days of history to query
     * @param int $limit
     * @param bool $withoutHits TRUE selects rows with num_found = 0, FALSE rows with num_found > 0
     * @return array
     */
    protected function getTopKeyWordsWithOrWithoutHits($rootPageId, $days, $limit, $withoutHits)
    {
        $rootPageId = (int) $rootPageId;
        $limit = (int) $limit;
        $withoutHits = (bool) $withoutHits;

        $now = time();
        $timeStart = $now - 86400 * intval($days); // 86400 seconds/day

        // Check if we want without or with hits
        if ($withoutHits === true) {
            $comparisonOperator = '=';
        } else {
            $comparisonOperator = '>';
        }

        $statisticsRows = (array)$this->getDatabase()->exec_SELECTgetRows(
            'keywords, count(keywords) as count, num_found as hits',
            'tx_solr_statistics',
            'tstamp > ' . $timeStart . ' AND root_pid = ' . $rootPageId . ' AND num_found ' . $comparisonOperator . ' 0',
            'keywords, num_found',
            'count DESC, hits DESC, keywords ASC',
            $limit
        );

        $statisticsRows = $this->mergeRowsWithSameKeyword($statisticsRows);

        return $statisticsRows;
    }

    /**
     * This method groups rows with the same term and different count and hits
     * and calculates the average.
     *
     * Terms are compared after decoding HTML entities. For merged rows the
     * "count" values are summed up, "hits" holds the (integer) running average
     * and "mergedrows" the number of rows that were merged into the entry.
     *
     * @param array $statisticsRows rows with the keys keywords, count and hits
     * @return array list of merged rows in order of first appearance of the term
     */
    protected function mergeRowsWithSameKeyword(array $statisticsRows)
    {
        $result = [];
        foreach ($statisticsRows as $statisticsRow) {
            $term = html_entity_decode($statisticsRow['keywords'], ENT_QUOTES);

            $mergedRow = isset($result[$term]) ? $result[$term] : ['mergedrows' => 0, 'count' => 0];
            $mergedRow['mergedrows']++;

            // for the hits we need to take the average
            $avgHits = $this->getAverageFromField($mergedRow, $statisticsRow, 'hits');
            $mergedRow['hits'] = (int) $avgHits;

            // for the count we need to take the sum, because it's the sum of searches
            $mergedRow['count'] = $mergedRow['count'] + $statisticsRow['count'];

            $mergedRow['keywords'] = $term;
            $result[$term] = $mergedRow;
        }

        return array_values($result);
    }

    /**
     * Get number of queries over time
     *
     * The integer arguments are cast before they are put into the SQL
     * statement, and the bucket size is raised to at least one second to
     * avoid a division by zero inside the query.
     *
     * @param int $rootPageId
     * @param int $days number of days of history to query
     * @param int $bucketSeconds Seconds per bucket
     * @return array rows with the keys bucket, timestamp and numQueries
     */
    public function getQueriesOverTime($rootPageId, $days = 30, $bucketSeconds = 3600)
    {
        $rootPageId = (int) $rootPageId;
        $bucketSeconds = max(1, (int) $bucketSeconds);

        $now = time();
        $timeStart = $now - 86400 * intval($days); // 86400 seconds/day

        $queries = (array)$this->getDatabase()->exec_SELECTgetRows(
            'FLOOR(tstamp/' . $bucketSeconds . ') AS bucket, unix_timestamp(from_unixtime(tstamp, "%y-%m-%d")) as timestamp, COUNT(*) AS numQueries',
            'tx_solr_statistics',
            'tstamp > ' . $timeStart . ' AND root_pid = ' . $rootPageId,
            'bucket, timestamp',
            'bucket ASC'
        );

        return $queries;
    }

    /**
     * This method is used to get an average value from merged statistic rows.
     *
     * $mergedRow['mergedrows'] has to include the row that is currently
     * merged. The merged row itself is only read, not changed.
     *
     * @param array $mergedRow
     * @param array $statisticsRow
     * @param string $fieldName
     * @return float|int
     */
    protected function getAverageFromField(array &$mergedRow, array $statisticsRow, $fieldName)
    {
        // when this is the first row we can take it.
        if ($mergedRow['mergedrows'] === 1) {
            $avgCount = $statisticsRow[$fieldName];
            return $avgCount;
        }

        // Rebuild the previous total from the old average and add the new value.
        $oldAverage = $mergedRow[$fieldName];
        $oldMergeRows = $mergedRow['mergedrows'] - 1;
        $oldCount = $oldAverage * $oldMergeRows;
        $avgCount = (($oldCount + $statisticsRow[$fieldName]) / $mergedRow['mergedrows']);
        return $avgCount;
    }

    /**
     * @return \TYPO3\CMS\Core\Database\DatabaseConnection
     */
    protected function getDatabase()
    {
        return $GLOBALS['TYPO3_DB'];
    }
}
225