Completed
Push — master ( 7682df...b1b8c7 )
by Timo
23:42 queued 14:36
created

StatisticsRepository   A

Complexity

Total Complexity 13

Size/Duplication

Total Lines 193
Duplicated Lines 0 %

Coupling/Cohesion

Components 1
Dependencies 1

Importance

Changes 2
Bugs 0 Features 2
Metric Value
c 2
b 0
f 2
dl 0
loc 193
rs 10
wmc 13
lcom 1
cbo 1

8 Methods

Rating   Name   Duplication   Size   Complexity  
A getTopKeyWordsWithHits() 0 4 1
B getSearchStatistics() 0 31 2
A getTopKeyWordsWithoutHits() 0 4 1
B getTopKeyWordsWithOrWithoutHits() 0 29 2
A mergeRowsWithSameKeyword() 0 22 3
A getQueriesOverTime() 0 15 1
A getAverageFromField() 0 14 2
A getDatabase() 0 4 1
1
<?php
2
3
namespace ApacheSolrForTypo3\Solr\Domain\Search\Statistics;
4
5
/***************************************************************
6
 *  Copyright notice
7
 *
8
 *  (c) 2016 Thomas Hohn <[email protected]>
9
 *  All rights reserved
10
 *
11
 *  This script is part of the TYPO3 project. The TYPO3 project is
12
 *  free software; you can redistribute it and/or modify
13
 *  it under the terms of the GNU General Public License as published by
14
 *  the Free Software Foundation; either version 2 of the License, or
15
 *  (at your option) any later version.
16
 *
17
 *  The GNU General Public License can be found at
18
 *  http://www.gnu.org/copyleft/gpl.html.
19
 *
20
 *  This script is distributed in the hope that it will be useful,
21
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
22
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
23
 *  GNU General Public License for more details.
24
 *
25
 *  This copyright notice MUST APPEAR in all copies of the script!
26
 ***************************************************************/
27
28
29
/**
 * Calculates the SearchQueryStatistics
 *
 * Reads aggregated search statistics (top keywords, keywords with or without
 * hits, number of queries over time) from the table tx_solr_statistics.
 *
 * @author Thomas Hohn <[email protected]>
 * @package TYPO3
 * @subpackage solr
 */
class StatisticsRepository
{
    /**
     * Explicitly injected database connection, or null when the global
     * connection should be used.
     *
     * @var \TYPO3\CMS\Core\Database\DatabaseConnection|null
     */
    protected $database;

    /**
     * @param \TYPO3\CMS\Core\Database\DatabaseConnection|null $database optional connection;
     *        when omitted, $GLOBALS['TYPO3_DB'] is used on every query
     */
    public function __construct($database = null)
    {
        $this->database = $database;
    }

    /**
     * Fetches the most popular search keywords from the table tx_solr_statistics.
     *
     * Rows that share the same keyword are merged, and every merged row gets an
     * additional "percent" key holding its share of the summed "count" values.
     *
     * @param int $rootPageId
     * @param int $days number of days of history to query
     * @param int $limit
     * @return array list of rows with the keys mergedrows, count, hits, keywords and percent;
     *         empty when the query yields no rows
     */
    public function getSearchStatistics($rootPageId, $days = 30, $limit = 10)
    {
        $rootPageId = (int) $rootPageId;
        $limit = (int) $limit;
        $timeStart = $this->getTimeStart($days);

        $statisticsRows = $this->getDatabase()->exec_SELECTgetRows(
            'keywords, count(keywords) as count, num_found as hits',
            'tx_solr_statistics',
            'tstamp > ' . $timeStart . ' AND root_pid = ' . $rootPageId,
            'keywords, num_found',
            'count DESC, hits DESC, keywords ASC',
            $limit
        );

        // The query result may be null, which mergeRowsWithSameKeyword() does not accept.
        $statisticsRows = $this->mergeRowsWithSameKeyword($this->ensureArray($statisticsRows));

        $sumCount = 0;
        foreach ($statisticsRows as $statisticsRow) {
            $sumCount += $statisticsRow['count'];
        }

        foreach ($statisticsRows as $index => $statisticsRow) {
            // Guard against a division by zero when every count is 0.
            $statisticsRows[$index]['percent'] = $sumCount > 0
                ? $statisticsRow['count'] * 100 / $sumCount
                : 0;
        }

        return $statisticsRows;
    }

    /**
     * Find Top search keywords with results
     *
     * @param int $rootPageId
     * @param int $days number of days of history to query
     * @param int $limit
     * @return array
     */
    public function getTopKeyWordsWithHits($rootPageId, $days = 30, $limit = 10)
    {
        return $this->getTopKeyWordsWithOrWithoutHits($rootPageId, $days, $limit, false);
    }

    /**
     * Find Top search keywords without results
     *
     * @param int $rootPageId
     * @param int $days number of days of history to query
     * @param int $limit
     * @return array
     */
    public function getTopKeyWordsWithoutHits($rootPageId, $days = 30, $limit = 10)
    {
        return $this->getTopKeyWordsWithOrWithoutHits($rootPageId, $days, $limit, true);
    }

    /**
     * Find Top search keywords with or without results
     *
     * @param int $rootPageId
     * @param int $days number of days of history to query
     * @param int $limit
     * @param bool $withoutHits true selects keywords with num_found = 0, false those with num_found > 0
     * @return array list of merged rows with the keys mergedrows, count, hits and keywords;
     *         empty when the query yields no rows
     */
    protected function getTopKeyWordsWithOrWithoutHits($rootPageId, $days = 30, $limit = 10, $withoutHits = false)
    {
        $rootPageId = (int) $rootPageId;
        $limit = (int) $limit;
        $timeStart = $this->getTimeStart($days);

        // Check if we want without or with hits
        $comparisonOperator = ((bool) $withoutHits) ? '=' : '>';

        $statisticsRows = $this->getDatabase()->exec_SELECTgetRows(
            'keywords, count(keywords) as count, num_found as hits',
            'tx_solr_statistics',
            'tstamp > ' . $timeStart . ' AND root_pid = ' . $rootPageId . ' AND num_found ' . $comparisonOperator . ' 0',
            'keywords, num_found',
            'count DESC, hits DESC, keywords ASC',
            $limit
        );

        // The query result may be null, which mergeRowsWithSameKeyword() does not accept.
        return $this->mergeRowsWithSameKeyword($this->ensureArray($statisticsRows));
    }

    /**
     * This method groups rows with the same term and different count and hits.
     *
     * For every term the counts are summed up and the hits are averaged over
     * the merged rows. The average is truncated to an integer.
     *
     * @param array $statisticsRows rows with the keys keywords, count and hits
     * @return array list with one row per term (keys: mergedrows, count, hits, keywords),
     *         in order of first appearance of the term
     */
    protected function mergeRowsWithSameKeyword(array $statisticsRows)
    {
        $result = [];
        // Exact sum of hits per term. Averaging from the sum avoids accumulating
        // the truncation error of an integer running average.
        $hitSums = [];

        foreach ($statisticsRows as $statisticsRow) {
            $term = $statisticsRow['keywords'];

            if (!isset($result[$term])) {
                $result[$term] = ['mergedrows' => 0, 'count' => 0, 'hits' => 0, 'keywords' => $term];
                $hitSums[$term] = 0;
            }

            $result[$term]['mergedrows']++;

            // for the hits we need to take the average
            $hitSums[$term] += (int) $statisticsRow['hits'];
            $result[$term]['hits'] = (int) ($hitSums[$term] / $result[$term]['mergedrows']);

            // for the count we need to take the sum, because it's the sum of searches
            $result[$term]['count'] += $statisticsRow['count'];
        }

        return array_values($result);
    }

    /**
     * Get number of queries over time
     *
     * @param int $rootPageId
     * @param int $days number of days of history to query
     * @param int $bucketSeconds Seconds per bucket; values below 1 fall back to 3600
     * @return array list of rows with the keys bucket, tstamp and numQueries, ordered by bucket;
     *         empty when the query yields no rows
     */
    public function getQueriesOverTime($rootPageId, $days = 30, $bucketSeconds = 3600)
    {
        // Both values are concatenated into the SQL statement, so force them to integers.
        $rootPageId = (int) $rootPageId;
        $bucketSeconds = (int) $bucketSeconds;
        if ($bucketSeconds < 1) {
            // Avoids "tstamp/0" in the statement.
            $bucketSeconds = 3600;
        }
        $timeStart = $this->getTimeStart($days);

        $queries = $this->getDatabase()->exec_SELECTgetRows(
            // tstamp is aggregated because the statement groups by bucket only.
            'FLOOR(tstamp/' . $bucketSeconds . ') AS bucket, MIN(tstamp) AS tstamp, COUNT(*) AS numQueries',
            'tx_solr_statistics',
            'tstamp > ' . $timeStart . ' AND root_pid = ' . $rootPageId,
            'bucket',
            'bucket ASC'
        );

        return $this->ensureArray($queries);
    }

    /**
     * This method is used to get an average value from merged statistic rows.
     *
     * Expects $mergedRow['mergedrows'] to already include the row that is being
     * merged and, for every row after the first, $mergedRow[$fieldName] to hold
     * the average of the previously merged rows.
     *
     * @param array $mergedRow
     * @param array $statisticsRow
     * @param string $fieldName name of the field to average
     * @return float|int
     */
    protected function getAverageFromField(array &$mergedRow, array $statisticsRow,  $fieldName)
    {
        // when this is the first row we can take it.
        if ($mergedRow['mergedrows'] === 1) {
            return $statisticsRow[$fieldName];
        }

        // Rebuild the previous total from the previous average, then average again.
        $previousRowCount = $mergedRow['mergedrows'] - 1;
        $previousTotal = $mergedRow[$fieldName] * $previousRowCount;

        return ($previousTotal + $statisticsRow[$fieldName]) / $mergedRow['mergedrows'];
    }

    /**
     * Calculates the timestamp at which the queried history starts.
     *
     * @param int $days number of days of history to query
     * @return int
     */
    protected function getTimeStart($days)
    {
        return time() - 86400 * (int) $days; // 86400 seconds/day
    }

    /**
     * Turns a query result that is not an array (e.g. null) into an empty list.
     *
     * @param mixed $rows
     * @return array
     */
    protected function ensureArray($rows)
    {
        return is_array($rows) ? $rows : [];
    }

    /**
     * Returns the injected database connection or, when none was injected,
     * the global connection $GLOBALS['TYPO3_DB'].
     *
     * @return \TYPO3\CMS\Core\Database\DatabaseConnection
     */
    protected function getDatabase()
    {
        if ($this->database !== null) {
            return $this->database;
        }

        return $GLOBALS['TYPO3_DB'];
    }
}
229