SEARCHLOG::__construct()   A
last analyzed

Complexity

Conditions 1
Paths 1

Size

Total Lines 4
Code Lines 2

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 0
CRAP Score 2

Importance

Changes 0
Metric Value
cc 1
eloc 2
nc 1
nop 0
dl 0
loc 4
rs 10
c 0
b 0
f 0
ccs 0
cts 2
cp 0
crap 2
1
<?php
2
3
/*
4
For doing stuff with searchlogs.
5
6
To add a new searchlog do this:
7
    global $SEARCHLOG;
8
    $SEARCHLOG->add(
9
        array('query' => $searchstring,
10
              'page' => $page,
11
              'hits' => $count));
12
The date/time and IP address are automatically stored.
13
14
To get the ten most popular searches in the last day:
15
    global $SEARCHLOG;
16
    $popular_searches = $SEARCHLOG->popular_recent(10);
17
The return value is an array.  Each element of the form
18
    array(  'query' => '"new york"',
19
            'visible_name' => 'new york',
20
            'url' => 'https://www.theyworkforyou.com/search/?s=%22new+york%22&pop=1',
21
            'display' => '<a href="https://www.theyworkforyou.com/search/?s=%22new+york%22&pop=1">new york</a>")
22
Note that the url includes "pop=1" which stops popular searches feeding back
23
into being more popular.
24
25
*/
26
27
class SEARCHLOG {
28
    private $SEARCHURL;
29
    private $db;
30
31
    public function __construct() {
32
        $this->SEARCHURL = new \MySociety\TheyWorkForYou\Url('search');
33
34
        $this->db = new ParlDB();
35
    }
36
37
    public function add($searchlogdata) {
38
39
        $ip = getenv('REMOTE_ADDR');
40
        if (preg_match('#66\.249\.(6[4-9]|[78]\d|9[0-5])\.#', $ip)) { # Googlebot
41
            return;
42
        }
43
        if (isset($_SERVER['HTTP_USER_AGENT']) && preg_match('#simplepie|bot#i', $_SERVER['HTTP_USER_AGENT'])) {
44
            return;
45
        }
46
47
        $this->db->query("INSERT INTO search_query_log (
48
                query_string,
49
                page_number,
50
                count_hits,
51
                ip_address,
52
                query_time
53
            )
54
            VALUES (
55
                :query_string,
56
                :page_number,
57
                :count_hits,
58
                :ip_address,
59
                NOW()
60
            )
61
        ", [
62
            ':query_string' => $searchlogdata['query'],
63
            ':page_number' => $searchlogdata['page'],
64
            ':count_hits' => $searchlogdata['hits'],
65
            ':ip_address' => $ip,
66
        ]);
67
68
    }
69
70
    // Select popular queries
71
    public function popular_recent(int $count, ?int $max_chars = null, ?int $minimum_hits = 5, ?string $section_string = null) {
72
73
        // Banned terms
74
        $banned_terms = ["twat"];
75
76
        $query = "SELECT query_string, COUNT(DISTINCT ip_address) AS c FROM search_query_log
77
            WHERE count_hits != 0 AND page_number=1
78
            AND query_time > date_sub(NOW(), INTERVAL 3 DAY)";
79
80
        // allow limiting by section (e.g. to show a Scotland specific search)
81
        if (!empty($section_string)) {
82
            $query .= " AND query_string LIKE :section_string";
83
        }
84
85
        $query .= " GROUP BY query_string HAVING c >= :minimum_hits
86
            ORDER BY c desc LIMIT :count;";
87
88
        $params = [
89
            ':minimum_hits' => $minimum_hits,
90
            ':count' => $count,
91
        ];
92
93
        if (!empty($section_string)) {
94
            $params[':section_string'] = '%' . $section_string . '%';
95
        }
96
97
        $q = $this->db->query($query, $params);
98
99
100
        $popular_searches = [];
101
        foreach ($q as $row) {
102
            $row_array = $this->_db_row_to_array($row, $section_string);
103
104
            // exclude all queries where a banned term is part of the query
105
            $banned = false;
106
            foreach ($banned_terms as $term) {
107
                if (stripos($row_array['query'], $term) !== false) {
108
                    $banned = true;
109
                    break;
110
                }
111
            }
112
            if (!$banned) {
113
                array_push($popular_searches, $row_array);
114
            }
115
        }
116
117
        //maximum number of chars?
118
        if (isset($max_chars) && $max_chars > 0) {
119
            $lentotal = 0;
120
            $correct_amount = [];
121
            // Select a number of queries that will fit in the space
122
            foreach ($popular_searches as $popular_search) {
123
                $len = strlen($popular_search['visible_name']);
124
                if ($lentotal + $len > $max_chars) {
125
                    continue;
126
                }
127
                $lentotal += $len;
128
                array_push($correct_amount, $popular_search);
129
            }
130
            $popular_searches = $correct_amount;
131
        }
132
133
        return $popular_searches;
134
    }
135
136
    public function _db_row_to_array($row, $section_string = null) {
137
        $query = $row['query_string'];
138
        $this->SEARCHURL->insert(['s' => $query, 'pop' => 1]);
139
        $url = $this->SEARCHURL->generate();
140
        $htmlescape = 1;
141
        if (preg_match('#speaker:(\d+)#', $query, $m)) {
142
            $qq = $this->db->query('SELECT house, title, given_name, family_name, lordofname
143
                FROM member, person_names pn
144
                WHERE member.person_id = pn.person_id and member.person_id=:pid
145
                AND pn.type="name" AND pn.end_date = (SELECT MAX(end_date) from person_names where person_names.person_id=member.person_id)
146
                ORDER BY end_date DESC LIMIT 1', [':pid' => $m[1]])->first();
147
            if ($qq) {
148
                $name = member_full_name($qq['house'], $qq['title'], $qq['given_name'], $qq['family_name'], $qq['lordofname']);
149
                $query = preg_replace('#speaker:(\d+)#', $name, $query);
150
                #$htmlescape = 0;
151
            }
152
        }
153
        $visible_name = preg_replace('/"/', '', $query);
154
155
        if (isset($section_string)) {
156
            $visible_name = preg_replace('/' . preg_quote($section_string) . '/', '', $visible_name);
157
        }
158
159
        $rowarray = $row;
160
        $rowarray['query'] = $query;
161
        $rowarray['visible_name'] = $visible_name;
162
        $rowarray['url'] = $url;
163
        $rowarray['display'] = '<a href="' . $url . '">' . ($htmlescape ? _htmlentities($visible_name) : $visible_name) . '</a>';
164
165
        return $rowarray;
166
    }
167
168
    public function admin_recent_searches($count) {
169
170
        $q = $this->db->query("SELECT query_string, page_number, count_hits, ip_address, query_time
171
                FROM search_query_log ORDER BY query_time desc LIMIT $count");
172
        $searches_array = [];
173
        foreach ($q as $row) {
174
            array_push($searches_array, $this->_db_row_to_array($row));
175
        }
176
177
        return $searches_array;
178
    }
179
180
    public function admin_popular_searches($count) {
181
182
        $q =  $this->db->query("SELECT query_string, count(*) AS c FROM search_query_log
183
                WHERE count_hits != 0 AND query_string NOT LIKE '%speaker:%'
184
                AND query_time > date_sub(NOW(), INTERVAL 30 DAY)
185
                GROUP BY query_string ORDER BY c desc LIMIT $count;");
186
187
        $popular_searches = [];
188
        foreach ($q as $row) {
189
            array_push($popular_searches, $this->_db_row_to_array($row));
190
        }
191
192
        return $popular_searches;
193
    }
194
195
196
    public function admin_failed_searches() {
197
198
        $q = $this->db->query("SELECT query_string, page_number, count_hits, ip_address, query_time,
199
                COUNT(*) AS group_count, MIN(query_time) AS min_time, MAX(query_time) AS max_time,
200
                COUNT(distinct ip_address) as count_ips
201
                FROM search_query_log GROUP BY query_string HAVING count_hits = 0
202
                ORDER BY count_ips DESC, max_time DESC");
203
        $searches_array = [];
204
        foreach ($q as $row) {
205
            array_push($searches_array, $this->_db_row_to_array($row));
206
        }
207
208
        return $searches_array;
209
    }
210
211
}
212
213
global $SEARCHLOG;
214
$SEARCHLOG = new SEARCHLOG();
215