|
1
|
|
|
<?php |
|
2
|
|
|
|
|
3
|
|
|
/* |
|
4
|
|
|
For doing stuff with searchlogs. |
|
5
|
|
|
|
|
6
|
|
|
To add a new searchlog do this: |
|
7
|
|
|
global $SEARCHLOG; |
|
8
|
|
|
$SEARCHLOG->add( |
|
9
|
|
|
array('query' => $searchstring, |
|
10
|
|
|
'page' => $page, |
|
11
|
|
|
'hits' => $count)); |
|
12
|
|
|
The date/time and IP address are automatically stored. |
|
13
|
|
|
|
|
14
|
|
|
To get the ten most popular searches in the last day: |
|
15
|
|
|
global $SEARCHLOG; |
|
16
|
|
|
$popular_searches = $SEARCHLOG->popular_recent(10); |
|
17
|
|
|
The return value is an array. Each element of the form |
|
18
|
|
|
array( 'query' => '"new york"', |
|
19
|
|
|
'visible_name' => 'new york', |
|
20
|
|
|
'url' => 'https://www.theyworkforyou.com/search/?s=%22new+york%22&pop=1', |
|
21
|
|
|
'display' => '<a href="https://www.theyworkforyou.com/search/?s=%22new+york%22&pop=1">new york</a>") |
|
22
|
|
|
Note that the url includes "pop=1" which stops popular searches feeding back |
|
23
|
|
|
into being more popular. |
|
24
|
|
|
|
|
25
|
|
|
*/ |
|
26
|
|
|
|
|
27
|
|
|
class SEARCHLOG { |
|
28
|
|
|
private $SEARCHURL; |
|
29
|
|
|
private $db; |
|
30
|
|
|
|
|
31
|
|
|
public function __construct() { |
|
32
|
|
|
$this->SEARCHURL = new \MySociety\TheyWorkForYou\Url('search'); |
|
33
|
|
|
|
|
34
|
|
|
$this->db = new ParlDB(); |
|
35
|
|
|
} |
|
36
|
|
|
|
|
37
|
|
|
public function add($searchlogdata) { |
|
38
|
|
|
|
|
39
|
|
|
$ip = getenv('REMOTE_ADDR'); |
|
40
|
|
|
if (preg_match('#66\.249\.(6[4-9]|[78]\d|9[0-5])\.#', $ip)) { # Googlebot |
|
41
|
|
|
return; |
|
42
|
|
|
} |
|
43
|
|
|
if (isset($_SERVER['HTTP_USER_AGENT']) && preg_match('#simplepie|bot#i', $_SERVER['HTTP_USER_AGENT'])) { |
|
44
|
|
|
return; |
|
45
|
|
|
} |
|
46
|
|
|
|
|
47
|
|
|
$this->db->query("INSERT INTO search_query_log ( |
|
48
|
|
|
query_string, |
|
49
|
|
|
page_number, |
|
50
|
|
|
count_hits, |
|
51
|
|
|
ip_address, |
|
52
|
|
|
query_time |
|
53
|
|
|
) |
|
54
|
|
|
VALUES ( |
|
55
|
|
|
:query_string, |
|
56
|
|
|
:page_number, |
|
57
|
|
|
:count_hits, |
|
58
|
|
|
:ip_address, |
|
59
|
|
|
NOW() |
|
60
|
|
|
) |
|
61
|
|
|
", [ |
|
62
|
|
|
':query_string' => $searchlogdata['query'], |
|
63
|
|
|
':page_number' => $searchlogdata['page'], |
|
64
|
|
|
':count_hits' => $searchlogdata['hits'], |
|
65
|
|
|
':ip_address' => $ip, |
|
66
|
|
|
]); |
|
67
|
|
|
|
|
68
|
|
|
} |
|
69
|
|
|
|
|
70
|
|
|
// Select popular queries |
|
71
|
|
|
public function popular_recent(int $count, ?int $max_chars = null, ?int $minimum_hits = 5, ?string $section_string = null) { |
|
72
|
|
|
|
|
73
|
|
|
// Banned terms |
|
74
|
|
|
$banned_terms = ["twat"]; |
|
75
|
|
|
|
|
76
|
|
|
$query = "SELECT query_string, COUNT(DISTINCT ip_address) AS c FROM search_query_log |
|
77
|
|
|
WHERE count_hits != 0 AND page_number=1 |
|
78
|
|
|
AND query_time > date_sub(NOW(), INTERVAL 3 DAY)"; |
|
79
|
|
|
|
|
80
|
|
|
// allow limiting by section (e.g. to show a Scotland specific search) |
|
81
|
|
|
if (!empty($section_string)) { |
|
82
|
|
|
$query .= " AND query_string LIKE :section_string"; |
|
83
|
|
|
} |
|
84
|
|
|
|
|
85
|
|
|
$query .= " GROUP BY query_string HAVING c >= :minimum_hits |
|
86
|
|
|
ORDER BY c desc LIMIT :count;"; |
|
87
|
|
|
|
|
88
|
|
|
$params = [ |
|
89
|
|
|
':minimum_hits' => $minimum_hits, |
|
90
|
|
|
':count' => $count, |
|
91
|
|
|
]; |
|
92
|
|
|
|
|
93
|
|
|
if (!empty($section_string)) { |
|
94
|
|
|
$params[':section_string'] = '%' . $section_string . '%'; |
|
95
|
|
|
} |
|
96
|
|
|
|
|
97
|
|
|
$q = $this->db->query($query, $params); |
|
98
|
|
|
|
|
99
|
|
|
|
|
100
|
|
|
$popular_searches = []; |
|
101
|
|
|
foreach ($q as $row) { |
|
102
|
|
|
$row_array = $this->_db_row_to_array($row, $section_string); |
|
103
|
|
|
|
|
104
|
|
|
// exclude all queries where a banned term is part of the query |
|
105
|
|
|
$banned = false; |
|
106
|
|
|
foreach ($banned_terms as $term) { |
|
107
|
|
|
if (stripos($row_array['query'], $term) !== false) { |
|
108
|
|
|
$banned = true; |
|
109
|
|
|
break; |
|
110
|
|
|
} |
|
111
|
|
|
} |
|
112
|
|
|
if (!$banned) { |
|
113
|
|
|
array_push($popular_searches, $row_array); |
|
114
|
|
|
} |
|
115
|
|
|
} |
|
116
|
|
|
|
|
117
|
|
|
//maximum number of chars? |
|
118
|
|
|
if (isset($max_chars) && $max_chars > 0) { |
|
119
|
|
|
$lentotal = 0; |
|
120
|
|
|
$correct_amount = []; |
|
121
|
|
|
// Select a number of queries that will fit in the space |
|
122
|
|
|
foreach ($popular_searches as $popular_search) { |
|
123
|
|
|
$len = strlen($popular_search['visible_name']); |
|
124
|
|
|
if ($lentotal + $len > $max_chars) { |
|
125
|
|
|
continue; |
|
126
|
|
|
} |
|
127
|
|
|
$lentotal += $len; |
|
128
|
|
|
array_push($correct_amount, $popular_search); |
|
129
|
|
|
} |
|
130
|
|
|
$popular_searches = $correct_amount; |
|
131
|
|
|
} |
|
132
|
|
|
|
|
133
|
|
|
return $popular_searches; |
|
134
|
|
|
} |
|
135
|
|
|
|
|
136
|
|
|
public function _db_row_to_array($row, $section_string = null) { |
|
137
|
|
|
$query = $row['query_string']; |
|
138
|
|
|
$this->SEARCHURL->insert(['s' => $query, 'pop' => 1]); |
|
139
|
|
|
$url = $this->SEARCHURL->generate(); |
|
140
|
|
|
$htmlescape = 1; |
|
141
|
|
|
if (preg_match('#speaker:(\d+)#', $query, $m)) { |
|
142
|
|
|
$qq = $this->db->query('SELECT house, title, given_name, family_name, lordofname |
|
143
|
|
|
FROM member, person_names pn |
|
144
|
|
|
WHERE member.person_id = pn.person_id and member.person_id=:pid |
|
145
|
|
|
AND pn.type="name" AND pn.end_date = (SELECT MAX(end_date) from person_names where person_names.person_id=member.person_id) |
|
146
|
|
|
ORDER BY end_date DESC LIMIT 1', [':pid' => $m[1]])->first(); |
|
147
|
|
|
if ($qq) { |
|
148
|
|
|
$name = member_full_name($qq['house'], $qq['title'], $qq['given_name'], $qq['family_name'], $qq['lordofname']); |
|
149
|
|
|
$query = preg_replace('#speaker:(\d+)#', $name, $query); |
|
150
|
|
|
#$htmlescape = 0; |
|
151
|
|
|
} |
|
152
|
|
|
} |
|
153
|
|
|
$visible_name = preg_replace('/"/', '', $query); |
|
154
|
|
|
|
|
155
|
|
|
if (isset($section_string)) { |
|
156
|
|
|
$visible_name = preg_replace('/' . preg_quote($section_string) . '/', '', $visible_name); |
|
157
|
|
|
} |
|
158
|
|
|
|
|
159
|
|
|
$rowarray = $row; |
|
160
|
|
|
$rowarray['query'] = $query; |
|
161
|
|
|
$rowarray['visible_name'] = $visible_name; |
|
162
|
|
|
$rowarray['url'] = $url; |
|
163
|
|
|
$rowarray['display'] = '<a href="' . $url . '">' . ($htmlescape ? _htmlentities($visible_name) : $visible_name) . '</a>'; |
|
164
|
|
|
|
|
165
|
|
|
return $rowarray; |
|
166
|
|
|
} |
|
167
|
|
|
|
|
168
|
|
|
public function admin_recent_searches($count) { |
|
169
|
|
|
|
|
170
|
|
|
$q = $this->db->query("SELECT query_string, page_number, count_hits, ip_address, query_time |
|
171
|
|
|
FROM search_query_log ORDER BY query_time desc LIMIT $count"); |
|
172
|
|
|
$searches_array = []; |
|
173
|
|
|
foreach ($q as $row) { |
|
174
|
|
|
array_push($searches_array, $this->_db_row_to_array($row)); |
|
175
|
|
|
} |
|
176
|
|
|
|
|
177
|
|
|
return $searches_array; |
|
178
|
|
|
} |
|
179
|
|
|
|
|
180
|
|
|
public function admin_popular_searches($count) { |
|
181
|
|
|
|
|
182
|
|
|
$q = $this->db->query("SELECT query_string, count(*) AS c FROM search_query_log |
|
183
|
|
|
WHERE count_hits != 0 AND query_string NOT LIKE '%speaker:%' |
|
184
|
|
|
AND query_time > date_sub(NOW(), INTERVAL 30 DAY) |
|
185
|
|
|
GROUP BY query_string ORDER BY c desc LIMIT $count;"); |
|
186
|
|
|
|
|
187
|
|
|
$popular_searches = []; |
|
188
|
|
|
foreach ($q as $row) { |
|
189
|
|
|
array_push($popular_searches, $this->_db_row_to_array($row)); |
|
190
|
|
|
} |
|
191
|
|
|
|
|
192
|
|
|
return $popular_searches; |
|
193
|
|
|
} |
|
194
|
|
|
|
|
195
|
|
|
|
|
196
|
|
|
public function admin_failed_searches() { |
|
197
|
|
|
|
|
198
|
|
|
$q = $this->db->query("SELECT query_string, page_number, count_hits, ip_address, query_time, |
|
199
|
|
|
COUNT(*) AS group_count, MIN(query_time) AS min_time, MAX(query_time) AS max_time, |
|
200
|
|
|
COUNT(distinct ip_address) as count_ips |
|
201
|
|
|
FROM search_query_log GROUP BY query_string HAVING count_hits = 0 |
|
202
|
|
|
ORDER BY count_ips DESC, max_time DESC"); |
|
203
|
|
|
$searches_array = []; |
|
204
|
|
|
foreach ($q as $row) { |
|
205
|
|
|
array_push($searches_array, $this->_db_row_to_array($row)); |
|
206
|
|
|
} |
|
207
|
|
|
|
|
208
|
|
|
return $searches_array; |
|
209
|
|
|
} |
|
210
|
|
|
|
|
211
|
|
|
} |
|
212
|
|
|
|
|
213
|
|
|
global $SEARCHLOG; |
|
214
|
|
|
$SEARCHLOG = new SEARCHLOG(); |
|
215
|
|
|
|