1
|
|
|
<?php declare(strict_types=1); |
2
|
|
|
|
3
|
|
|
namespace XoopsModules\Xhelp; |
4
|
|
|
|
5
|
|
|
/* |
6
|
|
|
***** BEGIN LICENSE BLOCK ***** |
7
|
|
|
This file is part of PHP Naive Bayesian Filter. |
8
|
|
|
|
9
|
|
|
The Initial Developer of the Original Code is |
10
|
|
|
Loic d'Anterroches [loic_at_xhtml.net]. |
11
|
|
|
Portions created by the Initial Developer are Copyright (C) 2003 |
12
|
|
|
the Initial Developer. All Rights Reserved. |
13
|
|
|
|
14
|
|
|
Contributor(s): |
15
|
|
|
See the source |
16
|
|
|
|
17
|
|
|
PHP Naive Bayesian Filter is free software; you can redistribute it |
18
|
|
|
and/or modify it under the terms of the GNU General Public License as |
19
|
|
|
published by the Free Software Foundation; either version 2 of |
20
|
|
|
the License, or (at your option) any later version. |
21
|
|
|
|
22
|
|
|
PHP Naive Bayesian Filter is distributed in the hope that it will |
23
|
|
|
be useful, but WITHOUT ANY WARRANTY; without even the implied |
24
|
|
|
warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. |
25
|
|
|
See the GNU General Public License for more details. |
26
|
|
|
|
27
|
|
|
You should have received a copy of the GNU General Public License |
28
|
|
|
along with Foobar; if not, write to the Free Software |
29
|
|
|
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA |
30
|
|
|
|
31
|
|
|
Alternatively, the contents of this file may be used under the terms of |
32
|
|
|
the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), |
33
|
|
|
in which case the provisions of the LGPL are applicable instead |
34
|
|
|
of those above. |
35
|
|
|
|
36
|
|
|
***** END LICENSE BLOCK ***** |
37
|
|
|
*/ |
38
|
|
|
|
39
|
|
|
/**
 * Access to the storage of the data for the filter.
 *
 * To avoid a dependency on any particular database, this class handles all
 * access to the data storage. You can provide your own class as long as all
 * the methods are available. This implementation goes through the XOOPS
 * database connection and uses the `xhelp_bayes_categories` and
 * `xhelp_bayes_wordfreqs` tables.
 *
 * methods:
 * - array getCategories()
 * - bool  wordExists(string $word)
 * - array getWord(string $word, string $category_id)
 * - bool  updateWord(string $word, int $count, string $category_id)
 * - bool  removeWord(string $word, int $count, string $category_id)
 * - bool  updateProbabilities()
 * - bool  saveReference($doc_id, $category_id, $content)
 * - array getReference($doc_id)
 * - bool  removeReference($doc_id)
 */
class NaiveBayesianStorage
{
    /** @var object database connection obtained from \XoopsDatabaseFactory */
    public $db;
    /** @var object text sanitizer obtained from \MyTextSanitizer::getInstance() */
    public $myts;

    /**
     * NaiveBayesianStorage constructor.
     *
     * Grabs the shared XOOPS database connection and text sanitizer.
     */
    public function __construct()
    {
        $this->db   = \XoopsDatabaseFactory::getDatabaseConnection();
        $this->myts = \MyTextSanitizer::getInstance();
    }

    /**
     * Get the list of categories with basic data.
     *
     * @return array key = category ids, values = array(keys = 'probability', 'word_count');
     *               empty when the query fails or the table has no rows
     */
    public function getCategories(): array
    {
        $categories = [];

        $result = $this->db->query('SELECT * FROM ' . $this->db->prefix('xhelp_bayes_categories'));
        if (!$result) {
            return $categories;
        }

        // fetchArray() yields rows keyed by column name; anything that is not
        // an array (false or null, depending on the driver) ends the loop.
        while (\is_array($row = $this->db->fetchArray($result))) {
            $categories[$row['category_id']] = [
                'probability' => $row['probability'],
                'word_count'  => $row['word_count'],
            ];
        }

        return $categories;
    }

    /**
     * See if the word is an already learnt word.
     *
     * @param mixed $word
     * @return bool true when at least one row exists for the word in any category
     */
    public function wordExists($word): bool
    {
        $criteria = new \Criteria('word', $word);

        // The separating space matters: the WHERE clause is appended verbatim.
        $sql = 'SELECT COUNT(*) AS WordCount FROM ' . $this->db->prefix('xhelp_bayes_wordfreqs') . ' ' . $criteria->renderWhere();

        $result = $this->db->query($sql);
        if (!$result) {
            return false;
        }

        $row = $this->db->fetchArray($result);

        return \is_array($row) && (int)$row['WordCount'] > 0;
    }

    /**
     * Get details of a word in a category.
     *
     * @param mixed $word
     * @param mixed $category_id
     * @return array ('count' => int); the count is 0 when the word is unknown
     *               in the category or the query fails
     */
    public function getWord($word, $category_id): array
    {
        $details = ['count' => 0];

        $criteria = new \CriteriaCompo(new \Criteria('word', $word));
        $criteria->add(new \Criteria('category_id', $category_id));

        $sql = 'SELECT `count` FROM ' . $this->db->prefix('xhelp_bayes_wordfreqs') . ' ' . $criteria->renderWhere();

        $result = $this->db->query($sql);
        if (!$result) {
            return $details;
        }

        // A successful query with no matching row must still produce an array.
        $row = $this->db->fetchArray($result);
        if (\is_array($row) && isset($row['count'])) {
            $details['count'] = (int)$row['count'];
        }

        return $details;
    }

    /**
     * Update a word in a category.
     *
     * If the word is new in this category it is added, else only the count is updated.
     *
     * NOTE(review): the existence check in getWord() uses the raw word while the
     * write uses the censored word from cleanVar(); confirm both are meant to match.
     *
     * @param string $word
     * @param int    $count       amount to add to the stored count
     * @param string $category_id
     * @return bool success
     */
    public function updateWord(string $word, int $count, string $category_id): bool
    {
        $table          = $this->db->prefix('xhelp_bayes_wordfreqs');
        $quotedWord     = $this->db->quoteString($this->cleanVar($word));
        $quotedCategory = $this->db->quoteString($this->cleanVar($category_id));

        $existing = $this->getWord($word, $category_id);

        if (0 === (int)$existing['count']) {
            $sql = \sprintf('INSERT INTO `%s` (`word`, `category_id`, `count`) VALUES (%s, %s, %d)', $table, $quotedWord, $quotedCategory, $count);
        } else {
            // MySQL has no "+=" operator; the increment has to be spelled out.
            $sql = \sprintf('UPDATE `%s` SET `count` = `count` + %d WHERE `category_id` = %s AND `word` = %s', $table, $count, $quotedCategory, $quotedWord);
        }

        return (bool)$this->db->query($sql);
    }

    /**
     * Remove a word from a category.
     *
     * The row is deleted when the stored count would drop to zero or below,
     * otherwise the count is decremented.
     *
     * @param string $word
     * @param int    $count       amount to subtract from the stored count
     * @param string $category_id
     * @return bool success
     */
    public function removeWord(string $word, int $count, string $category_id): bool
    {
        $table          = $this->db->prefix('xhelp_bayes_wordfreqs');
        $quotedWord     = $this->db->quoteString($this->cleanVar($word));
        $quotedCategory = $this->db->quoteString($this->cleanVar($category_id));

        $existing = $this->getWord($word, $category_id);
        $current  = (int)$existing['count'];

        if (0 !== $current && ($current - $count) <= 0) {
            $sql = \sprintf('DELETE FROM `%s` WHERE `word` = %s AND `category_id` = %s', $table, $quotedWord, $quotedCategory);
        } else {
            // MySQL has no "-=" operator; the decrement has to be spelled out.
            $sql = \sprintf('UPDATE `%s` SET `count` = `count` - %d WHERE `category_id` = %s AND `word` = %s', $table, $count, $quotedCategory, $quotedWord);
        }

        return (bool)$this->db->query($sql);
    }

    /**
     * Update the probabilities of the categories and word count.
     *
     * This function must be run after a set of training.
     *
     * @return bool success; false when the totals cannot be read or any update fails
     */
    public function updateProbabilities(): bool
    {
        $wordTable     = $this->db->prefix('xhelp_bayes_wordfreqs');
        $categoryTable = $this->db->prefix('xhelp_bayes_categories');

        // First collect the word count of each category.
        $result = $this->db->query('SELECT category_id, SUM(`count`) AS total FROM ' . $wordTable . ' GROUP BY category_id');
        if (!$result) {
            return false;
        }

        $totals     = [];
        $totalWords = 0;
        while (\is_array($row = $this->db->fetchArray($result))) {
            $categoryTotal              = (int)$row['total'];
            $totalWords                 += $categoryTotal;
            $totals[$row['category_id']] = $categoryTotal;
        }

        // Nothing learnt yet: reset every category instead of dividing by zero.
        if (0 === $totalWords) {
            return (bool)$this->db->query('UPDATE ' . $categoryTable . ' SET word_count=0, probability=0');
        }

        $success = true;
        foreach ($totals as $categoryId => $categoryTotal) {
            // Each category's probability is its share of all learnt words.
            $probability = $categoryTotal / $totalWords;

            // PHP turns numeric-string array keys into ints, so cast back before
            // handing the id to the string-typed cleanVar(). %F keeps the decimal
            // point independent of the current locale.
            $sql = \sprintf(
                'UPDATE `%s` SET `word_count` = %d, `probability` = %F WHERE `category_id` = %s',
                $categoryTable,
                $categoryTotal,
                $probability,
                $this->db->quoteString($this->cleanVar((string)$categoryId))
            );

            if (!$this->db->query($sql)) {
                $success = false;
            }
        }

        return $success;
    }

    /**
     * Save a reference in the database.
     *
     * Currently a no-op that always reports success.
     *
     * @param mixed $doc_id
     * @param mixed $category_id
     * @param mixed $content
     * @return bool success
     */
    public function saveReference($doc_id, $category_id, $content): bool
    {
        return true;
    }

    /**
     * Get a reference from the database.
     *
     * The reference is built from the ticket with the given id, loaded through
     * the module's Ticket handler.
     *
     * NOTE(review): 'category_id' is built from 'ticketid', the same field used
     * for 'id'; confirm this should not be another ticket field.
     *
     * @param mixed $doc_id
     * @return array reference('id' => ..., 'content' => ..., 'category_id' => ...),
     *               or an empty array when the ticket cannot be loaded
     */
    public function getReference($doc_id): array
    {
        $helper = Helper::getInstance();
        /** @var \XoopsModules\Xhelp\TicketHandler $ticketHandler */
        $ticketHandler = $helper->getHandler('Ticket');
        $ticket        = $ticketHandler->get($doc_id);
        $ref           = [];

        if (!$ticket) {
            return $ref;
        }

        $ref['id']          = $ticket->getVar('ticketid');
        $ref['content']     = $ticket->getVar('subject') . "\r\n" . $ticket->getVar('description');
        $ref['category_id'] = 'P' . $ticket->getVar('ticketid');

        return $ref;
    }

    /**
     * Remove a reference from the database.
     *
     * Currently a no-op that always reports success.
     *
     * @param mixed $doc_id
     * @return bool success
     */
    public function removeReference($doc_id): bool
    {
        return true;
    }

    /**
     * Run a value through the text sanitizer's censorString() before it is
     * quoted and placed into a SQL statement.
     *
     * @param string $var
     * @return string
     */
    private function cleanVar(string $var): string
    {
        return $this->myts->censorString($var);
    }
}
262
|
|
|
|