NaiveBayesianStorage   A
last analyzed

Complexity

Total Complexity 23

Size/Duplication

Total Lines 210
Duplicated Lines 0 %

Importance

Changes 1
Bugs 0 Features 0
Metric Value
eloc 70
c 1
b 0
f 0
dl 0
loc 210
rs 10
wmc 23

11 Methods

Rating   Name   Duplication   Size   Complexity  
A getCategories() 0 14 2
A saveReference() 0 3 1
A removeWord() 0 15 4
A updateWord() 0 16 3
A getWord() 0 15 2
A removeReference() 0 3 1
A cleanVar() 0 3 1
A updateProbabilities() 0 21 4
A wordExists() 0 13 2
A __construct() 0 4 1
A getReference() 0 17 2
1
<?php declare(strict_types=1);
2
3
namespace XoopsModules\Xhelp;
4
5
/*
6
 ***** BEGIN LICENSE BLOCK *****
7
 This file is part of PHP Naive Bayesian Filter.
8
9
 The Initial Developer of the Original Code is
10
 Loic d'Anterroches [loic_at_xhtml.net].
11
 Portions created by the Initial Developer are Copyright (C) 2003
12
 the Initial Developer. All Rights Reserved.
13
14
 Contributor(s):
15
 See the source
16
17
 PHP Naive Bayesian Filter is free software; you can redistribute it
18
 and/or modify it under the terms of the GNU General Public License as
19
 published by the Free Software Foundation; either version 2 of
20
 the License, or (at your option) any later version.
21
22
 PHP Naive Bayesian Filter is distributed in the hope that it will
23
 be useful, but WITHOUT ANY WARRANTY; without even the implied
24
 warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
25
 See the GNU General Public License for more details.
26
27
 You should have received a copy of the GNU General Public License
28
 along with Foobar; if not, write to the Free Software
29
 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
30
31
 Alternatively, the contents of this file may be used under the terms of
32
 the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
33
 in which case the provisions of the LGPL are applicable instead
34
 of those above.
35
36
 ***** END LICENSE BLOCK *****
37
 */
38
39
/**
 * Access to the storage of the data for the filter.
 *
 * To avoid a dependency on any particular database, this class handles all
 * access to the data storage. You can provide your own class as long as
 * all the methods are available. The current one relies on the XOOPS
 * database connection (MySQL).
 *
 * methods:
 * - array getCategories()
 * - bool  wordExists(string $word)
 * - array getWord(string $word, string $categoryid)
 */
class NaiveBayesianStorage
{
    /** @var \XoopsDatabase connection returned by \XoopsDatabaseFactory::getDatabaseConnection() */
    public $db;
    /** @var \MyTextSanitizer sanitizer instance used by cleanVar() */
    public $myts;

    /**
     * NaiveBayesianStorage constructor.
     *
     * Grabs the shared database connection and text sanitizer.
     */
    public function __construct()
    {
        $this->db   = \XoopsDatabaseFactory::getDatabaseConnection();
        $this->myts = \MyTextSanitizer::getInstance();
    }

    /**
     * Get the list of categories with basic data.
     *
     * @return array key = category ids, values = array(keys = 'probability', 'word_count');
     *               empty when the query fails or the table has no rows
     */
    public function getCategories(): array
    {
        $categories = [];

        $result = $this->db->query('SELECT * FROM ' . $this->db->prefix('xhelp_bayes_categories'));
        if (!$result) {
            return $categories;
        }

        // Rows are read by column name, so an associative fetch is required.
        // The is_array() test ends the loop on either null or false.
        while (\is_array($row = $this->db->fetchArray($result))) {
            $categories[$row['category_id']] = [
                'probability' => $row['probability'],
                'word_count'  => $row['word_count'],
            ];
        }

        return $categories;
    }

    /**
     * See if the word is an already learnt word.
     *
     * @param mixed $word
     * @return bool true when at least one row exists for the word; false otherwise or on query failure
     */
    public function wordExists($word): bool
    {
        $criteria = new \Criteria('word', $word);

        // A separating space is needed between the table name and the WHERE clause.
        $sql    = 'SELECT COUNT(*) AS WordCount FROM ' . $this->db->prefix('xhelp_bayes_wordfreqs') . ' ' . $criteria->renderWhere();
        $result = $this->db->query($sql);

        if (!$result) {
            return false;
        }

        $row = $this->db->fetchArray($result);

        return \is_array($row) && (int)$row['WordCount'] > 0;
    }

    /**
     * Get details of a word in a category.
     *
     * @param mixed $word
     * @param mixed $category_id
     * @return array ('count' => count); count is 0 when the word is unknown in
     *               the category or the query fails
     */
    public function getWord($word, $category_id): array
    {
        $criteria = new \CriteriaCompo(new \Criteria('word', $word));
        $criteria->add(new \Criteria('category_id', $category_id));

        $sql    = 'SELECT `count` FROM ' . $this->db->prefix('xhelp_bayes_wordfreqs') . ' ' . $criteria->renderWhere();
        $result = $this->db->query($sql);
        $row    = $result ? $this->db->fetchArray($result) : null;

        // No row (or a failed query) means the word has never been counted here.
        if (!\is_array($row)) {
            return ['count' => 0];
        }

        return ['count' => (int)($row['count'] ?? 0)];
    }

    /**
     * Update a word in a category.
     *
     * If the word is new in this category it is added, else only the count is updated.
     *
     * @param string $word
     * @param int    $count       amount to add to the stored count
     * @param string $category_id
     * @return bool success
     */
    public function updateWord(string $word, int $count, string $category_id): bool
    {
        $table       = $this->db->prefix('xhelp_bayes_wordfreqs');
        $quotedWord  = $this->db->quoteString($this->cleanVar($word));
        $quotedCatId = $this->db->quoteString($this->cleanVar($category_id));

        $existing = $this->getWord($word, $category_id);
        if (0 === (int)$existing['count']) {
            $sql = \sprintf('INSERT INTO `%s` (word, category_id, `count`) VALUES (%s, %s, %d)', $table, $quotedWord, $quotedCatId, $count);
        } else {
            // MySQL has no "+=" operator; the increment must be spelled out.
            $sql = \sprintf('UPDATE `%s` SET `count` = `count` + %d WHERE category_id = %s AND word = %s', $table, $count, $quotedCatId, $quotedWord);
        }

        return (bool)$this->db->query($sql);
    }

    /**
     * Remove a word from a category.
     *
     * The row is deleted when the stored count would drop to zero or below,
     * otherwise the stored count is decremented.
     *
     * @param string $word
     * @param int    $count       amount to subtract from the stored count
     * @param string $category_id
     * @return bool success
     */
    public function removeWord(string $word, int $count, string $category_id): bool
    {
        $table       = $this->db->prefix('xhelp_bayes_wordfreqs');
        $quotedWord  = $this->db->quoteString($this->cleanVar($word));
        $quotedCatId = $this->db->quoteString($this->cleanVar($category_id));

        $existing = (int)$this->getWord($word, $category_id)['count'];
        if (0 !== $existing && ($existing - $count) <= 0) {
            $sql = \sprintf('DELETE FROM `%s` WHERE word = %s AND category_id = %s', $table, $quotedWord, $quotedCatId);
        } else {
            // MySQL has no "-=" operator; the decrement must be spelled out.
            $sql = \sprintf('UPDATE `%s` SET `count` = `count` - %d WHERE category_id = %s AND word = %s', $table, $count, $quotedCatId, $quotedWord);
        }

        return (bool)$this->db->query($sql);
    }

    /**
     * Update the probabilities of the categories and word count.
     *
     * This function must be run after a set of training.
     *
     * @return bool success; false when the aggregate query or any update fails
     */
    public function updateProbabilities(): bool
    {
        $categoryTable = $this->db->prefix('xhelp_bayes_categories');

        // first collect the word count of each category
        $result = $this->db->query('SELECT category_id, SUM(`count`) AS total FROM ' . $this->db->prefix('xhelp_bayes_wordfreqs') . ' GROUP BY category_id');
        if (!$result) {
            return false;
        }

        $totalWords = 0;
        $cat        = [];
        while (\is_array($row = $this->db->fetchArray($result))) {
            $totalWords                += (int)$row['total'];
            $cat[$row['category_id']] = (int)$row['total'];
        }

        // word_count and probability are the columns getCategories() reads from
        // the categories table, so that is the table that must be written.
        if (0 === $totalWords) {
            return (bool)$this->db->query('UPDATE ' . $categoryTable . ' SET word_count=0, probability=0');
        }

        $success = true;
        foreach ($cat as $cat_id => $cat_total) {
            // Calculate each category's probability
            $proba = $cat_total / $totalWords;
            // Numeric-looking ids become int array keys, so cast back to string
            // for cleanVar(); %F keeps the decimal point locale-independent.
            $updated = $this->db->query(\sprintf('UPDATE `%s` SET word_count = %d, probability = %F WHERE category_id = %s', $categoryTable, $cat_total, $proba, $this->db->quoteString($this->cleanVar((string)$cat_id))));
            $success = $success && (bool)$updated;
        }

        return $success;
    }

    /**
     * Save a reference in the database.
     *
     * This storage keeps no copy of references, so the call is a no-op.
     *
     * @param mixed $doc_id
     * @param mixed $category_id
     * @param mixed $content
     * @return bool always true
     */
    public function saveReference($doc_id, $category_id, $content): bool
    {
        return true;
    }

    /**
     * Get a reference from the database.
     *
     * The reference is built from the ticket returned by the 'Ticket' handler.
     *
     * @param mixed $doc_id ticket id
     * @return array reference (id => ..., content => ..., category_id => ...), or an empty array
     *               when the ticket cannot be loaded
     */
    public function getReference($doc_id): array
    {
        $helper = Helper::getInstance();
        /** @var \XoopsModules\Xhelp\TicketHandler $ticketHandler */
        $ticketHandler = $helper->getHandler('Ticket');
        $ticket        = $ticketHandler->get($doc_id);

        if (!$ticket) {
            return [];
        }

        // NOTE(review): category_id is derived from the ticket id here; confirm
        // this is the intended category key for the filter.
        return [
            'id'          => $ticket->getVar('ticketid'),
            'content'     => $ticket->getVar('subject') . "\r\n" . $ticket->getVar('description'),
            'category_id' => 'P' . $ticket->getVar('ticketid'),
        ];
    }

    /**
     * Remove a reference from the database.
     *
     * This storage keeps no copy of references, so the call is a no-op.
     *
     * @param mixed $doc_id
     * @return bool always true
     */
    public function removeReference($doc_id): bool
    {
        return true;
    }

    /**
     * Pass a value through the text sanitizer before it is written to the database.
     *
     * NOTE(review): MyTextSanitizer::censorString() is flagged as deprecated;
     * it is kept so previously stored words keep matching.
     *
     * @param string $var
     * @return string
     */
    private function cleanVar(string $var): string
    {
        return $this->myts->censorString($var);
    }
}
262