Test Failed
Pull Request — master (#54)
by
unknown
02:33
created

Apriori::train()   A

Complexity

Conditions 1
Paths 1

Size

Total Lines 5
Code Lines 3

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
dl 0
loc 5
rs 9.4285
c 0
b 0
f 0
cc 1
eloc 3
nc 1
nop 2
1
<?php
2
3
declare(strict_types=1);
4
5
namespace Phpml\Association;
6
7
use Phpml\Helper\Predictable;
8
use Phpml\Helper\Trainable;
9
10
class Apriori implements Associator
{
    use Trainable, Predictable;

    const ARRAY_KEY_ANTECEDENT = 'antecedent';

    const ARRAY_KEY_CONFIDENCE = 'confidence';

    const ARRAY_KEY_CONSEQUENT = 'consequent';

    const ARRAY_KEY_SUPPORT = 'support';

    /**
     * Minimum confidence a rule must reach to be emitted by getRules().
     *
     * @var float
     */
    private $confidence;

    /**
     * Cached frequent item sets, indexed by item set length k.
     * Null until computed and after every call to train().
     *
     * @var mixed[][][]|null
     */
    private $large;

    /**
     * Minimum support (relative frequency) an item set must reach to be frequent.
     *
     * @var float
     */
    private $support;

    /**
     * Cached association rules.
     * Null until computed and after every call to train().
     *
     * @var mixed[][]|null
     */
    private $rules;

    /**
     * Apriori constructor.
     *
     * @param float $support    minimum support threshold used by frequent()
     * @param float $confidence minimum confidence threshold used by generateRules()
     */
    public function __construct(float $support = 0.0, float $confidence = 0.0)
    {
        $this->support = $support;
        $this->confidence = $confidence;
    }

    /**
     * Appends the given samples and targets to the already stored training data.
     *
     * @param array $samples
     * @param array $targets
     */
    public function train(array $samples, array $targets)
    {
        $this->samples = array_merge($this->samples, $samples);
        $this->targets = array_merge($this->targets, $targets);

        // The cached item sets and rules were derived from the previous data pool;
        // drop them so the next getRules() call reflects the new samples.
        $this->large = null;
        $this->rules = null;
    }

    /**
     * Get all association rules which are generated for every k-length frequent item set.
     *
     * The result is computed on first use and cached until train() is called again.
     *
     * @return mixed[][]
     */
    public function getRules() : array
    {
        if ($this->rules !== null) {
            return $this->rules;
        }

        if ($this->large === null) {
            $this->large = $this->apriori();
        }

        $this->rules = [];
        $this->generateAllRules();

        return $this->rules;
    }

    /**
     * Generates frequent item sets.
     *
     * The returned array is indexed by item set length k, starting at 1. The last
     * level is always an empty array: it is the level at which no frequent item
     * set was found and the search stopped.
     *
     * @return mixed[][][]
     */
    public function apriori() : array
    {
        $levels = [1 => $this->frequent($this->items())];

        for ($k = 2; !empty($levels[$k - 1]); ++$k) {
            $levels[$k] = $this->frequent($this->candidates($levels[$k - 1]));
        }

        return $levels;
    }

    /**
     * Returns the consequents of all rules whose antecedent equals $sample.
     *
     * @param mixed[] $sample
     *
     * @return mixed[][]
     */
    protected function predictSample(array $sample) : array
    {
        $consequents = [];

        foreach ($this->getRules() as $rule) {
            if ($this->equals($rule[self::ARRAY_KEY_ANTECEDENT], $sample)) {
                $consequents[] = $rule[self::ARRAY_KEY_CONSEQUENT];
            }
        }

        return $consequents;
    }

    /**
     * Generate rules for each k-length frequent item set.
     */
    private function generateAllRules()
    {
        // Rules need a non-empty antecedent and consequent, hence k starts at 2.
        for ($k = 2; !empty($this->large[$k]); ++$k) {
            foreach ($this->large[$k] as $frequent) {
                $this->generateRules($frequent);
            }
        }
    }

    /**
     * Generate confident rules for frequent item set.
     *
     * Every non-empty proper subset of $frequent is tried as antecedent; the
     * remaining items form the consequent. A rule is kept when its confidence
     * reaches the configured minimum.
     *
     * @param mixed[] $frequent
     */
    private function generateRules(array $frequent)
    {
        // The support of a rule X => Y is the support of X ∪ Y, i.e. of the whole
        // frequent item set, so it is identical for every rule derived from it.
        $support = $this->support($frequent);

        foreach ($this->antecedents($frequent) as $antecedent) {
            $confidence = $this->confidence($frequent, $antecedent);

            if ($confidence < $this->confidence) {
                continue;
            }

            $this->rules[] = [
                self::ARRAY_KEY_ANTECEDENT => $antecedent,
                self::ARRAY_KEY_CONSEQUENT => array_values(array_diff($frequent, $antecedent)),
                self::ARRAY_KEY_SUPPORT => $support,
                self::ARRAY_KEY_CONFIDENCE => $confidence,
            ];
        }
    }

    /**
     * Generates the power set for given item set $sample.
     *
     * @param mixed[] $sample
     *
     * @return mixed[][]
     */
    private function powerSet(array $sample) : array
    {
        $results = [[]];

        foreach ($sample as $item) {
            // foreach iterates over a copy, so appending inside the loop only
            // extends the combinations that existed before this item.
            foreach ($results as $combination) {
                $results[] = array_merge([$item], $combination);
            }
        }

        return $results;
    }

    /**
     * Generates all proper subsets for given set $sample without the empty set.
     *
     * Keys of the returned array are the (sparse) power set positions.
     *
     * @param mixed[] $sample
     *
     * @return mixed[][]
     */
    private function antecedents(array $sample) : array
    {
        $cardinality = count($sample);

        return array_filter($this->powerSet($sample), static function (array $antecedent) use ($cardinality) {
            $size = count($antecedent);

            return $size > 0 && $size !== $cardinality;
        });
    }

    /**
     * Calculates the k = 1 item sets: one single-element set per distinct item in the samples.
     *
     * @return mixed[][]
     */
    private function items() : array
    {
        $items = [];

        foreach ($this->samples as $sample) {
            foreach ($sample as $item) {
                if (!in_array($item, $items, true)) {
                    $items[] = $item;
                }
            }
        }

        return array_map(static function ($entry) {
            return [$entry];
        }, $items);
    }

    /**
     * Returns frequent item sets only, as a zero-based list.
     *
     * @param mixed[][] $samples
     *
     * @return mixed[][]
     */
    private function frequent(array $samples) : array
    {
        // array_filter() preserves keys; reindex so callers get a gap-free list.
        return array_values(array_filter($samples, function ($entry) {
            return $this->support($entry) >= $this->support;
        }));
    }

    /**
     * Calculates candidate k item sets from the (k - 1)-length item sets in $samples.
     *
     * Two item sets are joined when they differ in exactly one item each; a
     * candidate is kept only if it is new and occurs in at least one training sample.
     *
     * @param mixed[][] $samples
     *
     * @return mixed[][]
     */
    private function candidates(array $samples) : array
    {
        $candidates = [];

        foreach ($samples as $p) {
            foreach ($samples as $q) {
                // Size of the symmetric difference of $p and $q.
                if (count(array_diff($p, $q)) + count(array_diff($q, $p)) !== 2) {
                    continue;
                }

                // array_unique() preserves keys; reindex so item sets are plain lists.
                $candidate = array_values(array_unique(array_merge($p, $q)));

                if ($this->contains($candidates, $candidate)) {
                    continue;
                }

                foreach ($this->samples as $sample) {
                    if ($this->subset($sample, $candidate)) {
                        $candidates[] = $candidate;

                        break;
                    }
                }
            }
        }

        return $candidates;
    }

    /**
     * Calculates confidence for $set. Confidence is the relative amount of sets containing $subset which also contain
     * $set.
     *
     * @param mixed[] $set
     * @param mixed[] $subset
     *
     * @return float
     */
    private function confidence(array $set, array $subset) : float
    {
        return $this->support($set) / $this->support($subset);
    }

    /**
     * Calculates support for item set $sample. Support is the relative amount of sets containing $sample in the data
     * pool.
     *
     * @see \Phpml\Association\Apriori::samples
     *
     * @param mixed[] $sample
     *
     * @return float
     */
    private function support(array $sample) : float
    {
        return $this->frequency($sample) / count($this->samples);
    }

    /**
     * Counts occurrences of $sample as subset in data pool.
     *
     * @see \Phpml\Association\Apriori::samples
     *
     * @param mixed[] $sample
     *
     * @return int
     */
    private function frequency(array $sample) : int
    {
        $occurrences = 0;

        foreach ($this->samples as $entry) {
            if ($this->subset($entry, $sample)) {
                ++$occurrences;
            }
        }

        return $occurrences;
    }

    /**
     * Returns true if set is an element of system.
     *
     * @see \Phpml\Association\Apriori::equals()
     *
     * @param mixed[][] $system
     * @param mixed[]   $set
     *
     * @return bool
     */
    private function contains(array $system, array $set) : bool
    {
        foreach ($system as $entry) {
            if ($this->equals($entry, $set)) {
                return true;
            }
        }

        return false;
    }

    /**
     * Returns true if every item of subset also occurs in set, compared by the items' string representation.
     *
     * @param mixed[] $set
     * @param mixed[] $subset
     *
     * @return bool
     */
    private function subset(array $set, array $subset) : bool
    {
        return array_diff($subset, $set) === [];
    }

    /**
     * Returns true if both sets contain the same items, compared by the items' string representation.
     *
     * @param mixed[] $set1
     * @param mixed[] $set2
     *
     * @return bool
     */
    private function equals(array $set1, array $set2) : bool
    {
        // Both differences must be empty. Comparing the two differences with ==
        // would also accept non-empty differences whose values are loosely equal.
        return array_diff($set1, $set2) === [] && array_diff($set2, $set1) === [];
    }
}
356