Passed
Push — master ( e83f7b...d953ef )
by Arkadiusz
03:28
created

src/Phpml/Association/Apriori.php (2 issues)

Check for implicit conversion of array to boolean.

Best Practice Bug Minor

Upgrade to new PHP Analysis Engine

These results are based on our legacy PHP analysis, consider migrating to our new PHP analysis engine instead. Learn more

1
<?php
2
3
declare(strict_types=1);
4
5
namespace Phpml\Association;
6
7
use Phpml\Helper\Predictable;
8
use Phpml\Helper\Trainable;
9
10
class Apriori implements Associator
11
{
12
    use Trainable, Predictable;
13
14
    public const ARRAY_KEY_ANTECEDENT = 'antecedent';
15
16
    public const ARRAY_KEY_CONFIDENCE = 'confidence';
17
18
    public const ARRAY_KEY_CONSEQUENT = 'consequent';
19
20
    public const ARRAY_KEY_SUPPORT = 'support';
21
22
    /**
23
     * Minimum relative probability of frequent transactions.
24
     *
25
     * @var float
26
     */
27
    private $confidence;
28
29
    /**
30
     * The large set contains frequent k-length item sets.
31
     *
32
     * @var mixed[][][]
33
     */
34
    private $large = [];
35
36
    /**
37
     * Minimum relative frequency of transactions.
38
     *
39
     * @var float
40
     */
41
    private $support;
42
43
    /**
44
     * The generated Apriori association rules.
45
     *
46
     * @var mixed[][]
47
     */
48
    private $rules = [];
49
50
    /**
51
     * Apriori constructor.
52
     */
53
    public function __construct(float $support = 0.0, float $confidence = 0.0)
54
    {
55
        $this->support = $support;
56
        $this->confidence = $confidence;
57
    }
58
59
    /**
60
     * Get all association rules which are generated for every k-length frequent item set.
61
     *
62
     * @return mixed[][]
63
     */
64
    public function getRules(): array
65
    {
66
        if (!$this->large) {
0 ignored issues
show
Bug Best Practice introduced by
The expression $this->large of type array[][] is implicitly converted to a boolean; are you sure this is intended? If so, consider using empty($expr) instead to make it clear that you intend to check for an array without elements.

This check marks implicit conversions of arrays to boolean values in a comparison. While in PHP an empty array is considered to be equal (but not identical) to false, this is not always apparent.

Consider making the comparison explicit by using empty(..) or ! empty(...) instead.

Loading history...
67
            $this->large = $this->apriori();
68
        }
69
70
        if ($this->rules) {
0 ignored issues
show
Bug Best Practice introduced by
The expression $this->rules of type array[] is implicitly converted to a boolean; are you sure this is intended? If so, consider using ! empty($expr) instead to make it clear that you intend to check for an array without elements.

This check marks implicit conversions of arrays to boolean values in a comparison. While in PHP an empty array is considered to be equal (but not identical) to false, this is not always apparent.

Consider making the comparison explicit by using empty(..) or ! empty(...) instead.

Loading history...
71
            return $this->rules;
72
        }
73
74
        $this->rules = [];
75
76
        $this->generateAllRules();
77
78
        return $this->rules;
79
    }
80
81
    /**
82
     * Generates frequent item sets.
83
     *
84
     * @return mixed[][][]
85
     */
86
    public function apriori(): array
87
    {
88
        $L = [];
89
        $L[1] = $this->items();
90
        $L[1] = $this->frequent($L[1]);
91
92
        for ($k = 2; !empty($L[$k - 1]); ++$k) {
93
            $L[$k] = $this->candidates($L[$k - 1]);
94
            $L[$k] = $this->frequent($L[$k]);
95
        }
96
97
        return $L;
98
    }
99
100
    /**
101
     * @param mixed[] $sample
102
     *
103
     * @return mixed[][]
104
     */
105
    protected function predictSample(array $sample): array
106
    {
107
        $predicts = array_values(array_filter($this->getRules(), function ($rule) use ($sample) {
108
            return $this->equals($rule[self::ARRAY_KEY_ANTECEDENT], $sample);
109
        }));
110
111
        return array_map(function ($rule) {
112
            return $rule[self::ARRAY_KEY_CONSEQUENT];
113
        }, $predicts);
114
    }
115
116
    /**
117
     * Generate rules for each k-length frequent item set.
118
     */
119
    private function generateAllRules(): void
120
    {
121
        for ($k = 2; !empty($this->large[$k]); ++$k) {
122
            foreach ($this->large[$k] as $frequent) {
123
                $this->generateRules($frequent);
124
            }
125
        }
126
    }
127
128
    /**
129
     * Generate confident rules for frequent item set.
130
     *
131
     * @param mixed[] $frequent
132
     */
133
    private function generateRules(array $frequent): void
134
    {
135
        foreach ($this->antecedents($frequent) as $antecedent) {
136
            $confidence = $this->confidence($frequent, $antecedent);
137
            if ($this->confidence <= $confidence) {
138
                $consequent = array_values(array_diff($frequent, $antecedent));
139
                $this->rules[] = [
140
                    self::ARRAY_KEY_ANTECEDENT => $antecedent,
141
                    self::ARRAY_KEY_CONSEQUENT => $consequent,
142
                    self::ARRAY_KEY_SUPPORT => $this->support($consequent),
143
                    self::ARRAY_KEY_CONFIDENCE => $confidence,
144
                ];
145
            }
146
        }
147
    }
148
149
    /**
150
     * Generates the power set for given item set $sample.
151
     *
152
     * @param mixed[] $sample
153
     *
154
     * @return mixed[][]
155
     */
156
    private function powerSet(array $sample): array
157
    {
158
        $results = [[]];
159
        foreach ($sample as $item) {
160
            foreach ($results as $combination) {
161
                $results[] = array_merge([$item], $combination);
162
            }
163
        }
164
165
        return $results;
166
    }
167
168
    /**
169
     * Generates all proper subsets for given set $sample without the empty set.
170
     *
171
     * @param mixed[] $sample
172
     *
173
     * @return mixed[][]
174
     */
175
    private function antecedents(array $sample): array
176
    {
177
        $cardinality = count($sample);
178
        $antecedents = $this->powerSet($sample);
179
180
        return array_filter($antecedents, function ($antecedent) use ($cardinality) {
181
            return (count($antecedent) != $cardinality) && ($antecedent != []);
182
        });
183
    }
184
185
    /**
186
     * Calculates frequent k = 1 item sets.
187
     *
188
     * @return mixed[][]
189
     */
190
    private function items(): array
191
    {
192
        $items = [];
193
194
        foreach ($this->samples as $sample) {
195
            foreach ($sample as $item) {
196
                if (!in_array($item, $items, true)) {
197
                    $items[] = $item;
198
                }
199
            }
200
        }
201
202
        return array_map(function ($entry) {
203
            return [$entry];
204
        }, $items);
205
    }
206
207
    /**
208
     * Returns frequent item sets only.
209
     *
210
     * @param mixed[][] $samples
211
     *
212
     * @return mixed[][]
213
     */
214
    private function frequent(array $samples): array
215
    {
216
        return array_filter($samples, function ($entry) {
217
            return $this->support($entry) >= $this->support;
218
        });
219
    }
220
221
    /**
222
     * Calculates frequent k item sets, where count($samples) == $k - 1.
223
     *
224
     * @param mixed[][] $samples
225
     *
226
     * @return mixed[][]
227
     */
228
    private function candidates(array $samples): array
229
    {
230
        $candidates = [];
231
232
        foreach ($samples as $p) {
233
            foreach ($samples as $q) {
234
                if (count(array_merge(array_diff($p, $q), array_diff($q, $p))) != 2) {
235
                    continue;
236
                }
237
238
                $candidate = array_unique(array_merge($p, $q));
239
240
                if ($this->contains($candidates, $candidate)) {
241
                    continue;
242
                }
243
244
                foreach ((array) $this->samples as $sample) {
245
                    if ($this->subset($sample, $candidate)) {
246
                        $candidates[] = $candidate;
247
248
                        continue 2;
249
                    }
250
                }
251
            }
252
        }
253
254
        return $candidates;
255
    }
256
257
    /**
258
     * Calculates confidence for $set. Confidence is the relative amount of sets containing $subset which also contain
259
     * $set.
260
     *
261
     * @param mixed[] $set
262
     * @param mixed[] $subset
263
     */
264
    private function confidence(array $set, array $subset): float
265
    {
266
        return $this->support($set) / $this->support($subset);
267
    }
268
269
    /**
270
     * Calculates support for item set $sample. Support is the relative amount of sets containing $sample in the data
271
     * pool.
272
     *
273
     * @see \Phpml\Association\Apriori::samples
274
     *
275
     * @param mixed[] $sample
276
     */
277
    private function support(array $sample): float
278
    {
279
        return $this->frequency($sample) / count($this->samples);
280
    }
281
282
    /**
283
     * Counts occurrences of $sample as subset in data pool.
284
     *
285
     * @see \Phpml\Association\Apriori::samples
286
     *
287
     * @param mixed[] $sample
288
     */
289
    private function frequency(array $sample): int
290
    {
291
        return count(array_filter($this->samples, function ($entry) use ($sample) {
292
            return $this->subset($entry, $sample);
293
        }));
294
    }
295
296
    /**
297
     * Returns true if set is an element of system.
298
     *
299
     * @see \Phpml\Association\Apriori::equals()
300
     *
301
     * @param mixed[][] $system
302
     * @param mixed[]   $set
303
     */
304
    private function contains(array $system, array $set): bool
305
    {
306
        return (bool) array_filter($system, function ($entry) use ($set) {
307
            return $this->equals($entry, $set);
308
        });
309
    }
310
311
    /**
312
     * Returns true if subset is a (proper) subset of set by its items string representation.
313
     *
314
     * @param mixed[] $set
315
     * @param mixed[] $subset
316
     */
317
    private function subset(array $set, array $subset): bool
318
    {
319
        return !array_diff($subset, array_intersect($subset, $set));
320
    }
321
322
    /**
323
     * Returns true if string representation of items does not differ.
324
     *
325
     * @param mixed[] $set1
326
     * @param mixed[] $set2
327
     */
328
    private function equals(array $set1, array $set2): bool
329
    {
330
        return array_diff($set1, $set2) == array_diff($set2, $set1);
331
    }
332
}
333