Passed
Push — master ( e83f7b...d953ef )
by Arkadiusz
03:28
created

src/Phpml/Association/Apriori.php (2 issues)

Upgrade to new PHP Analysis Engine

These results are based on our legacy PHP analysis, consider migrating to our new PHP analysis engine instead. Learn more

1
<?php
2
3
declare(strict_types=1);
4
5
namespace Phpml\Association;
6
7
use Phpml\Helper\Predictable;
8
use Phpml\Helper\Trainable;
9
10
class Apriori implements Associator
11
{
12
    use Trainable, Predictable;
13
14
    public const ARRAY_KEY_ANTECEDENT = 'antecedent';
15
16
    public const ARRAY_KEY_CONFIDENCE = 'confidence';
17
18
    public const ARRAY_KEY_CONSEQUENT = 'consequent';
19
20
    public const ARRAY_KEY_SUPPORT = 'support';
21
22
    /**
23
     * Minimum relative probability of frequent transactions.
24
     *
25
     * @var float
26
     */
27
    private $confidence;
28
29
    /**
30
     * The large set contains frequent k-length item sets.
31
     *
32
     * @var mixed[][][]
33
     */
34
    private $large = [];
35
36
    /**
37
     * Minimum relative frequency of transactions.
38
     *
39
     * @var float
40
     */
41
    private $support;
42
43
    /**
44
     * The generated Apriori association rules.
45
     *
46
     * @var mixed[][]
47
     */
48
    private $rules = [];
49
50
    /**
51
     * Apriori constructor.
52
     */
53
    public function __construct(float $support = 0.0, float $confidence = 0.0)
54
    {
55
        $this->support = $support;
56
        $this->confidence = $confidence;
57
    }
58
59
    /**
60
     * Get all association rules which are generated for every k-length frequent item set.
61
     *
62
     * @return mixed[][]
63
     */
64
    public function getRules(): array
65
    {
66
        if (!$this->large) {
0 ignored issues
show
Bug Best Practice introduced by
The expression $this->large of type array[][] is implicitly converted to a boolean; are you sure this is intended? If so, consider using empty($expr) instead to make it clear that you intend to check for an array without elements.

This check marks implicit conversions of arrays to boolean values in a comparison. While in PHP an empty array is considered to be equal (but not identical) to false, this is not always apparent.

Consider making the comparison explicit by using empty(..) or ! empty(...) instead.

Loading history...
67
            $this->large = $this->apriori();
68
        }
69
70
        if ($this->rules) {
0 ignored issues
show
Bug Best Practice introduced by
The expression $this->rules of type array[] is implicitly converted to a boolean; are you sure this is intended? If so, consider using ! empty($expr) instead to make it clear that you intend to check for an array without elements.

This check marks implicit conversions of arrays to boolean values in a comparison. While in PHP an empty array is considered to be equal (but not identical) to false, this is not always apparent.

Consider making the comparison explicit by using empty(..) or ! empty(...) instead.

Loading history...
71
            return $this->rules;
72
        }
73
74
        $this->rules = [];
75
76
        $this->generateAllRules();
77
78
        return $this->rules;
79
    }
80
81
    /**
82
     * Generates frequent item sets.
83
     *
84
     * @return mixed[][][]
85
     */
86
    public function apriori(): array
87
    {
88
        $L = [];
89
        $L[1] = $this->items();
90
        $L[1] = $this->frequent($L[1]);
91
92
        for ($k = 2; !empty($L[$k - 1]); ++$k) {
93
            $L[$k] = $this->candidates($L[$k - 1]);
94
            $L[$k] = $this->frequent($L[$k]);
95
        }
96
97
        return $L;
98
    }
99
100
    /**
101
     * @param mixed[] $sample
102
     *
103
     * @return mixed[][]
104
     */
105
    protected function predictSample(array $sample): array
106
    {
107
        $predicts = array_values(array_filter($this->getRules(), function ($rule) use ($sample) {
108
            return $this->equals($rule[self::ARRAY_KEY_ANTECEDENT], $sample);
109
        }));
110
111
        return array_map(function ($rule) {
112
            return $rule[self::ARRAY_KEY_CONSEQUENT];
113
        }, $predicts);
114
    }
115
116
    /**
117
     * Generate rules for each k-length frequent item set.
118
     */
119
    private function generateAllRules(): void
120
    {
121
        for ($k = 2; !empty($this->large[$k]); ++$k) {
122
            foreach ($this->large[$k] as $frequent) {
123
                $this->generateRules($frequent);
124
            }
125
        }
126
    }
127
128
    /**
129
     * Generate confident rules for frequent item set.
130
     *
131
     * @param mixed[] $frequent
132
     */
133
    private function generateRules(array $frequent): void
134
    {
135
        foreach ($this->antecedents($frequent) as $antecedent) {
136
            $confidence = $this->confidence($frequent, $antecedent);
137
            if ($this->confidence <= $confidence) {
138
                $consequent = array_values(array_diff($frequent, $antecedent));
139
                $this->rules[] = [
140
                    self::ARRAY_KEY_ANTECEDENT => $antecedent,
141
                    self::ARRAY_KEY_CONSEQUENT => $consequent,
142
                    self::ARRAY_KEY_SUPPORT => $this->support($consequent),
143
                    self::ARRAY_KEY_CONFIDENCE => $confidence,
144
                ];
145
            }
146
        }
147
    }
148
149
    /**
150
     * Generates the power set for given item set $sample.
151
     *
152
     * @param mixed[] $sample
153
     *
154
     * @return mixed[][]
155
     */
156
    private function powerSet(array $sample): array
157
    {
158
        $results = [[]];
159
        foreach ($sample as $item) {
160
            foreach ($results as $combination) {
161
                $results[] = array_merge([$item], $combination);
162
            }
163
        }
164
165
        return $results;
166
    }
167
168
    /**
169
     * Generates all proper subsets for given set $sample without the empty set.
170
     *
171
     * @param mixed[] $sample
172
     *
173
     * @return mixed[][]
174
     */
175
    private function antecedents(array $sample): array
176
    {
177
        $cardinality = count($sample);
178
        $antecedents = $this->powerSet($sample);
179
180
        return array_filter($antecedents, function ($antecedent) use ($cardinality) {
181
            return (count($antecedent) != $cardinality) && ($antecedent != []);
182
        });
183
    }
184
185
    /**
186
     * Calculates frequent k = 1 item sets.
187
     *
188
     * @return mixed[][]
189
     */
190
    private function items(): array
191
    {
192
        $items = [];
193
194
        foreach ($this->samples as $sample) {
195
            foreach ($sample as $item) {
196
                if (!in_array($item, $items, true)) {
197
                    $items[] = $item;
198
                }
199
            }
200
        }
201
202
        return array_map(function ($entry) {
203
            return [$entry];
204
        }, $items);
205
    }
206
207
    /**
208
     * Returns frequent item sets only.
209
     *
210
     * @param mixed[][] $samples
211
     *
212
     * @return mixed[][]
213
     */
214
    private function frequent(array $samples): array
215
    {
216
        return array_filter($samples, function ($entry) {
217
            return $this->support($entry) >= $this->support;
218
        });
219
    }
220
221
    /**
222
     * Calculates frequent k item sets, where count($samples) == $k - 1.
223
     *
224
     * @param mixed[][] $samples
225
     *
226
     * @return mixed[][]
227
     */
228
    private function candidates(array $samples): array
229
    {
230
        $candidates = [];
231
232
        foreach ($samples as $p) {
233
            foreach ($samples as $q) {
234
                if (count(array_merge(array_diff($p, $q), array_diff($q, $p))) != 2) {
235
                    continue;
236
                }
237
238
                $candidate = array_unique(array_merge($p, $q));
239
240
                if ($this->contains($candidates, $candidate)) {
241
                    continue;
242
                }
243
244
                foreach ((array) $this->samples as $sample) {
245
                    if ($this->subset($sample, $candidate)) {
246
                        $candidates[] = $candidate;
247
248
                        continue 2;
249
                    }
250
                }
251
            }
252
        }
253
254
        return $candidates;
255
    }
256
257
    /**
258
     * Calculates confidence for $set. Confidence is the relative amount of sets containing $subset which also contain
259
     * $set.
260
     *
261
     * @param mixed[] $set
262
     * @param mixed[] $subset
263
     */
264
    private function confidence(array $set, array $subset): float
265
    {
266
        return $this->support($set) / $this->support($subset);
267
    }
268
269
    /**
270
     * Calculates support for item set $sample. Support is the relative amount of sets containing $sample in the data
271
     * pool.
272
     *
273
     * @see \Phpml\Association\Apriori::samples
274
     *
275
     * @param mixed[] $sample
276
     */
277
    private function support(array $sample): float
278
    {
279
        return $this->frequency($sample) / count($this->samples);
280
    }
281
282
    /**
283
     * Counts occurrences of $sample as subset in data pool.
284
     *
285
     * @see \Phpml\Association\Apriori::samples
286
     *
287
     * @param mixed[] $sample
288
     */
289
    private function frequency(array $sample): int
290
    {
291
        return count(array_filter($this->samples, function ($entry) use ($sample) {
292
            return $this->subset($entry, $sample);
293
        }));
294
    }
295
296
    /**
297
     * Returns true if set is an element of system.
298
     *
299
     * @see \Phpml\Association\Apriori::equals()
300
     *
301
     * @param mixed[][] $system
302
     * @param mixed[]   $set
303
     */
304
    private function contains(array $system, array $set): bool
305
    {
306
        return (bool) array_filter($system, function ($entry) use ($set) {
307
            return $this->equals($entry, $set);
308
        });
309
    }
310
311
    /**
312
     * Returns true if subset is a (proper) subset of set by its items string representation.
313
     *
314
     * @param mixed[] $set
315
     * @param mixed[] $subset
316
     */
317
    private function subset(array $set, array $subset): bool
318
    {
319
        return !array_diff($subset, array_intersect($subset, $set));
320
    }
321
322
    /**
323
     * Returns true if string representation of items does not differ.
324
     *
325
     * @param mixed[] $set1
326
     * @param mixed[] $set2
327
     */
328
    private function equals(array $set1, array $set2): bool
329
    {
330
        return array_diff($set1, $set2) == array_diff($set2, $set1);
331
    }
332
}
333