Passed
Push — master ( f7537c...ff80af )
by Arkadiusz
02:31
created

src/Phpml/Association/Apriori.php (2 issues)

Upgrade to new PHP Analysis Engine

These results are based on our legacy PHP analysis, consider migrating to our new PHP analysis engine instead. Learn more

1
<?php
2
3
declare(strict_types=1);
4
5
namespace Phpml\Association;
6
7
use Phpml\Helper\Predictable;
8
use Phpml\Helper\Trainable;
9
10
class Apriori implements Associator
11
{
12
    use Trainable, Predictable;
13
14
    public const ARRAY_KEY_ANTECEDENT = 'antecedent';
15
16
    public const ARRAY_KEY_CONFIDENCE = 'confidence';
17
18
    public const ARRAY_KEY_CONSEQUENT = 'consequent';
19
20
    public const ARRAY_KEY_SUPPORT = 'support';
21
22
    /**
23
     * Minimum relative probability of frequent transactions.
24
     *
25
     * @var float
26
     */
27
    private $confidence;
28
29
    /**
30
     * The large set contains frequent k-length item sets.
31
     *
32
     * @var mixed[][][]
33
     */
34
    private $large;
35
36
    /**
37
     * Minimum relative frequency of transactions.
38
     *
39
     * @var float
40
     */
41
    private $support;
42
43
    /**
44
     * The generated Apriori association rules.
45
     *
46
     * @var mixed[][]
47
     */
48
    private $rules;
49
50
    /**
51
     * Apriori constructor.
52
     */
53
    public function __construct(float $support = 0.0, float $confidence = 0.0)
54
    {
55
        $this->support = $support;
56
        $this->confidence = $confidence;
57
    }
58
59
    /**
60
     * Get all association rules which are generated for every k-length frequent item set.
61
     *
62
     * @return mixed[][]
63
     */
64
    public function getRules() : array
65
    {
66
        if (!$this->large) {
0 ignored issues
show
Bug Best Practice introduced by
The expression $this->large of type array[][] is implicitly converted to a boolean; are you sure this is intended? If so, consider using empty($expr) instead to make it clear that you intend to check for an array without elements.

This check marks implicit conversions of arrays to boolean values in a comparison. While in PHP an empty array is considered to be equal (but not identical) to false, this is not always apparent.

Consider making the comparison explicit by using empty(..) or ! empty(...) instead.

Loading history...
67
            $this->large = $this->apriori();
68
        }
69
70
        if ($this->rules) {
0 ignored issues
show
Bug Best Practice introduced by
The expression $this->rules of type array[] is implicitly converted to a boolean; are you sure this is intended? If so, consider using ! empty($expr) instead to make it clear that you intend to check for an array without elements.

This check marks implicit conversions of arrays to boolean values in a comparison. While in PHP an empty array is considered to be equal (but not identical) to false, this is not always apparent.

Consider making the comparison explicit by using empty(..) or ! empty(...) instead.

Loading history...
71
            return $this->rules;
72
        }
73
74
        $this->rules = [];
75
76
        $this->generateAllRules();
77
78
        return $this->rules;
79
    }
80
81
    /**
82
     * Generates frequent item sets.
83
     *
84
     * @return mixed[][][]
85
     */
86
    public function apriori() : array
87
    {
88
        $L = [];
89
        $L[1] = $this->items();
90
        $L[1] = $this->frequent($L[1]);
91
92
        for ($k = 2; !empty($L[$k - 1]); ++$k) {
93
            $L[$k] = $this->candidates($L[$k - 1]);
94
            $L[$k] = $this->frequent($L[$k]);
95
        }
96
97
        return $L;
98
    }
99
100
    /**
101
     * @param mixed[] $sample
102
     *
103
     * @return mixed[][]
104
     */
105
    protected function predictSample(array $sample) : array
106
    {
107
        $predicts = array_values(array_filter($this->getRules(), function ($rule) use ($sample) {
108
            return $this->equals($rule[self::ARRAY_KEY_ANTECEDENT], $sample);
109
        }));
110
111
        return array_map(function ($rule) {
112
            return $rule[self::ARRAY_KEY_CONSEQUENT];
113
        }, $predicts);
114
    }
115
116
    /**
117
     * Generate rules for each k-length frequent item set.
118
     */
119
    private function generateAllRules(): void
120
    {
121
        for ($k = 2; !empty($this->large[$k]); ++$k) {
122
            foreach ($this->large[$k] as $frequent) {
123
                $this->generateRules($frequent);
124
            }
125
        }
126
    }
127
128
    /**
129
     * Generate confident rules for frequent item set.
130
     *
131
     * @param mixed[] $frequent
132
     */
133
    private function generateRules(array $frequent): void
134
    {
135
        foreach ($this->antecedents($frequent) as $antecedent) {
136
            if ($this->confidence <= ($confidence = $this->confidence($frequent, $antecedent))) {
137
                $consequent = array_values(array_diff($frequent, $antecedent));
138
                $this->rules[] = [
139
                    self::ARRAY_KEY_ANTECEDENT => $antecedent,
140
                    self::ARRAY_KEY_CONSEQUENT => $consequent,
141
                    self::ARRAY_KEY_SUPPORT => $this->support($consequent),
142
                    self::ARRAY_KEY_CONFIDENCE => $confidence,
143
                ];
144
            }
145
        }
146
    }
147
148
    /**
149
     * Generates the power set for given item set $sample.
150
     *
151
     * @param mixed[] $sample
152
     *
153
     * @return mixed[][]
154
     */
155
    private function powerSet(array $sample) : array
156
    {
157
        $results = [[]];
158
        foreach ($sample as $item) {
159
            foreach ($results as $combination) {
160
                $results[] = array_merge([$item], $combination);
161
            }
162
        }
163
164
        return $results;
165
    }
166
167
    /**
168
     * Generates all proper subsets for given set $sample without the empty set.
169
     *
170
     * @param mixed[] $sample
171
     *
172
     * @return mixed[][]
173
     */
174
    private function antecedents(array $sample) : array
175
    {
176
        $cardinality = count($sample);
177
        $antecedents = $this->powerSet($sample);
178
179
        return array_filter($antecedents, function ($antecedent) use ($cardinality) {
180
            return (count($antecedent) != $cardinality) && ($antecedent != []);
181
        });
182
    }
183
184
    /**
185
     * Calculates frequent k = 1 item sets.
186
     *
187
     * @return mixed[][]
188
     */
189
    private function items() : array
190
    {
191
        $items = [];
192
193
        foreach ($this->samples as $sample) {
194
            foreach ($sample as $item) {
195
                if (!in_array($item, $items, true)) {
196
                    $items[] = $item;
197
                }
198
            }
199
        }
200
201
        return array_map(function ($entry) {
202
            return [$entry];
203
        }, $items);
204
    }
205
206
    /**
207
     * Returns frequent item sets only.
208
     *
209
     * @param mixed[][] $samples
210
     *
211
     * @return mixed[][]
212
     */
213
    private function frequent(array $samples) : array
214
    {
215
        return array_filter($samples, function ($entry) {
216
            return $this->support($entry) >= $this->support;
217
        });
218
    }
219
220
    /**
221
     * Calculates frequent k item sets, where count($samples) == $k - 1.
222
     *
223
     * @param mixed[][] $samples
224
     *
225
     * @return mixed[][]
226
     */
227
    private function candidates(array $samples) : array
228
    {
229
        $candidates = [];
230
231
        foreach ($samples as $p) {
232
            foreach ($samples as $q) {
233
                if (count(array_merge(array_diff($p, $q), array_diff($q, $p))) != 2) {
234
                    continue;
235
                }
236
237
                $candidate = array_unique(array_merge($p, $q));
238
239
                if ($this->contains($candidates, $candidate)) {
240
                    continue;
241
                }
242
243
                foreach ((array) $this->samples as $sample) {
244
                    if ($this->subset($sample, $candidate)) {
245
                        $candidates[] = $candidate;
246
                        continue 2;
247
                    }
248
                }
249
            }
250
        }
251
252
        return $candidates;
253
    }
254
255
    /**
256
     * Calculates confidence for $set. Confidence is the relative amount of sets containing $subset which also contain
257
     * $set.
258
     *
259
     * @param mixed[] $set
260
     * @param mixed[] $subset
261
     */
262
    private function confidence(array $set, array $subset) : float
263
    {
264
        return $this->support($set) / $this->support($subset);
265
    }
266
267
    /**
268
     * Calculates support for item set $sample. Support is the relative amount of sets containing $sample in the data
269
     * pool.
270
     *
271
     * @see \Phpml\Association\Apriori::samples
272
     *
273
     * @param mixed[] $sample
274
     */
275
    private function support(array $sample) : float
276
    {
277
        return $this->frequency($sample) / count($this->samples);
278
    }
279
280
    /**
281
     * Counts occurrences of $sample as subset in data pool.
282
     *
283
     * @see \Phpml\Association\Apriori::samples
284
     *
285
     * @param mixed[] $sample
286
     */
287
    private function frequency(array $sample) : int
288
    {
289
        return count(array_filter($this->samples, function ($entry) use ($sample) {
290
            return $this->subset($entry, $sample);
291
        }));
292
    }
293
294
    /**
295
     * Returns true if set is an element of system.
296
     *
297
     * @see \Phpml\Association\Apriori::equals()
298
     *
299
     * @param mixed[][] $system
300
     * @param mixed[]   $set
301
     */
302
    private function contains(array $system, array $set) : bool
303
    {
304
        return (bool) array_filter($system, function ($entry) use ($set) {
305
            return $this->equals($entry, $set);
306
        });
307
    }
308
309
    /**
310
     * Returns true if subset is a (proper) subset of set by its items string representation.
311
     *
312
     * @param mixed[] $set
313
     * @param mixed[] $subset
314
     */
315
    private function subset(array $set, array $subset) : bool
316
    {
317
        return !array_diff($subset, array_intersect($subset, $set));
318
    }
319
320
    /**
321
     * Returns true if string representation of items does not differ.
322
     *
323
     * @param mixed[] $set1
324
     * @param mixed[] $set2
325
     */
326
    private function equals(array $set1, array $set2) : bool
327
    {
328
        return array_diff($set1, $set2) == array_diff($set2, $set1);
329
    }
330
}
331