Passed
Pull Request — master (#287)
by Marcin
02:51
created

Apriori::apriori()   A

Complexity

Conditions 2
Paths 2

Size

Total Lines 12

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
dl 0
loc 12
rs 9.8666
c 0
b 0
f 0
cc 2
nc 2
nop 0
1
<?php
2
3
declare(strict_types=1);
4
5
namespace Phpml\Association;
6
7
use Phpml\Helper\Predictable;
8
use Phpml\Helper\Trainable;
9
10
class Apriori implements Associator
11
{
12
    use Trainable, Predictable;
13
14
    public const ARRAY_KEY_ANTECEDENT = 'antecedent';
15
16
    public const ARRAY_KEY_CONFIDENCE = 'confidence';
17
18
    public const ARRAY_KEY_CONSEQUENT = 'consequent';
19
20
    public const ARRAY_KEY_SUPPORT = 'support';
21
22
    /**
23
     * Minimum relative probability of frequent transactions.
24
     *
25
     * @var float
26
     */
27
    private $confidence;
28
29
    /**
30
     * The large set contains frequent k-length item sets.
31
     *
32
     * @var mixed[][][]
33
     */
34
    private $large = [];
35
36
    /**
37
     * Minimum relative frequency of transactions.
38
     *
39
     * @var float
40
     */
41
    private $support;
42
43
    /**
44
     * The generated Apriori association rules.
45
     *
46
     * @var mixed[][]
47
     */
48
    private $rules = [];
49
50
    /**
51
     * Apriori constructor.
52
     */
53
    public function __construct(float $support = 0.0, float $confidence = 0.0)
54
    {
55
        $this->support = $support;
56
        $this->confidence = $confidence;
57
    }
58
59
    /**
60
     * Get all association rules which are generated for every k-length frequent item set.
61
     *
62
     * @return mixed[][]
63
     */
64
    public function getRules(): array
65
    {
66
        if (empty($this->large)) {
67
            $this->large = $this->apriori();
68
        }
69
70
        if (!empty($this->rules)) {
71
            return $this->rules;
72
        }
73
74
        $this->rules = [];
75
76
        $this->generateAllRules();
77
78
        return $this->rules;
79
    }
80
81
    /**
82
     * Generates frequent item sets.
83
     *
84
     * @return mixed[][][]
85
     */
86
    public function apriori(): array
87
    {
88
        $L = [];
89
90
        $items = $this->frequent($this->items());
91
        for ($k = 1; !empty($items); ++$k) {
92
            $L[$k] = $items;
93
            $items = $this->frequent($this->candidates($items));
94
        }
95
96
        return $L;
97
    }
98
99
    /**
100
     * @param mixed[] $sample
101
     *
102
     * @return mixed[][]
103
     */
104
    protected function predictSample(array $sample): array
105
    {
106
        $predicts = array_values(array_filter($this->getRules(), function ($rule) use ($sample) {
107
            return $this->equals($rule[self::ARRAY_KEY_ANTECEDENT], $sample);
108
        }));
109
110
        return array_map(function ($rule) {
111
            return $rule[self::ARRAY_KEY_CONSEQUENT];
112
        }, $predicts);
113
    }
114
115
    /**
116
     * Generate rules for each k-length frequent item set.
117
     */
118
    private function generateAllRules(): void
119
    {
120
        for ($k = 2; !empty($this->large[$k]); ++$k) {
121
            foreach ($this->large[$k] as $frequent) {
122
                $this->generateRules($frequent);
123
            }
124
        }
125
    }
126
127
    /**
128
     * Generate confident rules for frequent item set.
129
     *
130
     * @param mixed[] $frequent
131
     */
132
    private function generateRules(array $frequent): void
133
    {
134
        foreach ($this->antecedents($frequent) as $antecedent) {
135
            $confidence = $this->confidence($frequent, $antecedent);
136
            if ($this->confidence <= $confidence) {
137
                $consequent = array_values(array_diff($frequent, $antecedent));
138
                $this->rules[] = [
139
                    self::ARRAY_KEY_ANTECEDENT => $antecedent,
140
                    self::ARRAY_KEY_CONSEQUENT => $consequent,
141
                    self::ARRAY_KEY_SUPPORT => $this->support($frequent),
142
                    self::ARRAY_KEY_CONFIDENCE => $confidence,
143
                ];
144
            }
145
        }
146
    }
147
148
    /**
149
     * Generates the power set for given item set $sample.
150
     *
151
     * @param mixed[] $sample
152
     *
153
     * @return mixed[][]
154
     */
155
    private function powerSet(array $sample): array
156
    {
157
        $results = [[]];
158
        foreach ($sample as $item) {
159
            foreach ($results as $combination) {
160
                $results[] = array_merge([$item], $combination);
161
            }
162
        }
163
164
        return $results;
165
    }
166
167
    /**
168
     * Generates all proper subsets for given set $sample without the empty set.
169
     *
170
     * @param mixed[] $sample
171
     *
172
     * @return mixed[][]
173
     */
174
    private function antecedents(array $sample): array
175
    {
176
        $cardinality = count($sample);
177
        $antecedents = $this->powerSet($sample);
178
179
        return array_filter($antecedents, function ($antecedent) use ($cardinality) {
180
            return (count($antecedent) != $cardinality) && ($antecedent != []);
181
        });
182
    }
183
184
    /**
185
     * Calculates frequent k = 1 item sets.
186
     *
187
     * @return mixed[][]
188
     */
189
    private function items(): array
190
    {
191
        $items = [];
192
193
        foreach ($this->samples as $sample) {
194
            foreach ($sample as $item) {
195
                if (!in_array($item, $items, true)) {
196
                    $items[] = $item;
197
                }
198
            }
199
        }
200
201
        return array_map(function ($entry) {
202
            return [$entry];
203
        }, $items);
204
    }
205
206
    /**
207
     * Returns frequent item sets only.
208
     *
209
     * @param mixed[][] $samples
210
     *
211
     * @return mixed[][]
212
     */
213
    private function frequent(array $samples): array
214
    {
215
        return array_values(array_filter($samples, function ($entry) {
216
            return $this->support($entry) >= $this->support;
217
        }));
218
    }
219
220
    /**
221
     * Calculates frequent k item sets, where count($samples) == $k - 1.
222
     *
223
     * @param mixed[][] $samples
224
     *
225
     * @return mixed[][]
226
     */
227
    private function candidates(array $samples): array
228
    {
229
        $candidates = [];
230
231
        foreach ($samples as $p) {
232
            foreach ($samples as $q) {
233
                if (count(array_merge(array_diff($p, $q), array_diff($q, $p))) != 2) {
234
                    continue;
235
                }
236
237
                $candidate = array_values(array_unique(array_merge($p, $q)));
238
239
                if ($this->contains($candidates, $candidate)) {
240
                    continue;
241
                }
242
243
                foreach ((array) $this->samples as $sample) {
244
                    if ($this->subset($sample, $candidate)) {
245
                        $candidates[] = $candidate;
246
247
                        continue 2;
248
                    }
249
                }
250
            }
251
        }
252
253
        return $candidates;
254
    }
255
256
    /**
257
     * Calculates confidence for $set. Confidence is the relative amount of sets containing $subset which also contain
258
     * $set.
259
     *
260
     * @param mixed[] $set
261
     * @param mixed[] $subset
262
     */
263
    private function confidence(array $set, array $subset): float
264
    {
265
        return $this->support($set) / $this->support($subset);
266
    }
267
268
    /**
269
     * Calculates support for item set $sample. Support is the relative amount of sets containing $sample in the data
270
     * pool.
271
     *
272
     * @see \Phpml\Association\Apriori::samples
273
     *
274
     * @param mixed[] $sample
275
     */
276
    private function support(array $sample): float
277
    {
278
        return $this->frequency($sample) / count($this->samples);
279
    }
280
281
    /**
282
     * Counts occurrences of $sample as subset in data pool.
283
     *
284
     * @see \Phpml\Association\Apriori::samples
285
     *
286
     * @param mixed[] $sample
287
     */
288
    private function frequency(array $sample): int
289
    {
290
        return count(array_filter($this->samples, function ($entry) use ($sample) {
291
            return $this->subset($entry, $sample);
292
        }));
293
    }
294
295
    /**
296
     * Returns true if set is an element of system.
297
     *
298
     * @see \Phpml\Association\Apriori::equals()
299
     *
300
     * @param mixed[][] $system
301
     * @param mixed[]   $set
302
     */
303
    private function contains(array $system, array $set): bool
304
    {
305
        return (bool) array_filter($system, function ($entry) use ($set) {
306
            return $this->equals($entry, $set);
307
        });
308
    }
309
310
    /**
311
     * Returns true if subset is a (proper) subset of set by its items string representation.
312
     *
313
     * @param mixed[] $set
314
     * @param mixed[] $subset
315
     */
316
    private function subset(array $set, array $subset): bool
317
    {
318
        return !array_diff($subset, array_intersect($subset, $set));
319
    }
320
321
    /**
322
     * Returns true if string representation of items does not differ.
323
     *
324
     * @param mixed[] $set1
325
     * @param mixed[] $set2
326
     */
327
    private function equals(array $set1, array $set2): bool
328
    {
329
        return array_diff($set1, $set2) == array_diff($set2, $set1);
330
    }
331
}
332