Apriori::frequency()   A
last analyzed

Complexity

Conditions 1
Paths 1

Size

Total Lines 4
Code Lines 2

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
eloc 2
dl 0
loc 4
rs 10
c 0
b 0
f 0
cc 1
nc 1
nop 1
1
<?php
2
3
declare(strict_types=1);
4
5
namespace Phpml\Association;
6
7
use Phpml\Helper\Predictable;
8
use Phpml\Helper\Trainable;
9
10
class Apriori implements Associator
11
{
12
    use Trainable;
13
    use Predictable;
14
15
    public const ARRAY_KEY_ANTECEDENT = 'antecedent';
16
17
    public const ARRAY_KEY_CONFIDENCE = 'confidence';
18
19
    public const ARRAY_KEY_CONSEQUENT = 'consequent';
20
21
    public const ARRAY_KEY_SUPPORT = 'support';
22
23
    /**
24
     * Minimum relative probability of frequent transactions.
25
     *
26
     * @var float
27
     */
28
    private $confidence;
29
30
    /**
31
     * The large set contains frequent k-length item sets.
32
     *
33
     * @var mixed[][][]
34
     */
35
    private $large = [];
36
37
    /**
38
     * Minimum relative frequency of transactions.
39
     *
40
     * @var float
41
     */
42
    private $support;
43
44
    /**
45
     * The generated Apriori association rules.
46
     *
47
     * @var mixed[][]
48
     */
49
    private $rules = [];
50
51
    /**
52
     * Apriori constructor.
53
     */
54
    public function __construct(float $support = 0.0, float $confidence = 0.0)
55
    {
56
        $this->support = $support;
57
        $this->confidence = $confidence;
58
    }
59
60
    /**
61
     * Get all association rules which are generated for every k-length frequent item set.
62
     *
63
     * @return mixed[][]
64
     */
65
    public function getRules(): array
66
    {
67
        if (count($this->large) === 0) {
68
            $this->large = $this->apriori();
69
        }
70
71
        if (count($this->rules) > 0) {
72
            return $this->rules;
73
        }
74
75
        $this->rules = [];
76
77
        $this->generateAllRules();
78
79
        return $this->rules;
80
    }
81
82
    /**
83
     * Generates frequent item sets.
84
     *
85
     * @return mixed[][][]
86
     */
87
    public function apriori(): array
88
    {
89
        $L = [];
90
91
        $items = $this->frequent($this->items());
92
        for ($k = 1; isset($items[0]); ++$k) {
93
            $L[$k] = $items;
94
            $items = $this->frequent($this->candidates($items));
95
        }
96
97
        return $L;
98
    }
99
100
    /**
101
     * @param mixed[] $sample
102
     *
103
     * @return mixed[][]
104
     */
105
    protected function predictSample(array $sample): array
106
    {
107
        $predicts = array_values(array_filter($this->getRules(), function ($rule) use ($sample): bool {
108
            return $this->equals($rule[self::ARRAY_KEY_ANTECEDENT], $sample);
109
        }));
110
111
        return array_map(static function ($rule) {
112
            return $rule[self::ARRAY_KEY_CONSEQUENT];
113
        }, $predicts);
114
    }
115
116
    /**
117
     * Generate rules for each k-length frequent item set.
118
     */
119
    private function generateAllRules(): void
120
    {
121
        for ($k = 2; isset($this->large[$k]); ++$k) {
122
            foreach ($this->large[$k] as $frequent) {
123
                $this->generateRules($frequent);
124
            }
125
        }
126
    }
127
128
    /**
129
     * Generate confident rules for frequent item set.
130
     *
131
     * @param mixed[] $frequent
132
     */
133
    private function generateRules(array $frequent): void
134
    {
135
        foreach ($this->antecedents($frequent) as $antecedent) {
136
            $confidence = $this->confidence($frequent, $antecedent);
137
            if ($this->confidence <= $confidence) {
138
                $consequent = array_values(array_diff($frequent, $antecedent));
139
                $this->rules[] = [
140
                    self::ARRAY_KEY_ANTECEDENT => $antecedent,
141
                    self::ARRAY_KEY_CONSEQUENT => $consequent,
142
                    self::ARRAY_KEY_SUPPORT => $this->support($frequent),
143
                    self::ARRAY_KEY_CONFIDENCE => $confidence,
144
                ];
145
            }
146
        }
147
    }
148
149
    /**
150
     * Generates the power set for given item set $sample.
151
     *
152
     * @param mixed[] $sample
153
     *
154
     * @return mixed[][]
155
     */
156
    private function powerSet(array $sample): array
157
    {
158
        $results = [[]];
159
        foreach ($sample as $item) {
160
            foreach ($results as $combination) {
161
                $results[] = array_merge([$item], $combination);
162
            }
163
        }
164
165
        return $results;
166
    }
167
168
    /**
169
     * Generates all proper subsets for given set $sample without the empty set.
170
     *
171
     * @param mixed[] $sample
172
     *
173
     * @return mixed[][]
174
     */
175
    private function antecedents(array $sample): array
176
    {
177
        $cardinality = count($sample);
178
        $antecedents = $this->powerSet($sample);
179
180
        return array_filter($antecedents, static function ($antecedent) use ($cardinality): bool {
181
            return (count($antecedent) != $cardinality) && ($antecedent != []);
182
        });
183
    }
184
185
    /**
186
     * Calculates frequent k = 1 item sets.
187
     *
188
     * @return mixed[][]
189
     */
190
    private function items(): array
191
    {
192
        $items = [];
193
194
        foreach ($this->samples as $sample) {
195
            foreach ($sample as $item) {
196
                if (!in_array($item, $items, true)) {
197
                    $items[] = $item;
198
                }
199
            }
200
        }
201
202
        return array_map(static function ($entry): array {
203
            return [$entry];
204
        }, $items);
205
    }
206
207
    /**
208
     * Returns frequent item sets only.
209
     *
210
     * @param mixed[][] $samples
211
     *
212
     * @return mixed[][]
213
     */
214
    private function frequent(array $samples): array
215
    {
216
        return array_values(array_filter($samples, function ($entry): bool {
217
            return $this->support($entry) >= $this->support;
218
        }));
219
    }
220
221
    /**
222
     * Calculates frequent k item sets, where count($samples) == $k - 1.
223
     *
224
     * @param mixed[][] $samples
225
     *
226
     * @return mixed[][]
227
     */
228
    private function candidates(array $samples): array
229
    {
230
        $candidates = [];
231
232
        foreach ($samples as $p) {
233
            foreach ($samples as $q) {
234
                if (count(array_merge(array_diff($p, $q), array_diff($q, $p))) != 2) {
235
                    continue;
236
                }
237
238
                $candidate = array_values(array_unique(array_merge($p, $q)));
239
240
                if ($this->contains($candidates, $candidate)) {
241
                    continue;
242
                }
243
244
                foreach ($this->samples as $sample) {
245
                    if ($this->subset($sample, $candidate)) {
246
                        $candidates[] = $candidate;
247
248
                        continue 2;
249
                    }
250
                }
251
            }
252
        }
253
254
        return $candidates;
255
    }
256
257
    /**
258
     * Calculates confidence for $set. Confidence is the relative amount of sets containing $subset which also contain
259
     * $set.
260
     *
261
     * @param mixed[] $set
262
     * @param mixed[] $subset
263
     */
264
    private function confidence(array $set, array $subset): float
265
    {
266
        return $this->support($set) / $this->support($subset);
267
    }
268
269
    /**
270
     * Calculates support for item set $sample. Support is the relative amount of sets containing $sample in the data
271
     * pool.
272
     *
273
     * @see \Phpml\Association\Apriori::samples
274
     *
275
     * @param mixed[] $sample
276
     */
277
    private function support(array $sample): float
278
    {
279
        return $this->frequency($sample) / count($this->samples);
280
    }
281
282
    /**
283
     * Counts occurrences of $sample as subset in data pool.
284
     *
285
     * @see \Phpml\Association\Apriori::samples
286
     *
287
     * @param mixed[] $sample
288
     */
289
    private function frequency(array $sample): int
290
    {
291
        return count(array_filter($this->samples, function ($entry) use ($sample): bool {
292
            return $this->subset($entry, $sample);
293
        }));
294
    }
295
296
    /**
297
     * Returns true if set is an element of system.
298
     *
299
     * @see \Phpml\Association\Apriori::equals()
300
     *
301
     * @param mixed[][] $system
302
     * @param mixed[]   $set
303
     */
304
    private function contains(array $system, array $set): bool
305
    {
306
        return (bool) array_filter($system, function ($entry) use ($set): bool {
307
            return $this->equals($entry, $set);
308
        });
309
    }
310
311
    /**
312
     * Returns true if subset is a (proper) subset of set by its items string representation.
313
     *
314
     * @param mixed[] $set
315
     * @param mixed[] $subset
316
     */
317
    private function subset(array $set, array $subset): bool
318
    {
319
        return count(array_diff($subset, array_intersect($subset, $set))) === 0;
320
    }
321
322
    /**
323
     * Returns true if string representation of items does not differ.
324
     *
325
     * @param mixed[] $set1
326
     * @param mixed[] $set2
327
     */
328
    private function equals(array $set1, array $set2): bool
329
    {
330
        return array_diff($set1, $set2) == array_diff($set2, $set1);
331
    }
332
}
333