These results are based on our legacy PHP analysis, consider migrating to our new PHP analysis engine instead. Learn more
1 | <?php |
||
2 | |||
3 | declare(strict_types=1); |
||
4 | |||
5 | namespace Phpml\Association; |
||
6 | |||
7 | use Phpml\Helper\Predictable; |
||
8 | use Phpml\Helper\Trainable; |
||
9 | |||
10 | class Apriori implements Associator |
||
11 | { |
||
12 | use Trainable, Predictable; |
||
13 | |||
14 | public const ARRAY_KEY_ANTECEDENT = 'antecedent'; |
||
15 | |||
16 | public const ARRAY_KEY_CONFIDENCE = 'confidence'; |
||
17 | |||
18 | public const ARRAY_KEY_CONSEQUENT = 'consequent'; |
||
19 | |||
20 | public const ARRAY_KEY_SUPPORT = 'support'; |
||
21 | |||
22 | /** |
||
23 | * Minimum relative probability of frequent transactions. |
||
24 | * |
||
25 | * @var float |
||
26 | */ |
||
27 | private $confidence; |
||
28 | |||
29 | /** |
||
30 | * The large set contains frequent k-length item sets. |
||
31 | * |
||
32 | * @var mixed[][][] |
||
33 | */ |
||
34 | private $large = []; |
||
35 | |||
36 | /** |
||
37 | * Minimum relative frequency of transactions. |
||
38 | * |
||
39 | * @var float |
||
40 | */ |
||
41 | private $support; |
||
42 | |||
43 | /** |
||
44 | * The generated Apriori association rules. |
||
45 | * |
||
46 | * @var mixed[][] |
||
47 | */ |
||
48 | private $rules = []; |
||
49 | |||
50 | /** |
||
51 | * Apriori constructor. |
||
52 | */ |
||
53 | public function __construct(float $support = 0.0, float $confidence = 0.0) |
||
54 | { |
||
55 | $this->support = $support; |
||
56 | $this->confidence = $confidence; |
||
57 | } |
||
58 | |||
59 | /** |
||
60 | * Get all association rules which are generated for every k-length frequent item set. |
||
61 | * |
||
62 | * @return mixed[][] |
||
63 | */ |
||
64 | public function getRules(): array |
||
65 | { |
||
66 | if (!$this->large) { |
||
0 ignored issues
–
show
|
|||
67 | $this->large = $this->apriori(); |
||
68 | } |
||
69 | |||
70 | if ($this->rules) { |
||
0 ignored issues
–
show
The expression
$this->rules of type array[] is implicitly converted to a boolean; are you sure this is intended? If so, consider using ! empty($expr) instead to make it clear that you intend to check for an array without elements.
This check marks implicit conversions of arrays to boolean values in a comparison. While in PHP an empty array is considered to be equal (but not identical) to false, this is not always apparent. Consider making the comparison explicit by using
Loading history...
|
|||
71 | return $this->rules; |
||
72 | } |
||
73 | |||
74 | $this->rules = []; |
||
75 | |||
76 | $this->generateAllRules(); |
||
77 | |||
78 | return $this->rules; |
||
79 | } |
||
80 | |||
81 | /** |
||
82 | * Generates frequent item sets. |
||
83 | * |
||
84 | * @return mixed[][][] |
||
85 | */ |
||
86 | public function apriori(): array |
||
87 | { |
||
88 | $L = []; |
||
89 | $L[1] = $this->items(); |
||
90 | $L[1] = $this->frequent($L[1]); |
||
91 | |||
92 | for ($k = 2; !empty($L[$k - 1]); ++$k) { |
||
93 | $L[$k] = $this->candidates($L[$k - 1]); |
||
94 | $L[$k] = $this->frequent($L[$k]); |
||
95 | } |
||
96 | |||
97 | return $L; |
||
98 | } |
||
99 | |||
100 | /** |
||
101 | * @param mixed[] $sample |
||
102 | * |
||
103 | * @return mixed[][] |
||
104 | */ |
||
105 | protected function predictSample(array $sample): array |
||
106 | { |
||
107 | $predicts = array_values(array_filter($this->getRules(), function ($rule) use ($sample) { |
||
108 | return $this->equals($rule[self::ARRAY_KEY_ANTECEDENT], $sample); |
||
109 | })); |
||
110 | |||
111 | return array_map(function ($rule) { |
||
112 | return $rule[self::ARRAY_KEY_CONSEQUENT]; |
||
113 | }, $predicts); |
||
114 | } |
||
115 | |||
116 | /** |
||
117 | * Generate rules for each k-length frequent item set. |
||
118 | */ |
||
119 | private function generateAllRules(): void |
||
120 | { |
||
121 | for ($k = 2; !empty($this->large[$k]); ++$k) { |
||
122 | foreach ($this->large[$k] as $frequent) { |
||
123 | $this->generateRules($frequent); |
||
124 | } |
||
125 | } |
||
126 | } |
||
127 | |||
128 | /** |
||
129 | * Generate confident rules for frequent item set. |
||
130 | * |
||
131 | * @param mixed[] $frequent |
||
132 | */ |
||
133 | private function generateRules(array $frequent): void |
||
134 | { |
||
135 | foreach ($this->antecedents($frequent) as $antecedent) { |
||
136 | $confidence = $this->confidence($frequent, $antecedent); |
||
137 | if ($this->confidence <= $confidence) { |
||
138 | $consequent = array_values(array_diff($frequent, $antecedent)); |
||
139 | $this->rules[] = [ |
||
140 | self::ARRAY_KEY_ANTECEDENT => $antecedent, |
||
141 | self::ARRAY_KEY_CONSEQUENT => $consequent, |
||
142 | self::ARRAY_KEY_SUPPORT => $this->support($consequent), |
||
143 | self::ARRAY_KEY_CONFIDENCE => $confidence, |
||
144 | ]; |
||
145 | } |
||
146 | } |
||
147 | } |
||
148 | |||
149 | /** |
||
150 | * Generates the power set for given item set $sample. |
||
151 | * |
||
152 | * @param mixed[] $sample |
||
153 | * |
||
154 | * @return mixed[][] |
||
155 | */ |
||
156 | private function powerSet(array $sample): array |
||
157 | { |
||
158 | $results = [[]]; |
||
159 | foreach ($sample as $item) { |
||
160 | foreach ($results as $combination) { |
||
161 | $results[] = array_merge([$item], $combination); |
||
162 | } |
||
163 | } |
||
164 | |||
165 | return $results; |
||
166 | } |
||
167 | |||
168 | /** |
||
169 | * Generates all proper subsets for given set $sample without the empty set. |
||
170 | * |
||
171 | * @param mixed[] $sample |
||
172 | * |
||
173 | * @return mixed[][] |
||
174 | */ |
||
175 | private function antecedents(array $sample): array |
||
176 | { |
||
177 | $cardinality = count($sample); |
||
178 | $antecedents = $this->powerSet($sample); |
||
179 | |||
180 | return array_filter($antecedents, function ($antecedent) use ($cardinality) { |
||
181 | return (count($antecedent) != $cardinality) && ($antecedent != []); |
||
182 | }); |
||
183 | } |
||
184 | |||
185 | /** |
||
186 | * Calculates frequent k = 1 item sets. |
||
187 | * |
||
188 | * @return mixed[][] |
||
189 | */ |
||
190 | private function items(): array |
||
191 | { |
||
192 | $items = []; |
||
193 | |||
194 | foreach ($this->samples as $sample) { |
||
195 | foreach ($sample as $item) { |
||
196 | if (!in_array($item, $items, true)) { |
||
197 | $items[] = $item; |
||
198 | } |
||
199 | } |
||
200 | } |
||
201 | |||
202 | return array_map(function ($entry) { |
||
203 | return [$entry]; |
||
204 | }, $items); |
||
205 | } |
||
206 | |||
207 | /** |
||
208 | * Returns frequent item sets only. |
||
209 | * |
||
210 | * @param mixed[][] $samples |
||
211 | * |
||
212 | * @return mixed[][] |
||
213 | */ |
||
214 | private function frequent(array $samples): array |
||
215 | { |
||
216 | return array_filter($samples, function ($entry) { |
||
217 | return $this->support($entry) >= $this->support; |
||
218 | }); |
||
219 | } |
||
220 | |||
221 | /** |
||
222 | * Calculates frequent k item sets, where count($samples) == $k - 1. |
||
223 | * |
||
224 | * @param mixed[][] $samples |
||
225 | * |
||
226 | * @return mixed[][] |
||
227 | */ |
||
228 | private function candidates(array $samples): array |
||
229 | { |
||
230 | $candidates = []; |
||
231 | |||
232 | foreach ($samples as $p) { |
||
233 | foreach ($samples as $q) { |
||
234 | if (count(array_merge(array_diff($p, $q), array_diff($q, $p))) != 2) { |
||
235 | continue; |
||
236 | } |
||
237 | |||
238 | $candidate = array_unique(array_merge($p, $q)); |
||
239 | |||
240 | if ($this->contains($candidates, $candidate)) { |
||
241 | continue; |
||
242 | } |
||
243 | |||
244 | foreach ((array) $this->samples as $sample) { |
||
245 | if ($this->subset($sample, $candidate)) { |
||
246 | $candidates[] = $candidate; |
||
247 | |||
248 | continue 2; |
||
249 | } |
||
250 | } |
||
251 | } |
||
252 | } |
||
253 | |||
254 | return $candidates; |
||
255 | } |
||
256 | |||
257 | /** |
||
258 | * Calculates confidence for $set. Confidence is the relative amount of sets containing $subset which also contain |
||
259 | * $set. |
||
260 | * |
||
261 | * @param mixed[] $set |
||
262 | * @param mixed[] $subset |
||
263 | */ |
||
264 | private function confidence(array $set, array $subset): float |
||
265 | { |
||
266 | return $this->support($set) / $this->support($subset); |
||
267 | } |
||
268 | |||
269 | /** |
||
270 | * Calculates support for item set $sample. Support is the relative amount of sets containing $sample in the data |
||
271 | * pool. |
||
272 | * |
||
273 | * @see \Phpml\Association\Apriori::samples |
||
274 | * |
||
275 | * @param mixed[] $sample |
||
276 | */ |
||
277 | private function support(array $sample): float |
||
278 | { |
||
279 | return $this->frequency($sample) / count($this->samples); |
||
280 | } |
||
281 | |||
282 | /** |
||
283 | * Counts occurrences of $sample as subset in data pool. |
||
284 | * |
||
285 | * @see \Phpml\Association\Apriori::samples |
||
286 | * |
||
287 | * @param mixed[] $sample |
||
288 | */ |
||
289 | private function frequency(array $sample): int |
||
290 | { |
||
291 | return count(array_filter($this->samples, function ($entry) use ($sample) { |
||
292 | return $this->subset($entry, $sample); |
||
293 | })); |
||
294 | } |
||
295 | |||
296 | /** |
||
297 | * Returns true if set is an element of system. |
||
298 | * |
||
299 | * @see \Phpml\Association\Apriori::equals() |
||
300 | * |
||
301 | * @param mixed[][] $system |
||
302 | * @param mixed[] $set |
||
303 | */ |
||
304 | private function contains(array $system, array $set): bool |
||
305 | { |
||
306 | return (bool) array_filter($system, function ($entry) use ($set) { |
||
307 | return $this->equals($entry, $set); |
||
308 | }); |
||
309 | } |
||
310 | |||
311 | /** |
||
312 | * Returns true if subset is a (proper) subset of set by its items string representation. |
||
313 | * |
||
314 | * @param mixed[] $set |
||
315 | * @param mixed[] $subset |
||
316 | */ |
||
317 | private function subset(array $set, array $subset): bool |
||
318 | { |
||
319 | return !array_diff($subset, array_intersect($subset, $set)); |
||
320 | } |
||
321 | |||
322 | /** |
||
323 | * Returns true if string representation of items does not differ. |
||
324 | * |
||
325 | * @param mixed[] $set1 |
||
326 | * @param mixed[] $set2 |
||
327 | */ |
||
328 | private function equals(array $set1, array $set2): bool |
||
329 | { |
||
330 | return array_diff($set1, $set2) == array_diff($set2, $set1); |
||
331 | } |
||
332 | } |
||
333 |
This check marks implicit conversions of arrays to boolean values in a comparison. While in PHP an empty array is considered to be equal (but not identical) to false, this is not always apparent.
Consider making the comparison explicit by using
empty(..)
or! empty(...)
instead.