Complex classes like Space often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to finding such a component is to look for fields/methods that share the same prefixes or suffixes. You can also look at the cohesion graph to spot any unconnected or weakly connected components.
Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.
While breaking up the class, it is a good idea to analyze how other classes use Space, and based on these observations, apply Extract Interface, too.
1 | <?php |
||
12 | class Space extends SplObjectStorage |
||
13 | { |
||
14 | /** |
||
15 | * @var int |
||
16 | */ |
||
17 | protected $dimension; |
||
18 | |||
/**
 * Creates an empty space whose points all share the same number of coordinates.
 *
 * @param int $dimension number of coordinates each point of this space must have
 *
 * @throws LogicException when $dimension is lower than 1
 */
public function __construct($dimension)
{
    $isInvalid = $dimension < 1;

    if ($isInvalid) {
        throw new LogicException('a space dimension cannot be null or negative');
    }

    $this->dimension = $dimension;
}
||
27 | |||
/**
 * Exports the space as a plain array.
 *
 * @return array a single-key array: 'points' maps to the list of toArray()
 *               results of every stored point, in iteration order
 */
public function toArray(): array
{
    // Drop the iterator keys so the result is a sequential list.
    $stored = iterator_to_array($this, false);

    $exported = array_map(
        static function ($point) {
            return $point->toArray();
        },
        $stored
    );

    return ['points' => $exported];
}
||
37 | |||
/**
 * Builds a point from raw coordinates without storing it in the space.
 *
 * @param array $coordinates one value per dimension of this space
 *
 * @throws LogicException when the number of coordinates differs from the space dimension
 */
public function newPoint(array $coordinates): Point
{
    // Loose comparison kept on purpose: the dimension is not type-enforced by the constructor.
    if (count($coordinates) == $this->dimension) {
        return new Point($coordinates);
    }

    throw new LogicException('('.implode(',', $coordinates).') is not a point of this space');
}
||
46 | |||
/**
 * Creates a point from the given coordinates and stores it in the space.
 *
 * @param array $coordinates one value per dimension of this space
 * @param mixed $data        optional payload associated with the stored point
 */
public function addPoint(array $coordinates, $data = null): void
{
    $point = $this->newPoint($coordinates);

    $this->attach($point, $data);
}
||
54 | |||
/**
 * Stores a point in the space, rejecting anything that is not a Point.
 *
 * @param Point $point point to store
 * @param mixed $data  optional payload associated with the point
 *
 * @throws InvalidArgumentException when $point is not a Point instance
 */
public function attach($point, $data = null): void
{
    if ($point instanceof Point) {
        parent::attach($point, $data);

        return;
    }

    throw new InvalidArgumentException('can only attach points to spaces');
}
||
67 | |||
/**
 * Returns the number of coordinates each point of this space has.
 */
public function getDimension(): int
{
    $dimension = $this->dimension;

    return $dimension;
}
||
72 | |||
/**
 * Computes the per-axis minimum and maximum over every stored point.
 *
 * @return array|bool false when the space holds no point; otherwise a
 *                    two-element array [min point, max point]
 */
public function getBoundaries()
{
    if (count($this) === 0) {
        return false;
    }

    // Both bounds start with every coordinate unset (null) and are filled in below.
    $blank = array_fill(0, $this->dimension, null);
    $lower = $this->newPoint($blank);
    $upper = $this->newPoint($blank);

    foreach ($this as $candidate) {
        for ($axis = 0; $axis < $this->dimension; ++$axis) {
            if ($lower[$axis] > $candidate[$axis] || $lower[$axis] === null) {
                $lower[$axis] = $candidate[$axis];
            }

            if ($upper[$axis] < $candidate[$axis] || $upper[$axis] === null) {
                $upper[$axis] = $candidate[$axis];
            }
        }
    }

    return [$lower, $upper];
}
||
94 | |||
/**
 * Draws a random point whose coordinates lie between those of $min and $max.
 *
 * Integer bounds yield a random integer coordinate (both bounds inclusive).
 * Any other bounds yield a uniformly distributed float, because random_int()
 * only accepts integers and would otherwise truncate fractional boundaries.
 *
 * @param Point $min lower bound for every axis
 * @param Point $max upper bound for every axis
 *
 * @return Point a new point that is not attached to the space
 */
public function getRandomPoint(Point $min, Point $max): Point
{
    $point = $this->newPoint(array_fill(0, $this->dimension, null));

    for ($n = 0; $n < $this->dimension; ++$n) {
        $low = $min[$n];
        $high = $max[$n];

        if (is_int($low) && is_int($high)) {
            $point[$n] = random_int($low, $high);

            continue;
        }

        // Uniform ratio in [0, 1] scaled onto [$low, $high].
        $ratio = random_int(0, PHP_INT_MAX) / PHP_INT_MAX;
        $point[$n] = $low + $ratio * ($high - $low);
    }

    return $point;
}
||
105 | |||
/**
 * Partitions the points of this space into clusters.
 *
 * @param int $clustersNumber number of clusters requested
 * @param int $initMethod     one of the KMeans::INIT_* constants selecting how
 *                            the initial clusters are seeded
 *
 * @return array|Cluster[] the resulting clusters; empty when $initMethod is not recognised
 */
public function cluster(int $clustersNumber, int $initMethod = KMeans::INIT_RANDOM): array
{
    $clusters = $this->initializeClusters($clustersNumber, $initMethod);

    // Run at least one pass; iterate() reports true once a pass moves no point.
    $converged = false;
    while (!$converged) {
        $converged = $this->iterate($clusters);
    }

    return $clusters;
}
||
118 | |||
/**
 * Seeds the initial clusters and puts every point of the space into the first one.
 *
 * @param int $clustersNumber number of clusters requested
 * @param int $initMethod     one of the KMeans::INIT_* constants
 *
 * @return array|Cluster[] the seeded clusters, or an empty array for an unknown $initMethod
 */
protected function initializeClusters(int $clustersNumber, int $initMethod): array
{
    // Loose comparisons mirror the semantics of a switch statement.
    if ($initMethod == KMeans::INIT_RANDOM) {
        $clusters = $this->initializeRandomClusters($clustersNumber);
    } elseif ($initMethod == KMeans::INIT_KMEANS_PLUS_PLUS) {
        $clusters = $this->initializeKMPPClusters($clustersNumber);
    } else {
        return [];
    }

    // All points start in the first cluster; iterate() redistributes them afterwards.
    $clusters[0]->attachAll($this);

    return $clusters;
}
||
143 | |||
/**
 * Performs one reassignment pass over the given clusters.
 *
 * Every point whose closest cluster differs from its current one is queued to
 * move; the moves are applied after the scan, then each centroid is updated.
 *
 * @param array|Cluster[] $clusters clusters to rebalance
 *
 * @return bool true when no point changed cluster during this pass
 */
protected function iterate($clusters): bool
{
    $incoming = new SplObjectStorage();
    $outgoing = new SplObjectStorage();
    $stable = true;

    foreach ($clusters as $current) {
        foreach ($current as $point) {
            $nearest = $point->getClosest($clusters);

            if ($nearest === $current) {
                continue;
            }

            if (!isset($incoming[$nearest])) {
                $incoming[$nearest] = new SplObjectStorage();
            }

            if (!isset($outgoing[$current])) {
                $outgoing[$current] = new SplObjectStorage();
            }

            $incoming[$nearest]->attach($point);
            $outgoing[$current]->attach($point);

            $stable = false;
        }
    }

    // Moves are applied only after the scan so clusters are not modified while being iterated.
    foreach ($incoming as $target) {
        $target->attachAll($incoming[$target]);
    }

    foreach ($outgoing as $origin) {
        $origin->detachAll($outgoing[$origin]);
    }

    foreach ($clusters as $each) {
        $each->updateCentroid();
    }

    return $stable;
}
||
181 | |||
/**
 * Seeds clusters one at a time, favouring points that are far from the
 * clusters already chosen (used for KMeans::INIT_KMEANS_PLUS_PLUS).
 *
 * The first cluster is centred on the first stored point. Each further
 * cluster is centred on a point drawn with a probability proportional to its
 * distance to the closest existing cluster, as reported by getDistanceWith().
 *
 * NOTE(review): assumes the space holds at least one point; confirm callers
 * never reach this method with an empty space.
 *
 * @param int $clustersNumber number of clusters requested; at least one is always created
 *
 * @return array|Cluster[] the seeded clusters
 */
protected function initializeKMPPClusters(int $clustersNumber): array
{
    $clusters = [];
    $this->rewind();

    $clusters[] = new Cluster($this, $this->current()->getCoordinates());

    $distances = new SplObjectStorage();

    for ($i = 1; $i < $clustersNumber; ++$i) {
        $sum = 0;
        foreach ($this as $point) {
            $distance = $point->getDistanceWith($point->getClosest($clusters));
            $sum += $distances[$point] = $distance;
        }

        // Float threshold in [0, $sum]: casting $sum to int would collapse the
        // draw to 0 whenever the accumulated distance is below 1.
        $threshold = $sum * (random_int(0, PHP_INT_MAX) / PHP_INT_MAX);

        $chosen = null;
        foreach ($this as $point) {
            // Remember the candidate so that rounding can never leave the draw without a result.
            $chosen = $point;

            if (($threshold -= $distances[$point]) > 0) {
                continue;
            }

            break;
        }

        $clusters[] = new Cluster($this, $chosen->getCoordinates());
    }

    return $clusters;
}
||
212 | |||
213 | private function initializeRandomClusters(int $clustersNumber): array |
||
224 | } |
||
225 |
This check looks for `do` loops that have no statements or where all statements have been commented out. This may be the result of changes made for debugging, or the code may simply be obsolete. Consider removing the loop.