Total Complexity | 52 |
Total Lines | 359 |
Duplicated Lines | 0 % |
Changes | 0 |
Complex classes like Aggregator often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to finding such a component is to look for fields and methods that share the same prefixes or suffixes.
Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.
While breaking up the class, it is a good idea to analyze how other classes use Aggregator, and based on these observations, apply Extract Interface, too.
1 | <?php |
||
13 | class Aggregator |
||
14 | { |
||
15 | /** @var \SimpleSAML\Configuration */ |
||
16 | private Configuration $statconfig; |
||
17 | |||
18 | /** @var string */ |
||
19 | private string $statdir; |
||
20 | |||
21 | /** @var string */ |
||
22 | private string $inputfile; |
||
23 | |||
24 | /** @var array */ |
||
25 | private array $statrules; |
||
26 | |||
27 | /** @var int */ |
||
28 | private int $offset; |
||
29 | |||
30 | /** @var array|null */ |
||
31 | private ?array $metadata = null; |
||
32 | |||
33 | /** @var bool */ |
||
34 | private bool $fromcmdline; |
||
35 | |||
36 | /** @var int */ |
||
37 | private int $starttime; |
||
38 | |||
39 | /** @var array */ |
||
40 | private array $timeres; |
||
41 | |||
42 | |||
/**
 * Set up the aggregator from the 'module_statistics.php' configuration.
 *
 * Reads the 'statdir', 'inputfile', 'statrules' and 'timeres' options plus the
 * optional integer 'offset' (0 when absent), and records the current time as
 * the start of this run.
 *
 * @param bool $fromcmdline Stored as-is; aggregate() echoes progress lines when it is true.
 */
public function __construct(bool $fromcmdline = false)
{
    $config = Configuration::getConfig('module_statistics.php');

    $this->fromcmdline = $fromcmdline;
    $this->statconfig = $config;

    // Lookups stay in their original order so the first failing option is unchanged.
    $this->statdir = $config->getString('statdir');
    $this->inputfile = $config->getString('inputfile');
    $this->statrules = $config->getValue('statrules');
    $this->timeres = $config->getValue('timeres');
    $this->offset = $config->getOptionalInteger('offset', 0);

    $this->starttime = time();
}
||
61 | |||
62 | |||
/**
 * Echo the statistics directory, the input file and the offset, one per line.
 */
public function dumpConfig(): void
{
    $report = [
        'Statistics directory : ' . $this->statdir,
        'Input file : ' . $this->inputfile,
        'Offset : ' . $this->offset,
    ];

    foreach ($report as $entry) {
        echo $entry . "\n";
    }
}
||
71 | |||
72 | |||
/**
 * Echo the current memory usage of the script in megabytes (two decimals).
 */
public function debugInfo(): void
{
    // memory_get_usage() reports bytes; 1024 * 1024 bytes make one megabyte.
    $megabytes = memory_get_usage() / (1024 * 1024);

    echo 'Memory usage : ' . number_format($megabytes, 2) . " MB\n";
}
||
80 | |||
81 | |||
/**
 * Load the run metadata stored in '<statdir>/.stat.metadata', if any.
 *
 * The metadata is reset to null when the file does not exist, cannot be read,
 * or does not contain a serialized array, so a damaged file behaves like a
 * missing one instead of assigning a non-array value to the typed property.
 */
public function loadMetadata(): void
{
    $filename = $this->statdir . '/.stat.metadata';
    $this->metadata = null;

    if (!file_exists($filename)) {
        return;
    }

    $serialized = file_get_contents($filename);
    if ($serialized === false) {
        return;
    }

    // The metadata written by this class consists of scalars only, so object
    // instantiation is disabled while decoding.
    $metadata = unserialize($serialized, ['allowed_classes' => false]);
    if (is_array($metadata)) {
        $this->metadata = $metadata;
    }
}
||
93 | |||
94 | |||
/**
 * Return the metadata currently held by this instance.
 *
 * @return array|null The metadata array, or null when none has been loaded or set.
 */
public function getMetadata(): ?array
{
    return $this->metadata;
}
||
102 | |||
103 | |||
/**
 * Stamp the metadata with run statistics and write it to '<statdir>/.stat.metadata'.
 *
 * Records the elapsed seconds since construction ('time'), the current memory
 * usage ('memory') and the current timestamp ('lastrun'), then serializes the
 * whole metadata array to disk under an exclusive lock.
 */
public function saveMetadata(): void
{
    $elapsed = time() - $this->starttime;
    $this->metadata['time'] = $elapsed;

    $memory = memory_get_usage();
    $this->metadata['memory'] = $memory;

    $this->metadata['lastrun'] = time();

    $target = $this->statdir . '/.stat.metadata';
    $payload = serialize($this->metadata);
    file_put_contents($target, $payload, LOCK_EX);
}
||
115 | |||
116 | |||
/**
 * Read the input log and count STAT entries per rule, time resolution and slot.
 *
 * Previously saved metadata is loaded first. STAT lines with a timestamp older
 * than the stored 'notBefore' value are skipped, and lines carrying exactly that
 * timestamp are skipped up to and including the one whose SHA-1 equals the stored
 * 'lastlinehash'. After the file has been read, 'notBefore', 'lastline' and
 * 'lastlinehash' are updated in memory only; nothing is written to disk here.
 *
 * @param bool $debug When true, parse details are echoed for every STAT line and
 *                    the script exits once 13 STAT lines have been read.
 * @return array Counters indexed as [rule][time resolution][file slot][time slot][value];
 *               the '_' entry holds the total for that time slot.
 * @throws \Exception If the statistics directory or the input file does not exist,
 *                    or the input file cannot be opened.
 */
public function aggregate(bool $debug = false): array
{
    $this->loadMetadata();

    if (!is_dir($this->statdir)) {
        throw new Exception('Statistics module: output dir does not exist [' . $this->statdir . ']');
    }

    if (!file_exists($this->inputfile)) {
        throw new Exception('Statistics module: input file does not exist [' . $this->inputfile . ']');
    }

    $file = fopen($this->inputfile, 'r');

    if ($file === false) {
        throw new Exception('Statistics module: unable to open file [' . $this->inputfile . ']');
    }

    try {
        $datestart = $this->statconfig->getOptionalValue('datestart', 0);
        // Looked up once; also used for the debug output inside the loop.
        $datelength = $this->statconfig->getOptionalValue('datelength', 15);
        $offsetspan = $this->statconfig->getOptionalValue('offsetspan', 44);

        $logparser = new LogParser($datestart, $datelength, $offsetspan);
        $datehandler = [
            'default' => new DateHandler($this->offset),
            'month' => new DateHandlerMonth($this->offset),
        ];

        // Fall back to the defaults when no metadata exists or a key is missing.
        $notBefore = $this->metadata['notBefore'] ?? 0;
        $lastlinehash = $this->metadata['lastlinehash'] ?? '-';
        $lastRead = 0;

        $lastlogline = 'sdfsdf';
        $lastlineflip = false;
        $results = [];

        $i = 0;
        // Parse through log file, line by line
        while (!feof($file)) {
            $logline = strval(fgets($file, 4096));

            // Continue if STAT is not found on line
            if (!preg_match('/STAT/', $logline)) {
                continue;
            }

            $i++;
            $lastlogline = $logline;

            // Parse log, and extract epoch time and rest of content.
            $epoch = $logparser->parseEpoch($logline);
            $content = $logparser->parseContent($logline);
            // A line with fewer columns yields an empty action instead of a warning.
            $action = trim($content[5] ?? '');

            if ($this->fromcmdline && ($i % 10000) == 0) {
                echo "Read line " . $i . "\n";
            }

            if ($debug) {
                echo "----------------------------------------\n";
                echo 'Log line: ' . $logline . "\n";
                echo 'Date parse [' . substr($logline, 0, $datelength) .
                    '] to [' . date(DATE_RFC822, $epoch) . ']' . "\n";
                $ret = print_r($content, true);
                echo htmlentities($ret);

                // Debug mode only inspects the first 13 STAT lines, then ends the script.
                if ($i >= 13) {
                    exit;
                }
            }

            if ($epoch > $lastRead) {
                $lastRead = $epoch;
            }

            // Lines sharing the timestamp of the previous run's last line are skipped
            // until the line with the stored hash has been passed.
            if ($epoch === $notBefore) {
                if (!$lastlineflip) {
                    if (sha1($logline) === $lastlinehash) {
                        $lastlineflip = true;
                    }
                    continue;
                }
            }

            if ($epoch < $notBefore) {
                continue;
            }

            // Iterate all the statrules from config.
            foreach ($this->statrules as $rulename => $rule) {
                $type = 'aggregate';

                if (array_key_exists('type', $rule)) {
                    $type = $rule['type'];
                }

                if ($type !== 'aggregate') {
                    continue;
                }

                foreach ($this->timeres as $tres => $tresconfig) {
                    $dh = 'default';
                    if (isset($tresconfig['customDateHandler'])) {
                        $dh = $tresconfig['customDateHandler'];
                    }

                    $timeslot = $datehandler['default']->toSlot($epoch, $tresconfig['slot']);
                    $fileslot = $datehandler[$dh]->toSlot($epoch, intval($tresconfig['fileslot']));

                    if (isset($rule['action']) && ($action !== $rule['action'])) {
                        continue;
                    }

                    $difcol = self::getDifCol($content, $rule['col']);

                    if (!isset($results[$rulename][$tres][$fileslot][$timeslot]['_'])) {
                        $results[$rulename][$tres][$fileslot][$timeslot]['_'] = 0;
                    }
                    if (!isset($results[$rulename][$tres][$fileslot][$timeslot][$difcol])) {
                        $results[$rulename][$tres][$fileslot][$timeslot][$difcol] = 0;
                    }

                    $results[$rulename][$tres][$fileslot][$timeslot]['_']++;
                    $results[$rulename][$tres][$fileslot][$timeslot][$difcol]++;
                }
            }
        }

        $this->metadata['notBefore'] = $lastRead;
        $this->metadata['lastline'] = $lastlogline;
        $this->metadata['lastlinehash'] = sha1($lastlogline);

        return $results;
    } finally {
        // Release the log file handle on every path, including exceptions.
        fclose($file);
    }
}
||
258 | |||
259 | |||
/**
 * Build the key used to distinguish counted values for a parsed log line.
 *
 * An integer rule selects a single column; an array rule selects several
 * columns whose trimmed values are joined with '|'. Columns that are absent
 * from $content contribute an empty string. Any other rule type yields 'NA'.
 *
 * @param array $content Parsed columns of one log line.
 * @param mixed $colrule Column index, list of column indexes, or anything else.
 * @return string The trimmed column value(s), or 'NA' for an unsupported rule.
 */
private static function getDifCol(array $content, $colrule): string
{
    if (is_int($colrule)) {
        return trim((string) ($content[$colrule] ?? ''));
    }

    if (is_array($colrule)) {
        $parts = [];
        foreach ($colrule as $column) {
            $parts[] = trim((string) ($content[$column] ?? ''));
        }

        return implode('|', $parts);
    }

    return 'NA';
}
||
279 | |||
280 | |||
/**
 * Sum per-slot counters from every argument into one data set.
 *
 * Each argument is expected to map slot => [key => count]. Counts for the same
 * slot and key are added together, and slots and keys keep the order in which
 * they are first seen. Arguments that are not arrays (for example null or false
 * when no earlier data exists) are skipped.
 *
 * @param mixed $previous Earlier data set, or a non-array value when there is none.
 * @param array $newdata Newly aggregated data set.
 * @return array The combined data set.
 */
private function cummulateData($previous, array $newdata): array
{
    $dataset = [];

    // func_get_args() is kept so that any additional data sets passed by a caller are still summed.
    foreach (func_get_args() as $item) {
        if (!is_array($item)) {
            continue;
        }

        foreach ($item as $slot => $dataarray) {
            $dataset[$slot] ??= [];

            foreach ($dataarray as $key => $data) {
                $dataset[$slot][$key] = ($dataset[$slot][$key] ?? 0) + $data;
            }
        }
    }

    return $dataset;
}
||
304 | |||
305 | |||
306 | /** |
||
307 | * @param array $results |
||
308 | */ |
||
309 | public function store(array $results): void |
||
372 | } |
||
373 | } |
||
374 |