Aggregator   B
last analyzed

Complexity

Total Complexity 52

Size/Duplication

Total Lines 359
Duplicated Lines 0 %

Importance

Changes 0
Metric Value
eloc 165
dl 0
loc 359
rs 7.44
c 0
b 0
f 0
wmc 52

10 Methods

Rating   Name   Duplication   Size   Complexity  
B store() 0 63 10
A dumpConfig() 0 5 1
A __construct() 0 12 1
A getMetadata() 0 3 1
A cummulateData() 0 17 6
A saveMetadata() 0 8 1
D aggregate() 0 135 25
A loadMetadata() 0 8 2
A getDifCol() 0 12 4
A debugInfo() 0 4 1

How to fix   Complexity   

Complex Class

Complex classes like Aggregator often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to finding such a component is to look for fields/methods that share the same prefixes or suffixes.

Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.

While breaking up the class, it is a good idea to analyze how other classes use Aggregator, and based on these observations, apply Extract Interface, too.

1
<?php
2
3
declare(strict_types=1);
4
5
namespace SimpleSAML\Module\statistics;
6
7
use Exception;
8
use SimpleSAML\Configuration;
9
10
/**
11
 * @package SimpleSAMLphp
12
 */
13
class Aggregator
14
{
15
    /** @var \SimpleSAML\Configuration */
16
    private Configuration $statconfig;
17
18
    /** @var string */
19
    private string $statdir;
20
21
    /** @var string */
22
    private string $inputfile;
23
24
    /** @var array */
25
    private array $statrules;
26
27
    /** @var int */
28
    private int $offset;
29
30
    /** @var array|null */
31
    private ?array $metadata = null;
32
33
    /** @var bool */
34
    private bool $fromcmdline;
35
36
    /** @var int */
37
    private int $starttime;
38
39
    /** @var array */
40
    private array $timeres;
41
42
43
    /**
     * Read the statistics module configuration and remember when this run started.
     *
     * The 'statdir', 'inputfile', 'statrules' and 'timeres' options are read from
     * 'module_statistics.php'; 'offset' is optional and defaults to 0.
     *
     * @param bool $fromcmdline Whether the aggregator is driven from the command line
     *                          (aggregate() prints progress output when true).
     */
    public function __construct(bool $fromcmdline = false)
    {
        $config = Configuration::getConfig('module_statistics.php');

        $this->fromcmdline = $fromcmdline;
        $this->statconfig = $config;

        $this->statdir = $config->getString('statdir');
        $this->inputfile = $config->getString('inputfile');
        $this->statrules = $config->getValue('statrules');
        $this->timeres = $config->getValue('timeres');
        $this->offset = $config->getOptionalInteger('offset', 0);

        // Reference point for the run duration stored by saveMetadata().
        $this->starttime = time();
    }
61
62
63
    /**
     * Print the statistics directory, the input file and the offset, one per line.
     */
    public function dumpConfig(): void
    {
        $lines = [
            'Statistics directory   : ' . $this->statdir,
            'Input file             : ' . $this->inputfile,
            'Offset                 : ' . $this->offset,
        ];

        echo implode("\n", $lines) . "\n";
    }
71
72
73
    /**
     * Print the current memory usage of the script in megabytes, with two decimals.
     */
    public function debugInfo(): void
    {
        $bytesPerMegabyte = 1024 * 1024;
        $megabytes = memory_get_usage() / $bytesPerMegabyte;

        echo 'Memory usage           : ' . number_format($megabytes, 2) . " MB\n";
    }
80
81
82
    /**
     * Load the metadata of the previous run from '<statdir>/.stat.metadata'.
     *
     * The in-memory metadata is reset to null when the file is missing, cannot be read, or
     * does not unserialize to an array. A corrupt file is therefore treated like a first run
     * instead of raising a TypeError when assigned to the typed property.
     */
    public function loadMetadata(): void
    {
        $this->metadata = null;

        $filename = $this->statdir . '/.stat.metadata';
        if (!file_exists($filename)) {
            return;
        }

        $raw = file_get_contents($filename);
        if ($raw === false) {
            return;
        }

        // saveMetadata() only ever serializes a plain array, so object instantiation
        // is disabled while unserializing.
        $metadata = unserialize($raw, ['allowed_classes' => false]);
        if (is_array($metadata)) {
            $this->metadata = $metadata;
        }
    }
93
94
95
    /**
     * Return the metadata currently held in memory.
     *
     * @return array|null The metadata array, or null if none has been loaded or set yet.
     */
    public function getMetadata(): ?array
    {
        return $this->metadata;
    }
102
103
104
    /**
     * Refresh the run statistics in the metadata and write it to '<statdir>/.stat.metadata'.
     *
     * The 'time' (seconds elapsed since construction), 'memory' (value of memory_get_usage())
     * and 'lastrun' (current Unix time) entries are updated, then the serialized array is
     * written under an exclusive lock.
     */
    public function saveMetadata(): void
    {
        $elapsed = time() - $this->starttime;

        $this->metadata['time'] = $elapsed;
        $this->metadata['memory'] = memory_get_usage();
        $this->metadata['lastrun'] = time();

        $target = sprintf('%s/.stat.metadata', $this->statdir);
        $payload = serialize($this->metadata);

        file_put_contents($target, $payload, LOCK_EX);
    }
115
116
117
    /**
     * Parse the configured input log and count STAT entries per rule, time resolution,
     * file slot and time slot.
     *
     * Lines handled by a previous run are skipped using the 'notBefore' epoch and the
     * 'lastlinehash' found in the loaded metadata. Before returning, 'notBefore', 'lastline'
     * and 'lastlinehash' are updated in memory only; they reach disk when saveMetadata() runs.
     *
     * @param bool $debug Echo parsing details for every STAT line. In this mode the whole
     *                    script exits once 13 STAT lines have been read.
     * @return array Counters indexed as [rulename][timeres][fileslot][timeslot][column value];
     *               the '_' key of each time slot holds the total for that slot.
     * @throws \Exception If the output directory or the input file does not exist, or the
     *                    input file cannot be opened.
     */
    public function aggregate(bool $debug = false): array
    {
        $this->loadMetadata();

        if (!is_dir($this->statdir)) {
            throw new Exception('Statistics module: output dir does not exist [' . $this->statdir . ']');
        }

        if (!file_exists($this->inputfile)) {
            throw new Exception('Statistics module: input file does not exist [' . $this->inputfile . ']');
        }

        $file = fopen($this->inputfile, 'r');

        if ($file === false) {
            throw new Exception('Statistics module: unable to open file [' . $this->inputfile . ']');
        }

        $lastRead = 0;
        $lastlogline = 'sdfsdf';
        $results = [];

        // The try/finally guarantees the log file handle is released, also when parsing throws.
        try {
            // Looked up once: used both by the parser and by the debug output below.
            $datelength = $this->statconfig->getOptionalValue('datelength', 15);

            $logparser = new LogParser(
                $this->statconfig->getOptionalValue('datestart', 0),
                $datelength,
                $this->statconfig->getOptionalValue('offsetspan', 44),
            );
            $datehandler = [
                'default' => new DateHandler($this->offset),
                'month' => new DateHandlerMonth($this->offset),
            ];

            // Fall back to "process everything" when no metadata was loaded or a key is missing.
            $notBefore = $this->metadata['notBefore'] ?? 0;
            $lastlinehash = $this->metadata['lastlinehash'] ?? '-';
            $lastlineflip = false;

            // Only rules of type 'aggregate' (the default when no type is given) are counted;
            // the selection does not depend on the log line, so it is done once up front.
            $aggregateRules = array_filter(
                $this->statrules,
                static fn ($rule): bool => !array_key_exists('type', $rule) || $rule['type'] === 'aggregate',
            );

            $i = 0;
            // Parse through log file, line by line
            while (!feof($file)) {
                $logline = strval(fgets($file, 4096));

                // Continue if STAT is not found on line
                if (strpos($logline, 'STAT') === false) {
                    continue;
                }

                $i++;
                $lastlogline = $logline;

                // Parse log, and extract epoch time and rest of content.
                $epoch = $logparser->parseEpoch($logline);
                $content = $logparser->parseContent($logline);
                $action = trim($content[5]);

                if ($this->fromcmdline && ($i % 10000) === 0) {
                    echo "Read line " . $i . "\n";
                }

                if ($debug) {
                    echo "----------------------------------------\n";
                    echo 'Log line: ' . $logline . "\n";
                    echo 'Date parse [' . substr($logline, 0, $datelength) .
                        '] to [' . date(DATE_RFC822, $epoch) . ']' . "\n";
                    echo htmlentities(print_r($content, true));
                    if ($i >= 13) {
                        // Debug runs deliberately stop the whole script here, before any
                        // metadata is updated or results are returned. `finally` does not
                        // run on exit, so the handle is closed explicitly.
                        fclose($file);
                        exit;
                    }
                }

                if ($epoch > $lastRead) {
                    $lastRead = $epoch;
                }

                // Lines carrying the same timestamp as the previous run's last line are skipped
                // up to and including the line whose hash matches the stored one.
                if ($epoch === $notBefore && !$lastlineflip) {
                    if (sha1($logline) === $lastlinehash) {
                        $lastlineflip = true;
                    }
                    continue;
                }

                if ($epoch < $notBefore) {
                    continue;
                }

                // Iterate all the aggregate statrules from config.
                foreach ($aggregateRules as $rulename => $rule) {
                    // Neither the action filter nor the column value depends on the time
                    // resolution, so both are evaluated once per rule.
                    if (isset($rule['action']) && ($action !== $rule['action'])) {
                        continue;
                    }

                    $difcol = self::getDifCol($content, $rule['col']);

                    foreach ($this->timeres as $tres => $tresconfig) {
                        $dh = $tresconfig['customDateHandler'] ?? 'default';

                        $timeslot = $datehandler['default']->toSlot($epoch, $tresconfig['slot']);
                        $fileslot = $datehandler[$dh]->toSlot($epoch, intval($tresconfig['fileslot']));

                        // Bump the slot total ('_') and the counter of this column value.
                        $counters = &$results[$rulename][$tres][$fileslot][$timeslot];
                        $counters['_'] = ($counters['_'] ?? 0) + 1;
                        $counters[$difcol] = ($counters[$difcol] ?? 0) + 1;
                        unset($counters);
                    }
                }
            }
        } finally {
            fclose($file);
        }

        $this->metadata['notBefore'] = $lastRead;
        $this->metadata['lastline'] = $lastlogline;
        $this->metadata['lastlinehash'] = sha1($lastlogline);

        return $results;
    }
258
259
260
    /**
     * Build the value a log line is grouped by, from one or several content columns.
     *
     * @param array $content Parsed columns of a log line.
     * @param mixed $colrule A single column index (int) or an array of column indexes.
     * @return string The trimmed column value, the trimmed values of all listed columns
     *                joined with '|', or 'NA' when $colrule is neither an int nor an array.
     */
    private static function getDifCol(array $content, $colrule): string
    {
        if (is_int($colrule)) {
            return trim($content[$colrule]);
        }

        if (!is_array($colrule)) {
            return 'NA';
        }

        $parts = array_map(
            static fn ($column): string => trim($content[$column]),
            $colrule,
        );

        return implode('|', $parts);
    }
279
280
281
    /**
     * Add up two slot-indexed counter sets.
     *
     * Both inputs are expected in the form [slot][key] => number. Every slot that occurs in
     * either input is present in the result, and values found under the same slot and key
     * are summed.
     *
     * @param mixed $previous Previously stored counters; anything that is not an array
     *                        (for instance the false returned by a failed unserialize())
     *                        is ignored.
     * @param array $newdata  Counters to add on top of $previous.
     * @return array The summed counters, indexed as [slot][key].
     */
    private function cummulateData($previous, array $newdata): array
    {
        $dataset = [];

        foreach ([$previous, $newdata] as $item) {
            if (!is_array($item)) {
                continue;
            }

            foreach ($item as $slot => $dataarray) {
                // Keep the slot in the result even if it carries no keys at all.
                $dataset[$slot] ??= [];

                foreach ($dataarray as $key => $data) {
                    $dataset[$slot][$key] = ($dataset[$slot][$key] ?? 0) + $data;
                }
            }
        }

        return $dataset;
    }
304
305
306
    /**
     * Merge aggregated counters into the per-fileslot '.stat' files and save the run metadata.
     *
     * For every rule, time resolution and fileslot in $results, the slot range covered by the
     * file is computed and slots without data are filled in. The result is then added to the
     * counters already stored in '<statdir>/<rule>-<timeres>-<fileslot>.stat' (if that file
     * exists and holds an array), and the file is rewritten under an exclusive lock.
     *
     * @param array $results Counters as returned by aggregate():
     *                       [rulename][timeres][fileslot][timeslot][column value] => count.
     */
    public function store(array $results): void
    {
        $datehandler = [
            'default' => new DateHandler($this->offset),
            'month' => new DateHandlerMonth($this->offset),
        ];

        // Iterate the first level of results, which is per rule, as defined in the config.
        foreach ($results as $rulename => $timeresdata) {
            // Iterate over time resolutions
            foreach ($timeresdata as $tres => $resres) {
                $tresconfig = $this->timeres[$tres] ?? [];
                $dh = $tresconfig['customDateHandler'] ?? 'default';

                // Key of the last fileslot in this result set (null when there is none).
                $lastfile = array_key_last($resres);

                // Iterate the second level of results, which is the fileslot.
                foreach ($resres as $fileno => $fileres) {
                    // The last slot that has data (null when the fileslot is empty).
                    $maxslot = array_key_last($fileres);
                    $isLastFile = $lastfile === $fileno;

                    // Get start and end slot number within the file, based on the fileslot.
                    $start = $datehandler['default']->toSlot(
                        $datehandler[$dh]->fromSlot($fileno, $tresconfig['fileslot']),
                        $tresconfig['slot'],
                    );
                    $end = $datehandler['default']->toSlot(
                        $datehandler[$dh]->fromSlot($fileno + 1, $tresconfig['fileslot']),
                        $tresconfig['slot'],
                    );

                    // Fill in missing entries and sort file results
                    $filledresult = [];
                    for ($slot = $start; $slot < $end; $slot++) {
                        if (array_key_exists($slot, $fileres)) {
                            $filledresult[$slot] = $fileres[$slot];
                        } elseif ($isLastFile && $slot > $maxslot) {
                            // Slots after the last data slot of the last file get a null
                            // total rather than 0.
                            $filledresult[$slot] = ['_' => null];
                        } else {
                            $filledresult[$slot] = ['_' => 0];
                        }
                    }

                    $filename = $this->statdir . '/' . $rulename . '-' . $tres . '-' . $fileno . '.stat';
                    if (file_exists($filename)) {
                        $raw = file_get_contents($filename);

                        // Stat files hold plain arrays only, so object instantiation is
                        // disabled; unreadable or corrupt files are not merged.
                        $previousData = $raw === false
                            ? false
                            : unserialize($raw, ['allowed_classes' => false]);

                        if (is_array($previousData)) {
                            $filledresult = $this->cummulateData($previousData, $filledresult);
                        }
                    }

                    // store file
                    file_put_contents($filename, serialize($filledresult), LOCK_EX);
                }
            }
        }

        $this->saveMetadata();
    }
373
}
374