1
|
|
|
<?php |
2
|
|
|
|
3
|
|
|
declare(strict_types=1); |
4
|
|
|
|
5
|
|
|
namespace SimpleSAML\Module\statistics; |
6
|
|
|
|
7
|
|
|
use Exception; |
8
|
|
|
use SimpleSAML\Configuration; |
9
|
|
|
|
10
|
|
|
/**
 * Finds track IDs that produced an unusually large number of STAT log lines
 * and writes a copy of the log file without the lines of those track IDs.
 *
 * Typical use: call clean() to obtain the track IDs to drop, then pass that
 * list to store() together with the path of the file to write.
 *
 * @package SimpleSAMLphp
 */
class LogCleaner
{
    /** Maximum number of bytes (minus one) read from the log per fgets() call. */
    private const READ_LENGTH = 4096;

    /** A progress message is printed each time this many STAT lines have been read. */
    private const PROGRESS_INTERVAL = 10000;

    /** Track IDs with more STAT lines than this are reported by clean(). */
    private const MAX_LINES_PER_TRACKID = 200;

    /** In debug mode clean() terminates the script once this many STAT lines were dumped. */
    private const DEBUG_LINE_LIMIT = 13;

    /** @var \SimpleSAML\Configuration Configuration loaded from 'module_statistics.php'. */
    private Configuration $statconfig;

    /** @var string Value of the 'statdir' option; must be an existing directory for clean()/store(). */
    private string $statdir;

    /** @var string Log file to read: the 'inputfile' option, or the constructor argument when given. */
    private string $inputfile;

    /** @var array Value of the 'statrules' option; not read by any method of this class. */
    private array $statrules;

    /** @var int Value of the optional 'offset' option (default 0); only printed by dumpConfig(). */
    private int $offset;


    /**
     * Constructor
     *
     * Loads the statistics module configuration. The 'inputfile' option is
     * always read from the configuration first; a non-null argument then
     * replaces it.
     *
     * @param string|null $inputfile Path of the log file to read instead of the configured one.
     */
    public function __construct(?string $inputfile = null)
    {
        $this->statconfig = Configuration::getConfig('module_statistics.php');

        $this->statdir = $this->statconfig->getString('statdir');
        $this->inputfile = $this->statconfig->getString('inputfile');
        $this->statrules = $this->statconfig->getValue('statrules');
        $this->offset = $this->statconfig->getOptionalInteger('offset', 0);

        if ($inputfile !== null) {
            $this->inputfile = $inputfile;
        }
    }


    /**
     * Print the statistics directory, the input file and the offset to standard output.
     */
    public function dumpConfig(): void
    {
        echo 'Statistics directory : ' . $this->statdir . "\n";
        echo 'Input file : ' . $this->inputfile . "\n";
        echo 'Offset : ' . $this->offset . "\n";
    }


    /**
     * Count the STAT lines per track ID and report the track IDs that exceed the limit.
     *
     * Lines that do not contain the text "STAT" are ignored. The track ID is
     * taken from index 4 of the parsed line content.
     *
     * @param bool $debug When true, every STAT line and its parsed content is printed and the
     *                    script is terminated after DEBUG_LINE_LIMIT STAT lines.
     * @return array List of track IDs (as strings) with more than MAX_LINES_PER_TRACKID STAT lines.
     * @throws \Exception If the statistics directory or the input file does not exist, or the
     *                    input file cannot be opened.
     */
    public function clean(bool $debug = false): array
    {
        if (!is_dir($this->statdir)) {
            throw new Exception('Statistics module: output dir does not exist [' . $this->statdir . ']');
        }

        if (!file_exists($this->inputfile)) {
            throw new Exception('Statistics module: input file does not exist [' . $this->inputfile . ']');
        }

        $file = $this->openInputFile();
        $logparser = $this->createLogParser();

        $sessioncounter = [];
        $i = 0;

        try {
            // Loop on the fgets() result rather than feof(): fgets() returns false at end of
            // file, and false must never reach the string functions below under strict_types.
            while (($logline = fgets($file, self::READ_LENGTH)) !== false) {
                // Continue if STAT is not found on line
                if (strpos($logline, 'STAT') === false) {
                    continue;
                }
                $i++;

                // Parse log, and extract epoch time and rest of content.
                $epoch = $logparser->parseEpoch($logline);
                $content = $logparser->parseContent($logline);

                if (($i % self::PROGRESS_INTERVAL) === 0) {
                    echo "Read line " . $i . "\n";
                }

                $trackid = $content[4];
                $sessioncounter[$trackid] = ($sessioncounter[$trackid] ?? 0) + 1;

                if ($debug) {
                    echo "----------------------------------------\n";
                    echo 'Log line: ' . $logline . "\n";
                    echo 'Date parse [' . substr($logline, 0, $this->statconfig->getOptionalValue('datelength', 15)) .
                        '] to [' . date(DATE_RFC822, $epoch) . ']' . "\n";
                    $ret = print_r($content, true);
                    echo htmlentities($ret);

                    // Debug runs only inspect the first few STAT lines and then stop the script.
                    if ($i >= self::DEBUG_LINE_LIMIT) {
                        exit;
                    }
                }
            }
        } finally {
            fclose($file);
        }

        $todelete = [];
        foreach ($sessioncounter as $trackid => $count) {
            if ($count > self::MAX_LINES_PER_TRACKID) {
                // PHP turns all-digit string keys into integers; cast back so the value has
                // the same type as the track ID parsed from the log line.
                $todelete[] = (string) $trackid;
            }
        }

        return $todelete;
    }


    /**
     * Write the STAT lines of the input file to $outputfile, leaving out the given track IDs.
     *
     * Lines that do not contain the text "STAT" are not copied. An existing
     * $outputfile is deleted and recreated.
     *
     * @param array $todelete Track IDs (strings or integers) whose lines must be dropped.
     * @param string $outputfile Path of the file to write.
     * @throws \Exception If the statistics directory or the input file does not exist, or if the
     *                    input or output file cannot be opened.
     */
    public function store(array $todelete, string $outputfile): void
    {
        echo "Preparing to delete [" . count($todelete) . "] trackids\n";

        if (!is_dir($this->statdir)) {
            throw new Exception('Statistics module: output dir do not exists [' . $this->statdir . ']');
        }

        if (!file_exists($this->inputfile)) {
            throw new Exception('Statistics module: input file do not exists [' . $this->inputfile . ']');
        }

        // Index the track IDs by their string form: gives a constant-time lookup per log line
        // and makes "123" and 123 match, which a strict in_array() would not.
        $lookup = [];
        foreach ($todelete as $id) {
            $lookup[(string) $id] = true;
        }

        $file = $this->openInputFile();

        try {
            // Open the output file in a way that guarantees that we will not overwrite a random file.
            if (file_exists($outputfile)) {
                // Delete existing output file.
                unlink($outputfile);
            }
            $outfile = fopen($outputfile, 'x'); // Create the output file
            if ($outfile === false) {
                throw new Exception('Statistics module: unable to create output file [' . $outputfile . ']');
            }

            try {
                $logparser = $this->createLogParser();

                $i = 0;
                // Parse through log file, line by line
                while (($logline = fgets($file, self::READ_LENGTH)) !== false) {
                    // Continue if STAT is not found on line.
                    if (strpos($logline, 'STAT') === false) {
                        continue;
                    }
                    $i++;

                    $content = $logparser->parseContent($logline);

                    if (($i % self::PROGRESS_INTERVAL) === 0) {
                        echo "Read line " . $i . "\n";
                    }

                    $trackid = $content[4];
                    if (isset($lookup[(string) $trackid])) {
                        continue;
                    }

                    fwrite($outfile, $logline);
                }
            } finally {
                fclose($outfile);
            }
        } finally {
            fclose($file);
        }
    }


    /**
     * Open the input file for reading.
     *
     * @return resource Open read handle on the input file.
     * @throws \Exception If the file cannot be opened.
     */
    private function openInputFile()
    {
        $handle = fopen($this->inputfile, 'r');
        if ($handle === false) {
            throw new Exception('Statistics module: unable to open input file [' . $this->inputfile . ']');
        }

        return $handle;
    }


    /**
     * Build a LogParser from the optional 'datestart', 'datelength' and 'offsetspan' options.
     *
     * @return LogParser
     */
    private function createLogParser(): LogParser
    {
        return new LogParser(
            $this->statconfig->getOptionalValue('datestart', 0),
            $this->statconfig->getOptionalValue('datelength', 15),
            $this->statconfig->getOptionalValue('offsetspan', 44),
        );
    }
}
205
|
|
|
|