1
|
|
|
<?php |
2
|
|
|
|
3
|
|
|
namespace AppBundle\ShowUnusedPublicAssets; |
4
|
|
|
|
5
|
|
|
use Helper\FileSystem; |
6
|
|
|
use Helper\NullStyle; |
7
|
|
|
use Symfony\Component\Console\Style\OutputStyle; |
8
|
|
|
use Symfony\Component\Finder\Finder; |
9
|
|
|
|
10
|
|
|
/** |
11
|
|
|
* Get all asset files in the public web directory and its subdirectories that were not accessed according to webserver |
12
|
|
|
* logs. |
13
|
|
|
*/ |
14
|
|
|
final class Task
{
    /**
     * Compares the files found below the public web directory with the files requested according to the webserver
     * access log and writes the paths of those that were never requested to an output file.
     *
     * Nothing is returned; progress messages and the final summary are reported through $ioStyle.
     *
     * @param string $pathToPublic directory that is searched (via the Symfony Finder) for asset files; it is also
     *                             prepended to every path extracted from the access log
     * @param string $pathToLogfile path to the webserver access log
     * @param string $regExpToFindFile regular expression applied to every log entry; its first capturing group has
     *                                 to contain the requested path, which gets appended to $pathToPublic
     * @param string|null $pathToOutput handed to FileSystem::getPathToOutput() together with $pathToPublic and the
     *                                  file name "potentially-unused-public-assets.txt" (presumably the fallback
     *                                  location when null - confirm in FileSystem)
     * @param string|null $pathToBlacklist handed to FileSystem::getBlacklistingRegExps() (presumably a file with one
     *                                     regular expression per line - confirm in FileSystem)
     * @param OutputStyle|null $ioStyle receives the progress messages; a NullStyle is used when omitted
     */
    public function getUnusedPublicAssets($pathToPublic, $pathToLogfile, $regExpToFindFile, $pathToOutput, $pathToBlacklist, OutputStyle $ioStyle = null)
    {
        $ioStyle = $ioStyle ?: new NullStyle();
        $ioStyle->text('Started.');

        $accessedUrls = $this->getAccessedUrls($pathToPublic, $pathToLogfile, $regExpToFindFile);
        $ioStyle->text('Found ' . count($accessedUrls) . ' distinct accessed URLs.');

        $blacklistingRegExps = FileSystem::getBlacklistingRegExps($pathToBlacklist);
        $foundFilesInfos = iterator_to_array((new Finder())->in($pathToPublic)->files()->getIterator());
        $relevantPublicAssets = FileSystem::filterFilesIn($foundFilesInfos, $blacklistingRegExps);

        // Only mention the blacklist in the progress message when it actually contains expressions.
        $message = 'Found ' . count($relevantPublicAssets) . ' public assets';
        $numberOfBlacklistingRegExps = count($blacklistingRegExps);
        if ($numberOfBlacklistingRegExps > 0) {
            $message .= ' not matched by the ' . $numberOfBlacklistingRegExps . ' blacklisting regular expressions';
        }
        $ioStyle->text($message . ' in ' . $pathToPublic . '.');

        // Everything that exists on disk but never shows up in the access log is potentially unused.
        $unusedAssets = array_diff($relevantPublicAssets, $accessedUrls);
        sort($unusedAssets);

        $pathToOutput = FileSystem::getPathToOutput($pathToOutput, $pathToPublic, 'potentially-unused-public-assets.txt');
        FileSystem::writeArrayToFile($unusedAssets, $pathToOutput);
        $ioStyle->success([
            'Finished writing list of ' . count($unusedAssets) . ' potentially unused public assets. Please inspect the '
            . 'output file ' . $pathToOutput,
            'For files you want to keep (even if they are not used according to the webserver access logs), you '
            . 'can maintain a blacklist. With it, you can exclude these files from the output of further runs of '
            . 'this command. See --help or the readme for details.',
            'Once you are sure you can restore the rest of the files (ideally from your version control system), try '
            . 'deleting them, e.g. with "xargs rm < ' . $pathToOutput . '", rerun your tests and check your logs '
            . 'for 404s to see if that broke anything.',
        ]);
    }

    /**
     * Extracts the requested files from the access log and resolves them to canonical paths below the public
     * directory.
     *
     * Log entries that do not match the regular expression, matches without a first capturing group and requests
     * for files that do not exist on disk are skipped.
     *
     * @param string $pathToPublic prefix prepended to every path captured from a log entry
     * @param string $pathToLogfile path to the webserver access log
     * @param string $regExpToFindFile regular expression whose first capturing group contains the requested path
     * @return string[] sorted, distinct canonical paths of the existing files that were requested
     */
    private function getAccessedUrls($pathToPublic, $pathToLogfile, $regExpToFindFile)
    {
        $logEntries = FileSystem::readFileIntoArray($pathToLogfile);
        $usedAssets = [];

        foreach ($logEntries as $logEntry) {
            $regExpMatches = [];
            if (preg_match($regExpToFindFile, $logEntry, $regExpMatches) !== 1) {
                continue;
            }

            // Without a participating first capturing group there is no path to resolve; appending nothing would
            // wrongly record the public directory itself as an accessed asset.
            if (!isset($regExpMatches[1])) {
                continue;
            }

            // realpath() returns false when the requested file does not exist (e.g. requests answered with a 404).
            // Such entries cannot refer to an asset on disk and must not end up in the string[] result.
            $resolvedPath = realpath($pathToPublic . $regExpMatches[1]);
            if ($resolvedPath === false) {
                continue;
            }

            $usedAssets[] = $resolvedPath;
        }

        $usedAssets = array_unique($usedAssets);
        sort($usedAssets);

        return $usedAssets;
    }
}
84
|
|
|
|