Passed
Push — master ( 05c3b6...3a433f )
by Malte
02:38
created

Task::getUnusedPublicAssets()   B

Complexity

Conditions 3
Paths 2

Size

Total Lines 33
Code Lines 25

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
c 0
b 0
f 0
dl 0
loc 33
rs 8.8571
cc 3
eloc 25
nc 2
nop 6
1
<?php
2
3
namespace AppBundle\ShowUnusedPublicAssets;
4
5
use Helper\FileSystem;
6
use Helper\NullStyle;
7
use Symfony\Component\Console\Style\OutputStyle;
8
use Symfony\Component\Finder\Finder;
9
10
/**
 * Get all asset files in the public web directory and its subdirectories that were not accessed according to webserver
 * logs.
 */
final class Task
{
    /**
     * Determine which files below the public directory never show up in the webserver access log and write the
     * resulting list to an output file. Progress and the final summary are reported through $ioStyle.
     *
     * @param string $pathToPublic directory that is searched for asset files (via Symfony Finder); also used as the
     *                             prefix for the paths extracted from the log
     * @param string $pathToLogfile webserver access log that is read entry by entry
     * @param string $regExpToFindFile PCRE pattern applied to every log entry; its first capture group has to
     *                                 contain the requested path relative to $pathToPublic
     * @param string|null $pathToOutput handed to FileSystem::getPathToOutput() together with $pathToPublic and the
     *                                  default file name - presumably null selects that default (verify in FileSystem)
     * @param string|null $pathToBlacklist handed to FileSystem::getBlacklistingRegExps(); files matched by the
     *                                     resulting expressions are excluded from the report
     * @param OutputStyle|null $ioStyle reporting target; a NullStyle is used when none is given
     * @return void
     */
    public function getUnusedPublicAssets(
        $pathToPublic,
        $pathToLogfile,
        $regExpToFindFile,
        $pathToOutput,
        $pathToBlacklist,
        OutputStyle $ioStyle = null
    ) {
        $ioStyle = $ioStyle ?: new NullStyle();
        $ioStyle->text('Started.');

        $accessedUrls = $this->getAccessedUrls($pathToPublic, $pathToLogfile, $regExpToFindFile);
        $ioStyle->text('Found ' . count($accessedUrls) . ' distinct accessed URLs.');

        $blacklistingRegExps = FileSystem::getBlacklistingRegExps($pathToBlacklist);
        $foundFilesInfos = iterator_to_array((new Finder())->in($pathToPublic)->files()->getIterator());
        $relevantPublicAssets = FileSystem::filterFilesIn($foundFilesInfos, $blacklistingRegExps);

        $message = 'Found ' . count($relevantPublicAssets) . ' public assets';
        $numberOfBlacklistingRegExps = count($blacklistingRegExps);
        if ($numberOfBlacklistingRegExps > 0) {
            // Only mention the blacklist when it actually contributed to the filtering.
            $message .= ' not matched by the ' . $numberOfBlacklistingRegExps . ' blacklisting regular expressions';
        }
        $ioStyle->text($message . ' in ' . $pathToPublic . '.');

        // Everything that exists in the public directory but was never requested is a removal candidate.
        $unusedAssets = array_diff($relevantPublicAssets, $accessedUrls);
        sort($unusedAssets);

        $pathToOutput = FileSystem::getPathToOutput($pathToOutput, $pathToPublic, 'potentially-unused-public-assets.txt');
        FileSystem::writeArrayToFile($unusedAssets, $pathToOutput);
        $ioStyle->success([
            'Finished writing list of ' . count($unusedAssets) . ' potentially unused public assets. Please inspect the '
                . 'output file ' . $pathToOutput,
            'For files you want to keep (even if they are not used according to the webserver access logs), you '
                . 'can maintain a blacklist. With it, you can exclude these files from the output of further runs of '
                . 'this command. See --help or the readme for details.',
            'Once you are sure you can restore the rest of the files (ideally from your version control system), try '
                . 'deleting them, e.g. with "xargs rm < ' . $pathToOutput . '", rerun your tests and check your logs '
                . 'for 404s to see if that broke anything.',
        ]);
    }

    /**
     * Collect the distinct, sorted absolute paths of all existing files that were requested according to the log.
     *
     * @param string $pathToPublic prefix that is prepended to the path captured from each log entry
     * @param string $pathToLogfile webserver access log, read via FileSystem::readFileIntoArray()
     * @param string $regExpToFindFile PCRE pattern whose first capture group yields the requested path
     * @return string[] canonical paths as returned by realpath(); log entries that do not match, that carry no first
     *                  capture group, or that point to a non-existing file are left out
     */
    private function getAccessedUrls($pathToPublic, $pathToLogfile, $regExpToFindFile)
    {
        $logEntries = FileSystem::readFileIntoArray($pathToLogfile);
        $usedAssets = [];

        foreach ($logEntries as $logEntry) {
            $regExpMatches = [];
            if (preg_match($regExpToFindFile, $logEntry, $regExpMatches) !== 1) {
                continue;
            }

            // A pattern without (or with a non-participating) first group would otherwise raise a notice and make
            // the public directory itself look like an accessed asset.
            if (!isset($regExpMatches[1])) {
                continue;
            }

            // realpath() yields false for requests to files that do not exist (e.g. logged 404s); such entries are
            // no strings and must neither be returned nor counted as accessed URLs.
            $resolvedPath = realpath($pathToPublic . $regExpMatches[1]);
            if ($resolvedPath === false) {
                continue;
            }

            $usedAssets[] = $resolvedPath;
        }

        $usedAssets = array_unique($usedAssets);
        sort($usedAssets);

        return $usedAssets;
    }
}
84