Passed
Push — master ( 0597c0...d5c4bf )
by
unknown
51:21 queued 37:25
created

MissingFilesCommand::getFileNameWithoutAnchor()   A

Complexity

Conditions 2
Paths 2

Size

Total Lines 6
Code Lines 3

Duplication

Lines 0
Ratio 0 %

Importance

Changes 1
Bugs 0 Features 0
Metric Value
cc 2
eloc 3
c 1
b 0
f 0
nc 2
nop 1
dl 0
loc 6
rs 10
1
<?php
2
declare(strict_types = 1);
3
namespace TYPO3\CMS\Lowlevel\Command;
4
5
/*
6
 * This file is part of the TYPO3 CMS project.
7
 *
8
 * It is free software; you can redistribute it and/or modify it under
9
 * the terms of the GNU General Public License, either version 2
10
 * of the License, or any later version.
11
 *
12
 * For the full copyright and license information, please read the
13
 * LICENSE.txt file that was distributed with this source code.
14
 *
15
 * The TYPO3 project - inspiring people to share!
16
 */
17
18
use Symfony\Component\Console\Command\Command;
19
use Symfony\Component\Console\Input\InputInterface;
20
use Symfony\Component\Console\Input\InputOption;
21
use Symfony\Component\Console\Output\OutputInterface;
22
use Symfony\Component\Console\Style\SymfonyStyle;
23
use TYPO3\CMS\Backend\Command\ProgressListener\ReferenceIndexProgressListener;
24
use TYPO3\CMS\Core\Core\Bootstrap;
25
use TYPO3\CMS\Core\Core\Environment;
26
use TYPO3\CMS\Core\Database\ConnectionPool;
27
use TYPO3\CMS\Core\Database\ReferenceIndex;
28
use TYPO3\CMS\Core\Utility\GeneralUtility;
29
30
/**
 * Finds files which are referenced by TYPO3 but not found in the file system
 */
class MissingFilesCommand extends Command
{
    /**
     * Configure the command by defining its description, help text and options
     */
    public function configure()
    {
        $this
            ->setDescription('Find all file references from records pointing to a missing (non-existing) file.')
            ->setHelp('
Assumptions:
- a perfect integrity of the reference index table (always update the reference index table before using this tool!)
- relevant soft reference parsers applied everywhere file references are used inline

Files may be missing for these reasons (except software bugs):
- someone manually deleted the file inside fileadmin/ or another user maintained folder. If the reference was a soft reference (opposite to a DataHandler managed file relation from "group" type fields), technically it is not an error although it might be a mistake that someone did so.
- someone manually deleted the file inside the uploads/ folder (typically containing managed files) which is an error since no user interaction should take place there.

Manual repair suggestions (using --dry-run):
- Managed files: You might be able to locate the file and re-insert it in the correct location. However, no automatic fix can do that for you.
- Soft References: You should investigate each case and edit the content accordingly. A soft reference to a file could be in an HTML image tag (for example <img src="missing_file.jpg" />) and you would have to either remove the whole tag, change the filename or re-create the missing file.

If the option "--dry-run" is not set, all managed files (TCA/FlexForm attachments) will silently remove the reference
from the record since the file is missing. For this reason you might prefer a manual approach instead.
All soft references with missing files require manual fix if you consider it an error.

If you want to get more detailed information, use the --verbose option.')
            ->addOption(
                'dry-run',
                null,
                InputOption::VALUE_NONE,
                'If this option is set, the references will not be removed, but just the output which files would be deleted are shown'
            )
            ->addOption(
                'update-refindex',
                null,
                InputOption::VALUE_NONE,
                'Setting this option automatically updates the reference index and does not ask on command line. Alternatively, use -n to avoid the interactive mode'
            );
    }

    /**
     * Executes the command to
     * - optionally update the reference index (to have clean data)
     * - find entries in sys_refindex (soft references and regular references) whose file does not exist (anymore)
     * - remove the regular references if --dry-run is not set (soft references are only listed)
     *
     * @param InputInterface $input
     * @param OutputInterface $output
     * @return int always 0
     */
    protected function execute(InputInterface $input, OutputInterface $output)
    {
        // Make sure the _cli_ user is loaded
        Bootstrap::initializeBackendAuthentication();

        $io = new SymfonyStyle($input, $output);
        $io->title($this->getDescription());

        $dryRun = $input->hasOption('dry-run') && (bool)$input->getOption('dry-run');

        // Update the reference index
        $this->updateReferenceIndex($input, $io);

        // Find missing soft references (cannot be updated / deleted)
        $missingSoftReferencedFiles = $this->findMissingSoftReferencedFiles();
        $softReferenceCount = count($missingSoftReferencedFiles);
        if ($softReferenceCount > 0) {
            $io->note('Found ' . $softReferenceCount . ' soft-referenced files that need manual repair.');
            $io->listing($missingSoftReferencedFiles);
        }

        // Find missing references. The result is keyed by file name, so the
        // count below is the number of distinct missing files.
        $missingReferencedFiles = $this->findMissingReferencedFiles();
        $referenceCount = count($missingReferencedFiles);
        if ($referenceCount > 0) {
            $io->note('Found ' . $referenceCount . ' references to non-existing files.');

            $this->removeReferencesToMissingFiles($missingReferencedFiles, $dryRun, $io);
            if ($dryRun) {
                // Nothing was modified, so do not claim that references were updated
                $io->note('Dry run: no references were changed.');
            } else {
                $io->success('All references were updated accordingly.');
            }
        }

        if ($softReferenceCount === 0 && $referenceCount === 0) {
            $io->success('Nothing to do, no missing files found. Everything is in place.');
        }
        return 0;
    }

    /**
     * Function to update the reference index
     * - if the option --update-refindex is set, do it
     * - otherwise, if in interactive mode (not having -n set), ask the user
     * - otherwise assume everything is fine
     *
     * @param InputInterface $input holds information about entered parameters
     * @param SymfonyStyle $io necessary for outputting information
     */
    protected function updateReferenceIndex(InputInterface $input, SymfonyStyle $io)
    {
        // Check for reference index to update
        $io->note('Finding missing files referenced by TYPO3 requires a clean reference index (sys_refindex)');

        // Short-circuit evaluation keeps the original precedence: the explicit
        // option wins, the question is only asked in interactive mode.
        $updateReferenceIndex = ($input->hasOption('update-refindex') && $input->getOption('update-refindex'))
            || ($input->isInteractive() && $io->confirm('Should the reference index be updated right now?', false));

        if (!$updateReferenceIndex) {
            $io->writeln('Reference index is assumed to be up to date, continuing.');
            return;
        }

        // Update the reference index
        $progressListener = GeneralUtility::makeInstance(ReferenceIndexProgressListener::class);
        $progressListener->initialize($io);
        $referenceIndex = GeneralUtility::makeInstance(ReferenceIndex::class);
        $referenceIndex->updateIndex(false, $progressListener);
    }

    /**
     * Find file references that point to non-existing files in the system.
     * Only sys_refindex rows with ref_table "_FILE" and no soft reference key are considered.
     * Fix methods: API in \TYPO3\CMS\Core\Database\ReferenceIndex that allows to
     * change the value of a reference (or remove it)
     *
     * @return array formatted sys_refindex entries, grouped as [file name][sys_refindex hash] => description
     */
    protected function findMissingReferencedFiles(): array
    {
        $missingReferences = [];
        // Select all files in the reference table
        $queryBuilder = GeneralUtility::makeInstance(ConnectionPool::class)
            ->getQueryBuilderForTable('sys_refindex');

        $result = $queryBuilder
            ->select('*')
            ->from('sys_refindex')
            ->where(
                $queryBuilder->expr()->eq('ref_table', $queryBuilder->createNamedParameter('_FILE', \PDO::PARAM_STR)),
                $queryBuilder->expr()->isNull('softref_key')
            )
            ->execute();

        // The public path does not change while iterating, resolve it once
        $publicPath = Environment::getPublicPath() . '/';

        // Traverse the references and check if the files exist. The query already
        // restricts the rows to "softref_key IS NULL", so no further check is needed.
        while ($record = $result->fetch()) {
            $fileName = $this->getFileNameWithoutAnchor($record['ref_string']);
            if (!@is_file($publicPath . $fileName)) {
                $missingReferences[$fileName][$record['hash']] = $this->formatReferenceIndexEntryToString($record);
            }
        }

        return $missingReferences;
    }

    /**
     * Find file references that point to non-existing files in the system and are
     * registered as soft references (sys_refindex rows with a "softref_key")
     *
     * @return array list of strings in the form "<file name> - <hash> - <formatted sys_refindex entry>"
     */
    protected function findMissingSoftReferencedFiles(): array
    {
        $missingReferences = [];
        // Select all files in the reference table
        $queryBuilder = GeneralUtility::makeInstance(ConnectionPool::class)
            ->getQueryBuilderForTable('sys_refindex');

        $result = $queryBuilder
            ->select('*')
            ->from('sys_refindex')
            ->where(
                $queryBuilder->expr()->eq('ref_table', $queryBuilder->createNamedParameter('_FILE', \PDO::PARAM_STR)),
                $queryBuilder->expr()->isNotNull('softref_key')
            )
            ->execute();

        // The public path does not change while iterating, resolve it once
        $publicPath = Environment::getPublicPath() . '/';

        // Traverse the references and check if the files exist
        while ($record = $result->fetch()) {
            $fileName = $this->getFileNameWithoutAnchor($record['ref_string']);
            if (!@is_file($publicPath . $fileName)) {
                $missingReferences[] = $fileName . ' - ' . $record['hash'] . ' - ' . $this->formatReferenceIndexEntryToString($record);
            }
        }
        return $missingReferences;
    }

    /**
     * Remove a possible anchor like 'my-path/file.pdf#page15'
     *
     * @param string $fileName a filename as found in sys_refindex.ref_string
     * @return string the part of the filename before the first "#", or the unchanged filename if there is none
     */
    protected function getFileNameWithoutAnchor(string $fileName): string
    {
        // With a limit of 2 the first element is everything before the first "#";
        // without any "#" it is the complete input.
        return explode('#', $fileName, 2)[0];
    }

    /**
     * Removes the references to files that were not found, using the
     * sys_refindex hash of each reference
     *
     * @param array $missingManagedFiles missing references grouped as [file name][sys_refindex hash] => description
     * @param bool $dryRun if set, the references are just displayed, but not removed
     * @param SymfonyStyle $io the IO object for output
     */
    protected function removeReferencesToMissingFiles(array $missingManagedFiles, bool $dryRun, SymfonyStyle $io)
    {
        foreach ($missingManagedFiles as $fileName => $references) {
            if ($io->isVeryVerbose()) {
                $io->writeln('Deleting references to missing file "' . $fileName . '"');
            }
            foreach ($references as $hash => $recordReference) {
                $io->writeln('Removing reference in record "' . $recordReference . '"');
                if ($dryRun) {
                    continue;
                }
                $sysRefObj = GeneralUtility::makeInstance(ReferenceIndex::class);
                // Passing null as new value asks the reference index to remove the reference
                $error = $sysRefObj->setReferenceValue($hash, null);
                if ($error) {
                    $io->error('ReferenceIndex::setReferenceValue() reported "' . $error . '"');
                }
            }
        }
    }

    /**
     * Formats a sys_refindex entry to something readable:
     * "tablename:recuid:field:flexpointer:softref_key", followed by " (DELETED)"
     * if the "deleted" column of the entry is set
     *
     * @param array $record a row of sys_refindex
     * @return string
     */
    protected function formatReferenceIndexEntryToString(array $record): string
    {
        $identifier = implode(':', [
            $record['tablename'],
            $record['recuid'],
            $record['field'],
            $record['flexpointer'],
            $record['softref_key'],
        ]);

        return $identifier . ($record['deleted'] ? ' (DELETED)' : '');
    }
}
269