FilenameSanitizer::sanitize()   A
last analyzed

Complexity

Conditions 5
Paths 16

Size

Total Lines 21
Code Lines 10

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 11
CRAP Score 5

Importance

Changes 1
Bugs 0 Features 0
Metric Value
cc 5
eloc 10
c 1
b 0
f 0
nc 16
nop 1
dl 0
loc 21
ccs 11
cts 11
cp 1
crap 5
rs 9.6111
1
<?php
2
3
/**
4
 * Copyright (c) Florian Krämer (https://florian-kraemer.net)
5
 * Licensed under The MIT License
6
 * For full copyright and license information, please see the LICENSE.txt
7
 * Redistributions of files must retain the above copyright notice.
8
 *
9
 * @copyright Copyright (c) Florian Krämer (https://florian-kraemer.net)
10
 * @author    Florian Krämer
11
 * @link      https://github.com/Phauthentic
12
 * @license   https://opensource.org/licenses/MIT MIT License
13
 */
14
15
declare(strict_types=1);
16
17
namespace Phauthentic\Infrastructure\Storage\Utility;
18
19
/**
 * Filename Sanitizer
 *
 * Turns an arbitrary string into a filename by replacing reserved and
 * unwanted characters, optionally lowercasing and "beautifying" the result
 * and finally cutting it down to a maximum byte length while keeping the
 * file extension.
 *
 * The behaviour is driven by the config array passed to the constructor;
 * see $defaultConfig for the available keys and their defaults.
 *
 * @link https://stackoverflow.com/questions/2021624/string-sanitizer-for-filename
 */
class FilenameSanitizer implements FilenameSanitizerInterface
{
    /**
     * Effective configuration: constructor options merged over the defaults.
     *
     * @var array<string, mixed>
     */
    protected array $config = [];

    /**
     * Default configuration, used for every key the caller does not provide.
     *
     * @var array<string, mixed>
     */
    protected array $defaultConfig = [
        'lowercase' => false,
        'removeAllNonAlphaNumerical' => false,
        'beautify' => true,
        'enforceMaxLength' => true,
        'maxLength' => 255,
        'removeControlChars' => true,
        'removeNonPrintingChars' => true,
        'removeUriReservedChars' => false,
        'urlSafe' => false,
    ];

    /**
     * File system reserved characters
     *
     * The four backslashes in the PHP literal yield "\\" in the regex, i.e.
     * a literal backslash inside the character class. With only two, the
     * regex would contain "\|" (an escaped pipe) and backslashes would never
     * be matched.
     *
     * @link https://en.wikipedia.org/wiki/Filename#Reserved_characters_and_words
     * @var string
     */
    protected string $filesystemReservedChars = '[<>:"/\\\\|?*]';

    /**
     * URL unsafe characters
     *
     * The tilde is escaped because "~" is the regex delimiter used in
     * replaceCharacters().
     *
     * @link https://www.ietf.org/rfc/rfc1738.txt
     * @var string
     */
    protected string $urlUnsafeChars = '[{}^\~`]';

    /**
     * URI reserved characters
     *
     * @link https://tools.ietf.org/html/rfc3986#section-2.2
     * @var string
     */
    protected string $uriReservedChars = '[#\[\]@!$&\'()+,;=]';

    /**
     * Non-printing characters DEL, NO-BREAK SPACE, SOFT HYPHEN
     *
     * @var string
     */
    protected string $nonPrintingChars = '[\x7F\xA0\xAD]';

    /**
     * Control Characters
     *
     * @link http://msdn.microsoft.com/en-us/library/windows/desktop/aa365247%28v=vs.85%29.aspx
     * @var string
     */
    protected string $controlChars = '[\x00-\x1F]';

    /**
     * @param array<string, mixed> $config Config array; keys given here win over $defaultConfig
     */
    public function __construct(array $config = [])
    {
        // The array union keeps the left-hand value for keys present in both.
        $this->config = $config + $this->defaultConfig;
    }

    /**
     * Replaces every character matched by the enabled character classes.
     *
     * File system reserved characters are always replaced; the other classes
     * depend on the 'urlSafe', 'removeUriReservedChars',
     * 'removeNonPrintingChars' and 'removeControlChars' options.
     *
     * @param string $filename Filename
     * @param string $replacement Replacement character
     * @return string
     */
    protected function replaceCharacters(string $filename, string $replacement = '-'): string
    {
        // array_filter() drops the empty strings of the disabled classes.
        $patterns = array_filter([
            $this->filesystemReservedChars,
            $this->config['urlSafe'] === true ? $this->urlUnsafeChars : '',
            $this->config['removeUriReservedChars'] === true ? $this->uriReservedChars : '',
            $this->config['removeNonPrintingChars'] === true ? $this->nonPrintingChars : '',
            $this->config['removeControlChars'] === true ? $this->controlChars : '',
        ]);

        // In byte mode \xA0 and \xAD match single bytes, which are also
        // continuation bytes of multi-byte UTF-8 characters (e.g. "à" is
        // C3 A0) and would be torn apart. For valid UTF-8 input the "u"
        // modifier makes them match the code points U+00A0 / U+00AD instead.
        // Input that is not valid UTF-8 keeps the byte-wise matching, because
        // preg_replace() would fail on it in UTF-8 mode.
        $modifiers = 'x';
        if (preg_match('//u', $filename) === 1) {
            $modifiers .= 'u';
        }

        $regex = '~' . implode('|', $patterns) . '~' . $modifiers;

        return (string)preg_replace($regex, $replacement, $filename);
    }

    /**
     * Sanitizes a string so that it can be used as a filename.
     *
     * Steps, in order: character replacement, optional lowercasing, optional
     * removal of all non alphanumerical characters, optional beautifying and
     * optional enforcing of the max length.
     *
     * @param string $string String
     * @return string
     */
    public function sanitize(string $string): string
    {
        $string = $this->replaceCharacters($string);

        if ($this->config['lowercase'] === true) {
            $string = $this->stringToLowerCase($string);
        }

        // Deliberately a truthiness check, unlike the other options.
        if ($this->config['removeAllNonAlphaNumerical']) {
            $string = $this->removeAllNonAlphaNumerical($string);
        }

        if ($this->config['beautify'] === true) {
            $string = $this->beautify($string);
        }

        if ($this->config['enforceMaxLength'] === true) {
            $string = $this->enforceMaxLength($string, $this->config['maxLength']);
        }

        return $string;
    }

    /**
     * Enforces the max length of a filename
     *
     * The length is measured in bytes. Only the name part is shortened; the
     * extension (and its dot) is preserved and counted against the limit.
     *
     * @link http://en.wikipedia.org/wiki/Comparison_of_file_systems#Limits
     * @link http://serverfault.com/a/9548/44086
     * @param string $filename Filename
     * @param int $maxLength Max length, 255 by default
     * @return string
     */
    protected function enforceMaxLength(string $filename, int $maxLength = 255): string
    {
        $ext = (string)pathinfo($filename, PATHINFO_EXTENSION);

        // Compare against '' instead of relying on truthiness: the extension
        // "0" is falsy and would otherwise be dropped ("file.0" => "file").
        $hasExtension = $ext !== '';

        // Reserve room for the extension plus the separating dot.
        $length = $maxLength - ($hasExtension ? strlen($ext) + 1 : 0);

        // mb_strcut() cuts by bytes but never in the middle of a character.
        $name = mb_strcut(
            (string)pathinfo($filename, PATHINFO_FILENAME),
            0,
            $length,
            $this->detectEncoding($filename)
        );

        return $name . ($hasExtension ? '.' . $ext : '');
    }

    /**
     * Beautifies a filename to make it better to read
     *
     * "file   name.zip" becomes "file-name.zip"
     * "file___name.zip" becomes "file-name.zip"
     * "file---name.zip" becomes "file-name.zip"
     * "file--.--.-.--name.zip" becomes "file.name.zip"
     * "file...name..zip" becomes "file.name.zip"
     * ".file-name.-" becomes "file-name"
     *
     * Note that the result is always lowercased, independently of the
     * 'lowercase' config option.
     *
     * @link https://stackoverflow.com/questions/2021624/string-sanitizer-for-filename
     * @param string $filename Filename
     * @return string
     */
    public function beautify(string $filename): string
    {
        // reduce consecutive characters
        $filename = (string)preg_replace([
            // "file   name.zip" becomes "file-name.zip"
            '/ +/',
            // "file___name.zip" becomes "file-name.zip"
            '/_+/',
            // "file---name.zip" becomes "file-name.zip"
            '/-+/'
        ], '-', $filename);

        $filename = (string)preg_replace([
            // "file--.--.-.--name.zip" becomes "file.name.zip"
            '/-*\.-*/',
            // "file...name..zip" becomes "file.name.zip"
            '/\.{2,}/'
        ], '.', $filename);

        // lowercase for windows/unix interoperability http://support.microsoft.com/kb/100625
        $filename = mb_strtolower($filename, $this->detectEncoding($filename));

        // ".file-name.-" becomes "file-name"
        return trim($filename, '.-');
    }

    /**
     * Removes everything but a-z, A-Z and 0-9 from the name part.
     *
     * The extension reported by PathInfo is appended again unchanged.
     *
     * @param string $string String
     * @return string
     */
    protected function removeAllNonAlphaNumerical(string $string): string
    {
        $pathInfo = PathInfo::for($string);
        $string = (string)preg_replace('/[^a-zA-Z0-9]/', '', $pathInfo->filename());

        if (!$pathInfo->hasExtension()) {
            return $string;
        }

        return $string . '.' . $pathInfo->extension();
    }

    /**
     * Lowercases a string, multibyte aware when mbstring is available.
     *
     * @param string $string String
     * @param string $encoding Encoding
     * @return string
     */
    protected function stringToLowerCase(
        string $string,
        string $encoding = 'UTF-8'
    ): string {
        return ((function_exists('mb_strtolower')) ?
            mb_strtolower($string, $encoding) :
            strtolower($string)
        );
    }

    /**
     * Detects the encoding of a string, falling back to UTF-8.
     *
     * mb_detect_encoding() is declared to return false when no encoding can
     * be determined; with strict types enabled, passing that false on to
     * other mb_* functions would raise a TypeError.
     *
     * @param string $string String
     * @return string
     */
    private function detectEncoding(string $string): string
    {
        return mb_detect_encoding($string) ?: 'UTF-8';
    }
}
233