FilenameSanitizer   A
last analyzed

Complexity

Total Complexity 21

Size/Duplication

Total Lines 214
Duplicated Lines 0 %

Importance

Changes 1
Bugs 0 Features 0
Metric Value
eloc 69
c 1
b 0
f 0
dl 0
loc 214
rs 10
wmc 21

7 Methods

Rating   Name   Duplication   Size   Complexity  
A __construct() 0 3 1
A replaceCharacters() 0 11 5
A sanitize() 0 21 5
A stringToLowerCase() 0 7 2
A removeAllNonAlphaNumerical() 0 10 2
A enforceMaxLength() 0 17 4
A beautify() 0 30 2
1
<?php
2
3
/**
4
 * Copyright (c) Florian Krämer (https://florian-kraemer.net)
5
 * Licensed under The MIT License
6
 * For full copyright and license information, please see the LICENSE.txt
7
 * Redistributions of files must retain the above copyright notice.
8
 *
9
 * @copyright Copyright (c) Florian Krämer (https://florian-kraemer.net)
10
 * @author    Florian Krämer
11
 * @link      https://github.com/Phauthentic
12
 * @license   https://opensource.org/licenses/MIT MIT License
13
 */
14
15
declare(strict_types=1);
16
17
namespace Phauthentic\Infrastructure\Storage\Utility;
18
19
/**
 * Filename Sanitizer
 *
 * Turns an arbitrary string into a file name by replacing reserved, control
 * and non-printing characters and by optionally lower-casing, beautifying
 * and truncating the result.
 *
 * Supported config keys and their defaults:
 * - lowercase (false): lower-case the whole name.
 * - removeAllNonAlphaNumerical (false): strip everything except a-z, A-Z, 0-9
 *   from the name part.
 * - beautify (true): collapse separator runs, lower-case and trim the name.
 * - enforceMaxLength (true): truncate the name to "maxLength" bytes.
 * - maxLength (255): limit used by "enforceMaxLength".
 * - removeControlChars (true): replace characters 0x00-0x1F.
 * - removeNonPrintingChars (true): replace DEL, NO-BREAK SPACE, SOFT HYPHEN.
 * - removeUriReservedChars (false): replace RFC 3986 reserved characters.
 * - urlSafe (false): replace RFC 1738 unsafe characters.
 *
 * @link https://stackoverflow.com/questions/2021624/string-sanitizer-for-filename
 */
class FilenameSanitizer implements FilenameSanitizerInterface
{
    /**
     * Effective configuration: constructor values merged over the defaults.
     *
     * @var array<string, mixed>
     */
    protected array $config = [];

    /**
     * Values used for every config key the caller does not provide.
     *
     * @var array<string, mixed>
     */
    protected array $defaultConfig = [
        'lowercase' => false,
        'removeAllNonAlphaNumerical' => false,
        'beautify' => true,
        'enforceMaxLength' => true,
        'maxLength' => 255,
        'removeControlChars' => true,
        'removeNonPrintingChars' => true,
        'removeUriReservedChars' => false,
        'urlSafe' => false,
    ];

    /**
     * File system reserved characters: < > : " / \ | ? *
     *
     * This is a regex character class that gets embedded in a pattern that
     * uses "~" as delimiter. The four backslashes in the PHP literal produce
     * the two-character regex escape for a literal backslash; with only two,
     * the class would read "\|" (an escaped pipe) and never match "\".
     *
     * @link https://en.wikipedia.org/wiki/Filename#Reserved_characters_and_words
     * @var string
     */
    protected string $filesystemReservedChars = '[<>:"/\\\\|?*]';

    /**
     * URL unsafe characters
     *
     * The tilde is escaped because "~" is the delimiter of the final pattern.
     *
     * @link https://www.ietf.org/rfc/rfc1738.txt
     * @var string
     */
    protected string $urlUnsafeChars = '[{}^\~`]';

    /**
     * URI reserved characters
     *
     * @link https://tools.ietf.org/html/rfc3986#section-2.2
     * @var string
     */
    protected string $uriReservedChars = '[#\[\]@!$&\'()+,;=]';

    /**
     * Non-printing characters DEL, NO-BREAK SPACE, SOFT HYPHEN
     *
     * @var string
     */
    protected string $nonPrintingChars = '[\x7F\xA0\xAD]';

    /**
     * Control Characters
     *
     * @link http://msdn.microsoft.com/en-us/library/windows/desktop/aa365247%28v=vs.85%29.aspx
     * @var string
     */
    protected string $controlChars = '[\x00-\x1F]';

    /**
     * Merges the given options over the defaults; caller values win and any
     * missing key falls back to $defaultConfig.
     *
     * @param array<string, mixed> $config Config array
     */
    public function __construct(array $config = [])
    {
        $this->config = $config + $this->defaultConfig;
    }

    /**
     * Replaces every character matched by the enabled character classes.
     *
     * File system reserved characters are always replaced; the other classes
     * depend on the "urlSafe", "removeUriReservedChars",
     * "removeNonPrintingChars" and "removeControlChars" options.
     *
     * @param string $filename Filename
     * @param string $replacement Replacement character
     * @return string
     */
    protected function replaceCharacters(string $filename, string $replacement = '-'): string
    {
        $classes = [$this->filesystemReservedChars];

        if ($this->config['urlSafe'] === true) {
            $classes[] = $this->urlUnsafeChars;
        }
        if ($this->config['removeUriReservedChars'] === true) {
            $classes[] = $this->uriReservedChars;
        }
        if ($this->config['removeNonPrintingChars'] === true) {
            $classes[] = $this->nonPrintingChars;
        }
        if ($this->config['removeControlChars'] === true) {
            $classes[] = $this->controlChars;
        }

        // A subclass may blank out a character class; skip empty fragments.
        $classes = array_filter($classes);
        if ($classes === []) {
            // An empty pattern would match at every position and insert the
            // replacement between all characters.
            return $filename;
        }

        // In byte mode \xA0 and \xAD also match continuation bytes of
        // multi-byte UTF-8 characters (e.g. the second byte of "à") and
        // would corrupt them. Match code points whenever the input is valid
        // UTF-8 and fall back to byte mode otherwise, because the "u"
        // modifier makes preg_replace() fail on invalid UTF-8.
        $modifiers = preg_match('//u', $filename) === 1 ? 'xu' : 'x';
        $regex = '~' . implode('|', $classes) . '~' . $modifiers;

        return (string)preg_replace($regex, $replacement, $filename);
    }

    /**
     * Runs the configured sanitizing steps in a fixed order: character
     * replacement, lower-casing, alphanumeric filtering, beautifying and
     * finally length enforcement.
     *
     * @param string $string String
     * @return string
     */
    public function sanitize(string $string): string
    {
        $string = $this->replaceCharacters($string);

        if ($this->config['lowercase'] === true) {
            $string = $this->stringToLowerCase($string);
        }

        if ($this->config['removeAllNonAlphaNumerical']) {
            $string = $this->removeAllNonAlphaNumerical($string);
        }

        if ($this->config['beautify'] === true) {
            $string = $this->beautify($string);
        }

        if ($this->config['enforceMaxLength'] === true) {
            $string = $this->enforceMaxLength($string, $this->config['maxLength']);
        }

        return $string;
    }

    /**
     * Enforces the max length of a filename
     *
     * The name part is truncated so that name, dot and extension together
     * do not exceed $maxLength bytes; the extension is kept intact. If the
     * extension alone does not fit, the complete filename is truncated
     * instead so the limit is still honoured.
     *
     * @link http://en.wikipedia.org/wiki/Comparison_of_file_systems#Limits
     * @link http://serverfault.com/a/9548/44086
     * @param string $filename Filename
     * @param int $maxLength Max length, 255 by default
     * @return string
     */
    protected function enforceMaxLength(string $filename, int $maxLength = 255): string
    {
        $extension = (string)pathinfo($filename, PATHINFO_EXTENSION);
        // Compare against '' instead of relying on truthiness: the extension
        // "0" (as in "file.0") is falsy and would otherwise be dropped.
        $hasExtension = $extension !== '';

        // mb_detect_encoding() returns false when detection fails; null lets
        // mb_strcut() use the internal encoding.
        $encoding = mb_detect_encoding($filename) ?: null;

        // Bytes left for the name once ".<extension>" is accounted for.
        $available = $maxLength - ($hasExtension ? strlen($extension) + 1 : 0);

        if ($available < 0) {
            // A negative length would make mb_strcut() cut from the end and
            // return a name longer than allowed.
            return mb_strcut($filename, 0, max(0, $maxLength), $encoding);
        }

        $name = mb_strcut(
            (string)pathinfo($filename, PATHINFO_FILENAME),
            0,
            $available,
            $encoding
        );

        return $hasExtension ? $name . '.' . $extension : $name;
    }

    /**
     * Beautifies a filename to make it better to read
     *
     * "file   name.zip" becomes "file-name.zip"
     * "file___name.zip" becomes "file-name.zip"
     * "file---name.zip" becomes "file-name.zip"
     * "file--.--.-.--name.zip" becomes "file.name.zip"
     * "file...name..zip" becomes "file.name.zip"
     * ".file-name.-" becomes "file-name"
     *
     * Note that the result is always lower-cased, independent of the
     * "lowercase" config option.
     *
     * @link https://stackoverflow.com/questions/2021624/string-sanitizer-for-filename
     * @param string $filename Filename
     * @return string
     */
    public function beautify(string $filename): string
    {
        // Any run of spaces, underscores and hyphens collapses into one hyphen.
        $filename = (string)preg_replace('/[ _-]+/', '-', $filename);

        $filename = (string)preg_replace([
            // "file--.--.-.--name.zip" becomes "file.name.zip"
            '/-*\.-*/',
            // "file...name..zip" becomes "file.name.zip"
            '/\.{2,}/',
        ], '.', $filename);

        // lowercase for windows/unix interoperability http://support.microsoft.com/kb/100625
        $encoding = mb_detect_encoding($filename);
        if ($encoding === false) {
            $encoding = null;
        }
        $filename = mb_strtolower($filename, $encoding);

        // ".file-name.-" becomes "file-name"
        return trim($filename, '.-');
    }

    /**
     * Removes every character other than a-z, A-Z and 0-9 from the name part
     * reported by PathInfo and re-appends the extension unchanged when
     * PathInfo reports one.
     *
     * @param string $string String
     * @return string
     */
    protected function removeAllNonAlphaNumerical(string $string): string
    {
        $pathInfo = PathInfo::for($string);
        $string = (string)preg_replace('/[^a-zA-Z0-9]/', '', $pathInfo->filename());

        if (!$pathInfo->hasExtension()) {
            return $string;
        }

        return $string . '.' . $pathInfo->extension();
    }

    /**
     * Lower-cases a string, using mb_strtolower() with the given encoding
     * when the mbstring function exists and strtolower() otherwise.
     *
     * @param string $string String
     * @param string $encoding Encoding
     * @return string
     */
    protected function stringToLowerCase(
        string $string,
        string $encoding = 'UTF-8'
    ): string {
        if (function_exists('mb_strtolower')) {
            return mb_strtolower($string, $encoding);
        }

        return strtolower($string);
    }
}
241