|
1
|
|
|
'use strict'; |
|
2
|
|
|
const url = require('url'); |
|
3
|
|
|
const punycode = require('punycode'); |
|
4
|
|
|
const queryString = require('query-string'); |
|
5
|
|
|
const prependHttp = require('prepend-http'); |
|
6
|
|
|
const sortKeys = require('sort-keys'); |
|
7
|
|
|
|
|
8
|
|
|
// Well-known port for each protocol, keyed the way a parsed URL reports its
// protocol (i.e. including the trailing colon).
const DEFAULT_PORTS = {};
DEFAULT_PORTS['http:'] = 80;
DEFAULT_PORTS['https:'] = 443;
DEFAULT_PORTS['ftp:'] = 21;
|
13
|
|
|
|
|
14
|
|
|
// Protocols that always contain a `//` bit.
// Every scheme is registered twice: bare (`http`) and with the trailing
// colon (`http:`), so a lookup succeeds with either spelling.
const slashedProtocol = (schemes => {
	const table = {};

	for (const scheme of schemes) {
		table[scheme] = true;
	}

	for (const scheme of schemes) {
		table[scheme + ':'] = true;
	}

	return table;
})(['http', 'https', 'ftp', 'gopher', 'file']);
|
27
|
|
|
|
|
28
|
|
|
/**
 * Check whether `name` matches at least one entry of `filters`.
 *
 * @param {string} name - Value to test.
 * @param {Array<string|RegExp>} filters - `RegExp` entries are tested against
 *   `name`; every other entry is compared to `name` with `===`.
 * @returns {boolean} `true` if any filter matches, `false` otherwise
 *   (including when `filters` is empty).
 */
function testParameter(name, filters) {
	return filters.some(filter => {
		if (filter instanceof RegExp) {
			// Global/sticky regexes remember `lastIndex` between `.test()` calls.
			// Reset it so each name is matched from its start, otherwise the same
			// filter can spuriously fail on the next name it is tested against.
			if (filter.global || filter.sticky) {
				filter.lastIndex = 0;
			}

			return filter.test(name);
		}

		return filter === name;
	});
}
|
31
|
|
|
|
|
32
|
|
|
module.exports = (str, opts) => { |
|
33
|
|
|
opts = Object.assign({ |
|
34
|
|
|
normalizeProtocol: true, |
|
35
|
|
|
normalizeHttps: false, |
|
36
|
|
|
stripFragment: true, |
|
37
|
|
|
stripWWW: true, |
|
38
|
|
|
removeQueryParameters: [/^utm_\w+/i], |
|
39
|
|
|
removeTrailingSlash: true, |
|
40
|
|
|
removeDirectoryIndex: false, |
|
41
|
|
|
sortQueryParameters: true |
|
42
|
|
|
}, opts); |
|
43
|
|
|
|
|
44
|
|
|
if (typeof str !== 'string') { |
|
45
|
|
|
throw new TypeError('Expected a string'); |
|
46
|
|
|
} |
|
47
|
|
|
|
|
48
|
|
|
const hasRelativeProtocol = str.startsWith('//'); |
|
49
|
|
|
|
|
50
|
|
|
// Prepend protocol |
|
51
|
|
|
str = prependHttp(str.trim()).replace(/^\/\//, 'http://'); |
|
52
|
|
|
|
|
53
|
|
|
const urlObj = url.parse(str); |
|
54
|
|
|
|
|
55
|
|
|
if (opts.normalizeHttps && urlObj.protocol === 'https:') { |
|
56
|
|
|
urlObj.protocol = 'http:'; |
|
57
|
|
|
} |
|
58
|
|
|
|
|
59
|
|
|
if (!urlObj.hostname && !urlObj.pathname) { |
|
60
|
|
|
throw new Error('Invalid URL'); |
|
61
|
|
|
} |
|
62
|
|
|
|
|
63
|
|
|
// Prevent these from being used by `url.format` |
|
64
|
|
|
delete urlObj.host; |
|
65
|
|
|
delete urlObj.query; |
|
66
|
|
|
|
|
67
|
|
|
// Remove fragment |
|
68
|
|
|
if (opts.stripFragment) { |
|
69
|
|
|
delete urlObj.hash; |
|
70
|
|
|
} |
|
71
|
|
|
|
|
72
|
|
|
// Remove default port |
|
73
|
|
|
const port = DEFAULT_PORTS[urlObj.protocol]; |
|
74
|
|
|
if (Number(urlObj.port) === port) { |
|
75
|
|
|
delete urlObj.port; |
|
76
|
|
|
} |
|
77
|
|
|
|
|
78
|
|
|
// Remove duplicate slashes |
|
79
|
|
|
if (urlObj.pathname) { |
|
80
|
|
|
urlObj.pathname = urlObj.pathname.replace(/\/{2,}/g, '/'); |
|
81
|
|
|
} |
|
82
|
|
|
|
|
83
|
|
|
// Decode URI octets |
|
84
|
|
|
if (urlObj.pathname) { |
|
85
|
|
|
urlObj.pathname = decodeURI(urlObj.pathname); |
|
86
|
|
|
} |
|
87
|
|
|
|
|
88
|
|
|
// Remove directory index |
|
89
|
|
|
if (opts.removeDirectoryIndex === true) { |
|
90
|
|
|
opts.removeDirectoryIndex = [/^index\.[a-z]+$/]; |
|
91
|
|
|
} |
|
92
|
|
|
|
|
93
|
|
|
if (Array.isArray(opts.removeDirectoryIndex) && opts.removeDirectoryIndex.length > 0) { |
|
94
|
|
|
let pathComponents = urlObj.pathname.split('/'); |
|
95
|
|
|
const lastComponent = pathComponents[pathComponents.length - 1]; |
|
96
|
|
|
|
|
97
|
|
|
if (testParameter(lastComponent, opts.removeDirectoryIndex)) { |
|
98
|
|
|
pathComponents = pathComponents.slice(0, pathComponents.length - 1); |
|
99
|
|
|
urlObj.pathname = pathComponents.slice(1).join('/') + '/'; |
|
100
|
|
|
} |
|
101
|
|
|
} |
|
102
|
|
|
|
|
103
|
|
|
// Resolve relative paths, but only for slashed protocols |
|
104
|
|
|
if (slashedProtocol[urlObj.protocol]) { |
|
105
|
|
|
const domain = urlObj.protocol + '//' + urlObj.hostname; |
|
106
|
|
|
const relative = url.resolve(domain, urlObj.pathname); |
|
107
|
|
|
urlObj.pathname = relative.replace(domain, ''); |
|
108
|
|
|
} |
|
109
|
|
|
|
|
110
|
|
|
if (urlObj.hostname) { |
|
111
|
|
|
// IDN to Unicode |
|
112
|
|
|
urlObj.hostname = punycode.toUnicode(urlObj.hostname).toLowerCase(); |
|
113
|
|
|
|
|
114
|
|
|
// Remove trailing dot |
|
115
|
|
|
urlObj.hostname = urlObj.hostname.replace(/\.$/, ''); |
|
116
|
|
|
|
|
117
|
|
|
// Remove `www.` |
|
118
|
|
|
if (opts.stripWWW) { |
|
119
|
|
|
urlObj.hostname = urlObj.hostname.replace(/^www\./, ''); |
|
120
|
|
|
} |
|
121
|
|
|
} |
|
122
|
|
|
|
|
123
|
|
|
// Remove URL with empty query string |
|
124
|
|
|
if (urlObj.search === '?') { |
|
125
|
|
|
delete urlObj.search; |
|
126
|
|
|
} |
|
127
|
|
|
|
|
128
|
|
|
const queryParameters = queryString.parse(urlObj.search); |
|
129
|
|
|
|
|
130
|
|
|
// Remove query unwanted parameters |
|
131
|
|
|
if (Array.isArray(opts.removeQueryParameters)) { |
|
132
|
|
|
for (const key in queryParameters) { |
|
133
|
|
|
if (testParameter(key, opts.removeQueryParameters)) { |
|
134
|
|
|
delete queryParameters[key]; |
|
135
|
|
|
} |
|
136
|
|
|
} |
|
137
|
|
|
} |
|
138
|
|
|
|
|
139
|
|
|
// Sort query parameters |
|
140
|
|
|
if (opts.sortQueryParameters) { |
|
141
|
|
|
urlObj.search = queryString.stringify(sortKeys(queryParameters)); |
|
142
|
|
|
} |
|
143
|
|
|
|
|
144
|
|
|
// Decode query parameters |
|
145
|
|
|
if (urlObj.search !== null) { |
|
146
|
|
|
urlObj.search = decodeURIComponent(urlObj.search); |
|
147
|
|
|
} |
|
148
|
|
|
|
|
149
|
|
|
// Take advantage of many of the Node `url` normalizations |
|
150
|
|
|
str = url.format(urlObj); |
|
151
|
|
|
|
|
152
|
|
|
// Remove ending `/` |
|
153
|
|
|
if (opts.removeTrailingSlash || urlObj.pathname === '/') { |
|
154
|
|
|
str = str.replace(/\/$/, ''); |
|
155
|
|
|
} |
|
156
|
|
|
|
|
157
|
|
|
// Restore relative protocol, if applicable |
|
158
|
|
|
if (hasRelativeProtocol && !opts.normalizeProtocol) { |
|
159
|
|
|
str = str.replace(/^http:\/\//, '//'); |
|
160
|
|
|
} |
|
161
|
|
|
|
|
162
|
|
|
return str; |
|
163
|
|
|
}; |
|
164
|
|
|
|