1
|
|
|
'use strict'; |
2
|
|
|
|
3
|
|
|
var http = require('http'); |
4
|
|
|
var https = require('https'); |
5
|
|
|
var urllib = require('url'); |
6
|
|
|
var utillib = require('util'); |
7
|
|
|
var zlib = require('zlib'); |
8
|
|
|
var dns = require('dns'); |
9
|
|
|
var Stream = require('stream').Readable; |
10
|
|
|
var CookieJar = require('./cookiejar').CookieJar; |
11
|
|
|
var encodinglib = require('encoding'); |
12
|
|
|
var net = require('net'); |
13
|
|
|
|
14
|
|
|
var USE_ALLOC = typeof Buffer.alloc === 'function'; |
|
|
|
|
15
|
|
|
|
16
|
|
|
exports.FetchStream = FetchStream; |
17
|
|
|
exports.CookieJar = CookieJar; |
18
|
|
|
exports.fetchUrl = fetchUrl; |
19
|
|
|
|
20
|
|
|
/**
 * Readable stream that fetches the contents of a URL.
 *
 * Nothing is requested synchronously: the request (or the "url not defined"
 * error) is deferred to a later tick so that the caller has a chance to
 * attach 'error', 'meta' and 'data' listeners first.
 *
 * @constructor
 * @param {String} url URL to fetch
 * @param {Object} [options] Fetch options, normalized by normalizeOptions()
 */
function FetchStream(url, options) {
    Stream.call(this);

    options = options || {};

    this.url = url;
    this.userAgent = options.userAgent || 'FetchStream';
    this._redirect_count = 0;
    this.options = options;

    // Set up the buffering state before anything else so that _read() and
    // drainBuffer() are safe to call as soon as a consumer attaches
    this.responseBuffer = USE_ALLOC ? Buffer.alloc(0) : new Buffer(0);
    this.ended = false;
    this.readyToRead = 0;

    var action;
    if (!this.url) {
        // emitting synchronously would throw, as no 'error' listener can be
        // attached before the constructor returns
        action = this.emit.bind(this, 'error', new Error('url not defined'));
    } else {
        this.normalizeOptions();
        action = this.runStream.bind(this, url);
    }

    // prevent errors before 'error' handler is set by deferring actions
    if (typeof setImmediate !== 'undefined') {
        setImmediate(action);
    } else {
        process.nextTick(action);
    }
}
utillib.inherits(FetchStream, Stream);
48
|
|
|
|
49
|
|
|
/**
 * Readable stream read hook. Ends the stream once the response has finished
 * and nothing is left in the internal buffer; otherwise records how many
 * more bytes the consumer asked for and tries to hand them over.
 *
 * @param {Number} size Number of bytes requested by the consumer
 */
FetchStream.prototype._read = function (size) {
    var exhausted = this.ended && this.responseBuffer.length === 0;

    if (!exhausted) {
        this.readyToRead += size;
        this.drainBuffer();
        return;
    }

    this.push(null);
};
57
|
|
|
|
58
|
|
|
/**
 * Pushes buffered response data to the consumer, up to the number of bytes
 * requested through _read(). Does nothing when no bytes were requested or
 * nothing is buffered. Whatever is not pushed stays in this.responseBuffer,
 * and the outstanding request counter is reset to 0 after a push.
 */
FetchStream.prototype.drainBuffer = function () {
    var wanted = this.readyToRead;
    var buffered = this.responseBuffer;

    if (wanted === 0 || buffered.length === 0) {
        return;
    }

    var chunk;
    var rest;

    if (buffered.length > wanted) {
        // slice() creates views onto the same memory, so no allocation,
        // zero-filling or copying is needed to split the buffer
        chunk = buffered.slice(0, wanted);
        rest = buffered.slice(wanted);
    } else {
        chunk = buffered;
        rest = USE_ALLOC ? Buffer.alloc(0) : new Buffer(0);
    }

    this.responseBuffer = rest;
    this.readyToRead = 0;

    if (this.options.encoding) {
        this.push(chunk, this.options.encoding);
    } else {
        this.push(chunk);
    }
};
87
|
|
|
|
88
|
|
|
/**
 * Announces that the fetch is being abandoned by emitting a 'destroy' event.
 * _runStream() subscribes to this event in order to abort the active request.
 *
 * @param {*} ex Value passed on to the 'destroy' listeners
 */
FetchStream.prototype.destroy = function (ex) {
    var eventName = 'destroy';
    this.emit(eventName, ex);
};
91
|
|
|
|
92
|
|
|
/**
 * Fills in defaults and normalizes this.options in place:
 * cookie jar, redirect limit, lowercased header names, default
 * user-agent / cache headers, accept-encoding, response size limit,
 * request method, initial cookies and rejectUnauthorized.
 */
FetchStream.prototype.normalizeOptions = function () {
    var options = this.options;
    var i;

    // cookiejar
    this.cookieJar = options.cookieJar || new CookieJar();

    // default redirects - 10
    // if redirects are disabled, then 0. `disableRedirects` is the option
    // checked when a redirect response arrives; the singular spelling is
    // accepted as well because earlier code tested for it here
    if (options.disableRedirects || options.disableRedirect) {
        options.maxRedirects = 0;
    } else if (typeof options.maxRedirects !== 'number' && !(options.maxRedirects instanceof Number)) {
        options.maxRedirects = 10;
    }

    // normalize header keys
    // HTTP and HTTPS takes in key names in case insensitive but to find
    // an exact value from an object key name needs to be case sensitive
    // so we're just lowercasing all input keys
    var supplied = options.headers || {};
    var names = Object.keys(supplied);
    var headers = {};

    // iterate backwards so that for keys differing only in case or
    // whitespace the first one listed wins
    for (i = names.length - 1; i >= 0; i--) {
        headers[names[i].toLowerCase().trim()] = supplied[names[i]];
    }
    options.headers = headers;

    if (!headers['user-agent']) {
        headers['user-agent'] = this.userAgent;
    }

    if (!headers.pragma) {
        headers.pragma = 'no-cache';
    }

    if (!headers['cache-control']) {
        headers['cache-control'] = 'no-cache';
    }

    if (!options.disableGzip) {
        headers['accept-encoding'] = 'gzip, deflate';
    } else {
        delete headers['accept-encoding'];
    }

    // max length for the response,
    // if not set, default is Infinity
    if (!options.maxResponseLength) {
        options.maxResponseLength = Infinity;
    }

    // method:
    // defaults to GET, or when payload present to POST
    if (!options.method) {
        options.method = options.payload || options.payloadSize ? 'POST' : 'GET';
    }

    // set cookies
    // takes full cookie definition strings as params
    if (options.cookies) {
        for (i = 0; i < options.cookies.length; i++) {
            this.cookieJar.setCookie(options.cookies[i], this.url);
        }
    }

    // rejectUnauthorized
    if (typeof options.rejectUnauthorized === 'undefined') {
        options.rejectUnauthorized = true;
    }
};
165
|
|
|
|
166
|
|
|
/**
 * Converts a URL into request options and selects the transport module.
 *
 * @param {String} url URL to be requested
 * @return {Object} `{urloptions, transport}` where `urloptions` is the
 *         options object for `transport.request()` and `transport` is the
 *         `https` module for "https:" URLs and the `http` module otherwise
 */
FetchStream.prototype.parseUrl = function (url) {
    var urlparts = urllib.parse(url, false, true);
    var secure = urlparts.protocol === 'https:';
    var transport = secure ? https : http;

    // Every request gets its own copy of the headers. Request specific
    // values (URL credentials, cookies, host) must not end up in the shared
    // options.headers object where they would be reused for redirect targets
    var shared = this.options.headers || {};
    var names = Object.keys(shared);
    var headers = {};
    for (var i = 0; i < names.length; i++) {
        headers[names[i]] = shared[names[i]];
    }

    var urloptions = {
        host: urlparts.hostname || urlparts.host,
        port: urlparts.port || (secure ? 443 : 80),
        // pathname is null for URLs like "//example.com"; apply the default
        // before concatenating, otherwise the path becomes the string "null"
        path: (urlparts.pathname || '/') + (urlparts.search || ''),
        method: this.options.method,
        rejectUnauthorized: this.options.rejectUnauthorized,
        headers: headers
    };

    // protocol specific agent first, a generic `agent` option overrides it
    var agentOption = secure ? 'agentHttps' : 'agentHttp';
    if (agentOption in this.options) {
        urloptions.agent = this.options[agentOption];
    }
    if ('agent' in this.options) {
        urloptions.agent = this.options.agent;
    }

    if (urlparts.auth) {
        var credentials = USE_ALLOC ? Buffer.from(urlparts.auth) : new Buffer(urlparts.auth);
        headers.Authorization = 'Basic ' + credentials.toString('base64');
    }

    return {
        urloptions: urloptions,
        transport: transport
    };
};
227
|
|
|
|
228
|
|
|
/**
 * Remembers the requested encoding in the stream options. drainBuffer()
 * reads `options.encoding` and forwards it as the second argument of push().
 *
 * @param {String} encoding Encoding name to store
 */
FetchStream.prototype.setEncoding = function (encoding) {
    var streamOptions = this.options;
    streamOptions.encoding = encoding;
};
231
|
|
|
|
232
|
|
|
/**
 * Prepares a request for the given URL (cookies, content-length, optional
 * DNS pre-resolution) and hands it over to _runStream(). Called for the
 * initial URL and again for every followed redirect.
 *
 * When the `asyncDnsLoookup` option (historical spelling) or its correctly
 * spelled alias `asyncDnsLookup` is set, the host name is resolved with
 * dns.resolve4() first and the request is made against the first address,
 * with the original name kept in the Host header.
 *
 * @param {String} url URL to be requested
 */
FetchStream.prototype.runStream = function (url) {
    var self = this;
    var url_data = this.parseUrl(url);
    var urloptions = url_data.urloptions;
    var headers = urloptions.headers;
    var cookies = this.cookieJar.getCookies(url);

    if (cookies) {
        headers.cookie = cookies;
    } else {
        delete headers.cookie;
    }

    if (this.options.payload) {
        headers['content-length'] = Buffer.byteLength(this.options.payload || '', 'utf-8');
    }

    // an explicit payload size takes precedence over the measured one
    if (this.options.payloadSize) {
        headers['content-length'] = this.options.payloadSize;
    }

    if (!(this.options.asyncDnsLoookup || this.options.asyncDnsLookup)) {
        this._runStream(url_data, url);
        return;
    }

    function onResolved(err, addresses) {
        if (err) {
            self.emit('error', err);
            return;
        }

        // connect to the resolved address but keep the name in Host
        headers.host = urloptions.hostname || urloptions.host;
        urloptions.hostname = addresses[0];
        urloptions.host = headers.host + (urloptions.port ? ':' + urloptions.port : '');

        self._runStream(url_data, url);
    }

    if (net.isIP(urloptions.host)) {
        // already an address, nothing to resolve
        onResolved(null, [urloptions.host]);
    } else {
        dns.resolve4(urloptions.host, onResolved);
    }
};
273
|
|
|
|
274
|
|
|
/**
 * Performs a single HTTP(S) request and feeds the response body into the
 * stream. Redirect responses are followed through runStream() while the
 * redirect limit allows it; otherwise a 'meta' event is emitted and the body
 * (decompressed for gzip/deflate content encodings) is collected into
 * this.responseBuffer, truncated at options.maxResponseLength.
 *
 * @param {Object} url_data `{urloptions, transport}` as built by parseUrl()
 * @param {String} url URL being requested, used for cookies and redirects
 */
FetchStream.prototype._runStream = function (url_data, url) {
    var self = this;

    var req = url_data.transport.request(url_data.urloptions, function (res) {
        var newCookies = res.headers['set-cookie'];
        var i;

        // catch new cookies before potential redirect
        if (Array.isArray(newCookies)) {
            for (i = 0; i < newCookies.length; i++) {
                self.cookieJar.setCookie(newCookies[i], url);
            }
        }

        if ([301, 302, 303, 307, 308].indexOf(res.statusCode) >= 0 &&
            !self.options.disableRedirects &&
            self.options.maxRedirects > self._redirect_count &&
            res.headers.location) {
            self._redirect_count++;
            req.destroy();
            self.runStream(urllib.resolve(url, res.headers.location));
            return;
        }

        self.meta = {
            status: res.statusCode,
            responseHeaders: res.headers,
            finalUrl: url,
            redirectCount: self._redirect_count,
            cookieJar: self.cookieJar
        };

        // number of body bytes accepted so far
        var curlen = 0;

        // Appends a body chunk to the response buffer, cutting it off at
        // options.maxResponseLength
        function receive(chunk) {
            var maxlen;

            if (curlen + chunk.length > self.options.maxResponseLength) {
                maxlen = self.options.maxResponseLength - curlen;
            } else {
                maxlen = chunk.length;
            }

            if (maxlen <= 0) {
                return;
            }

            curlen += Math.min(maxlen, chunk.length);

            if (maxlen < chunk.length) {
                // the total length argument truncates the last chunk
                self.responseBuffer = Buffer.concat([self.responseBuffer, chunk], self.responseBuffer.length + maxlen);
            } else if (self.responseBuffer.length === 0) {
                self.responseBuffer = chunk;
            } else {
                self.responseBuffer = Buffer.concat([self.responseBuffer, chunk]);
            }

            self.drainBuffer();
        }

        // Marks the response as finished and reports the error
        function fail(e) {
            self.ended = true;
            self.emit('error', e);
            self.drainBuffer();
        }

        // Marks the response as finished; if data is still buffered the
        // stream is ended later by _read()
        function finish() {
            self.ended = true;
            if (self.responseBuffer.length === 0) {
                self.push(null);
            }
        }

        // Routes the response through a zlib decoder. `firstChunk` is data
        // that was already taken off the response for sniffing
        function decode(decoder, firstChunk) {
            decoder.on('data', receive);
            decoder.on('error', fail);
            decoder.on('end', finish);
            if (firstChunk) {
                decoder.write(firstChunk);
            }
            res.pipe(decoder);
        }

        // "deflate" is specified as zlib-wrapped data but some servers send
        // a raw deflate stream instead. A zlib header has compression method
        // 8 in the low nibble of its first byte, so look at the first chunk
        // to pick the matching decoder
        function decodeDeflate() {
            var sniffed = false;

            res.once('data', function (chunk) {
                sniffed = true;
                var wrapped = (chunk[0] & 0x0F) === 0x08;
                decode(wrapped ? zlib.createInflate() : zlib.createInflateRaw(), chunk);
            });

            res.once('end', function () {
                // empty body, no decoder was ever attached
                if (!sniffed) {
                    finish();
                }
            });
        }

        self.emit('meta', self.meta);

        // errors of the response itself would otherwise go unhandled
        res.on('error', fail);

        var encoding = (res.headers['content-encoding'] || '').toLowerCase().trim();

        if (encoding === 'gzip') {
            decode(zlib.createGunzip());
            return;
        }

        if (encoding === 'deflate') {
            decodeDeflate();
            return;
        }

        res.on('data', receive);
        res.on('end', finish);
    });

    req.on('error', function (e) {
        self.emit('error', e);
    });

    if (this.options.timeout) {
        req.setTimeout(this.options.timeout, req.abort.bind(req));
    }
    this.on('destroy', req.abort.bind(req));

    if (this.options.payload) {
        req.end(this.options.payload);
    } else if (this.options.payloadStream) {
        this.options.payloadStream.pipe(req);
        this.options.payloadStream.resume();
    } else {
        req.end();
    }
};
384
|
|
|
|
385
|
|
|
/**
 * Fetches a URL and delivers the complete response body to a callback.
 *
 * The body is converted to UTF-8 unless it already is UTF-8 or
 * `options.disableDecoding` is set. The source charset is taken from
 * `options.overrideCharset`, an HTML meta tag (for text/html responses) or
 * the Content-Type header, in that order of preference.
 *
 * @param {String} url URL to fetch
 * @param {Object} [options] Options, passed on to FetchStream
 * @param {Function} callback Called once, either as `callback(error)` or as
 *        `callback(null, meta, body)`. `body` is a Buffer, or a string when
 *        `options.outputEncoding` is set
 */
function fetchUrl(url, options, callback) {
    if (!callback && typeof options === 'function') {
        callback = options;
        options = undefined;
    }
    options = options || {};

    var fetchstream = new FetchStream(url, options);
    var chunks = [];
    var length = 0;
    var response_data;
    var content_type;
    var callbackFired = false;

    fetchstream.on('meta', function (meta) {
        response_data = meta;
        content_type = _parseContentType(meta.responseHeaders['content-type']);
    });

    fetchstream.on('data', function (chunk) {
        if (chunk) {
            chunks.push(chunk);
            length += chunk.length;
        }
    });

    fetchstream.on('error', function (error) {
        if (error && error.code === 'HPE_INVALID_CONSTANT') {
            // skip invalid formatting errors
            return;
        }
        if (callbackFired) {
            return;
        }
        callbackFired = true;
        callback(error);
    });

    fetchstream.on('end', function () {
        if (callbackFired) {
            return;
        }
        callbackFired = true;

        var buffer = Buffer.concat(chunks, length);

        // content_type is only set by a 'meta' event; fall back to an empty
        // description rather than failing on a stream that ended without one
        var contentType = content_type || {};
        var charset = contentType.charset;

        if (contentType.mimeType === 'text/html') {
            // a charset declared inside the document wins over the header
            charset = _findHTMLCharset(buffer) || charset;
        }

        charset = (options.overrideCharset || charset || 'utf-8').trim().toLowerCase();

        if (!options.disableDecoding && !charset.match(/^utf-?8$/i)) {
            buffer = encodinglib.convert(buffer, 'UTF-8', charset);
        }

        if (options.outputEncoding) {
            return callback(null, response_data, buffer.toString(options.outputEncoding));
        }

        return callback(null, response_data, buffer);
    });
}
455
|
|
|
|
456
|
|
|
/**
 * Splits a Content-Type header value into mime type and charset.
 *
 * @param {String} str Header value, e.g. `text/html; charset="utf-8"`
 * @return {Object} `{mimeType, charset}`, both trimmed and lowercased, with
 *         charset defaulting to "utf-8"; an empty object when `str` is empty
 */
function _parseContentType(str) {
    if (!str) {
        return {};
    }

    var parts = str.split(';');
    var mimeType = parts.shift();
    var charset;
    var pair;

    for (var i = 0, len = parts.length; i < len; i++) {
        pair = parts[i].split('=');
        if (pair.length > 1 && pair[0].trim().toLowerCase() === 'charset') {
            // parameter values may be quoted, the quotes are not part of
            // the charset name
            charset = pair[1].trim().replace(/^["']|["']$/g, '');
        }
    }

    return {
        mimeType: (mimeType || '').trim().toLowerCase(),
        charset: (charset || 'UTF-8').trim().toLowerCase() // defaults to UTF-8
    };
}
478
|
|
|
|
479
|
|
|
/**
 * Looks for a charset declaration in the meta tags of an HTML document.
 * A `<meta http-equiv="content-type" ...>` tag is checked first, then a
 * `<meta charset=...>` tag. Matching is case insensitive and attribute
 * values may be quoted or unquoted.
 *
 * @param {Buffer} htmlbuffer HTML document
 * @return {String|undefined} Lowercased charset name, or undefined when the
 *         document does not declare one
 */
function _findHTMLCharset(htmlbuffer) {
    // meta tags consist of ASCII characters whatever the document charset is
    var body = htmlbuffer.toString('ascii');
    var charset;
    var declared;

    // the http-equiv attribute may be preceded by other attributes
    var meta = body.match(/<meta\s+(?:[^>]*?\s)?http-equiv\s*=\s*["']?content-type["']?[^>]*>/i);

    if (meta) {
        // charset names may contain underscores (e.g. Shift_JIS)
        declared = meta[0].match(/charset\s*=\s*["']?([a-zA-Z0-9\-_.:]+)/i);
        if (declared) {
            charset = declared[1].trim().toLowerCase();
        }
    }

    if (!charset) {
        meta = body.match(/<meta\s+(?:[^>]*?\s)?charset\s*=\s*["']?\s*([^"'\s\/>]+)/i);
        if (meta) {
            charset = meta[1].trim().toLowerCase();
        }
    }

    return charset;
}
501
|
|
|
|
This check looks for references to variables that have not been declared. This is most likely a typographical error, or a variable that has been renamed.
To learn more about declaring variables in JavaScript, see the MDN documentation.