Completed
Push — develop ( 5cb106...80f130 )
by Dmytro
13:36
created

manager/media/rss/rss_fetch.inc::_response_to_rss()   B

Complexity

Conditions 8
Paths 11

Size

Total Lines 39

Duplication

Lines 7
Ratio 17.95 %

Importance

Changes 0
Metric Value
cc 8
nc 11
nop 1
dl 7
loc 39
rs 8.0515
c 0
b 0
f 0
1
<?php
2
/*
3
 * Project:     MagpieRSS: a simple RSS integration tool
4
 * File:        rss_fetch.inc, a simple functional interface
5
                to fetching and parsing RSS files, via the
6
                function fetch_rss()
7
 * Author:      Kellan Elliott-McCrea <kellan@protest.net>
8
 * License:     GPL
9
 *
10
 * The latest version of MagpieRSS can be obtained from:
11
 * http://magpierss.sourceforge.net
12
 *
13
 * For questions, help, comments, discussion, etc., please join the
14
 * Magpie mailing list:
15
 * magpierss-general@lists.sourceforge.net
16
 *
17
 */
18
19
// Setup MAGPIE_DIR for use on hosts that don't include
20
// the current path in include_path.
21
// with thanks to rajiv and smarty
22
// Each constant below is only defined when the including script has not
// already supplied its own value.

// short alias for the platform directory separator
defined('DIR_SEP') or define('DIR_SEP', DIRECTORY_SEPARATOR);

// directory holding this file, with a trailing separator
defined('MAGPIE_DIR') or define('MAGPIE_DIR', dirname(__FILE__) . DIR_SEP);

// cache location, placed under the MODX base path
defined('MAGPIE_CACHE_DIR') or define('MAGPIE_CACHE_DIR', MODX_BASE_PATH . 'assets/cache/rss');
33
34
require_once( MAGPIE_DIR . 'rss_parse.inc' );
35
require_once( MAGPIE_DIR . 'rss_cache.inc' );
36
37
// for including 3rd party libraries
38
define('MAGPIE_EXTLIB', MAGPIE_DIR . 'extlib' . DIR_SEP);
39
require_once( MAGPIE_EXTLIB . 'Snoopy.class.inc');
40
41
42
/*
43
 * CONSTANTS - redefine these in your script to change the
44
 * behaviour of fetch_rss(). Currently, most options affect the cache.
45
 *
46
 * MAGPIE_CACHE_ON - Should Magpie cache parsed RSS objects?
47
 * For me a built in cache was essential to creating a "PHP-like"
48
 * feel to Magpie, see rss_cache.inc for rationale
49
 *
50
 *
51
 * MAGPIE_CACHE_DIR - Where should Magpie cache parsed RSS objects?
52
 * This should be a location that the webserver can write to.   If this
53
 * directory does not already exist Magpie will try to be smart and create
54
 * it.  This will often fail for permissions reasons.
55
 *
56
 *
57
 * MAGPIE_CACHE_AGE - How long to store cached RSS objects? In seconds.
58
 *
59
 *
60
 * MAGPIE_CACHE_FRESH_ONLY - If remote fetch fails, throw error
61
 * instead of returning stale object?
62
 *
63
 * MAGPIE_DEBUG - Display debugging notices?
64
 *
65
*/
66
67
68
/*=======================================================================*\
69
    Function: fetch_rss:
70
    Purpose:  return RSS object for the given url
71
              maintain the cache
72
    Input:    url of RSS file
73
    Output:   parsed RSS object (see rss_parse.inc)
74
75
    NOTES ON CACHING:
76
    If caching is on (MAGPIE_CACHE_ON) fetch_rss will first check the cache.
77
78
    NOTES ON RETRIEVING REMOTE FILES:
79
    If conditional gets are on (MAGPIE_CONDITIONAL_GET_ON) fetch_rss will
80
    return a cached object, and touch the cache object upon receiving a
81
    304.
82
83
    NOTES ON FAILED REQUESTS:
84
    If there is an HTTP error while fetching an RSS object, the cached
85
    version will be returned, if it exists (and if MAGPIE_CACHE_FRESH_ONLY is off)
86
\*=======================================================================*/
87
88
// library version; init() embeds it in the default user agent string
define('MAGPIE_VERSION', '0.72');

// last error message recorded by error(), read back through magpie_error()
$MAGPIE_ERROR = '';
91
92
/**
 * Return the parsed RSS object for a URL, maintaining the cache on the way.
 *
 * @param string $url URL of the RSS file.
 * @return MagpieRSS|false Parsed RSS object (see rss_parse.inc); false when
 *                         nothing could be fetched, parsed or served from
 *                         the cache.
 */
function fetch_rss ($url) {
    // initialize constants
    init();

    if ( !isset($url) ) {
        error("fetch_rss called without a url");
        return false;
    }

    // cache disabled: fetch and parse, with nothing to fall back on
    if ( !MAGPIE_CACHE_ON ) {
        $resp = _fetch_remote_file( $url );
        if ( is_success( $resp->status ) ) {
            return _response_to_rss( $resp );
        }

        error("Failed to fetch $url and cache is off");
        return false;
    }

    // Flow with the cache on:
    // 1. check cache
    // 2. if there is a hit, make sure it's fresh
    // 3. if cached obj fails freshness check, fetch remote
    // 4. if remote fails, return stale object, or error

    $cache = new RSSCache( MAGPIE_CACHE_DIR, MAGPIE_CACHE_AGE );

    if ( MAGPIE_DEBUG && $cache->ERROR ) {
        debug($cache->ERROR, E_USER_WARNING);
    }

    $cache_status    = 0;       // response of check_cache
    $request_headers = array(); // HTTP headers to send with fetch
    $rss             = 0;       // parsed RSS object
    $errormsg        = 0;       // errors, if any

    // store parsed XML by desired output encoding
    // as character munging happens at parse time
    $cache_key = $url . MAGPIE_OUTPUT_ENCODING;

    if ( !$cache->ERROR ) {
        // return cache HIT, MISS, or STALE
        $cache_status = $cache->check_cache( $cache_key );
    }

    // Strict comparison on purpose: before PHP 8, the integer default 0
    // compares loosely equal to any non-numeric string, so `0 == 'HIT'`
    // was true whenever the cache could not be consulted.
    if ( $cache_status === 'HIT' ) {
        $rss = $cache->get( $cache_key );
        if ( isset($rss) && $rss ) {
            // should be cache age
            $rss->from_cache = 1;
            if ( MAGPIE_DEBUG > 1 ) {
                debug("MagpieRSS: Cache HIT", E_USER_NOTICE);
            }
            return $rss;
        }
    }

    // else attempt a conditional get, using the validators stored with the
    // stale object (guarded with empty() because a cached object may not
    // carry them)
    if ( $cache_status === 'STALE' ) {
        $rss = $cache->get( $cache_key );
        if ( $rss && !empty($rss->etag) && !empty($rss->last_modified) ) {
            $request_headers['If-None-Match']     = $rss->etag;
            // the HTTP conditional header is If-Modified-Since; there is
            // no "If-Last-Modified" request header
            $request_headers['If-Modified-Since'] = $rss->last_modified;
        }
    }

    $resp = _fetch_remote_file( $url, $request_headers );

    if ( isset($resp) && $resp ) {
        if ( $resp->status == '304' ) {
            // we have the most current copy
            if ( MAGPIE_DEBUG > 1 ) {
                debug("Got 304 for $url");
            }
            // reset cache on 304 (at minutillo insistent prodding)
            $cache->set($cache_key, $rss);
            return $rss;
        }
        elseif ( is_success( $resp->status ) ) {
            $rss = _response_to_rss( $resp );
            if ( $rss ) {
                if ( MAGPIE_DEBUG > 1 ) {
                    debug("Fetch successful");
                }
                // add object to cache
                $cache->set( $cache_key, $rss );
                return $rss;
            }
        }
        else {
            $errormsg = "Failed to fetch $url ";
            if ( $resp->status == '-100' ) {
                $errormsg .= "(Request timed out after " . MAGPIE_FETCH_TIME_OUT . " seconds)";
            }
            elseif ( $resp->error ) {
                # compensate for Snoopy's annoying habit of tacking
                # on '\n'
                $http_error = substr($resp->error, 0, -2);
                $errormsg .= "(HTTP Error: $http_error)";
            }
            else {
                $errormsg .=  "(HTTP Response: " . $resp->response_code .')';
            }
        }
    }
    else {
        $errormsg = "Unable to retrieve RSS file for unknown reasons.";
    }

    // else fetch failed

    // attempt to return the cached object, unless the host script asked
    // for fresh results only (MAGPIE_CACHE_FRESH_ONLY, false by default)
    if ( $rss && !MAGPIE_CACHE_FRESH_ONLY ) {
        if ( MAGPIE_DEBUG ) {
            debug("Returning STALE object for $url");
        }
        return $rss;
    }

    // else we totally failed
    error( $errormsg );

    return false;
} // end fetch_rss()
226
227
/*=======================================================================*\
228
    Function:   error
229
    Purpose:    set MAGPIE_ERROR, and trigger error
230
\*=======================================================================*/
231
232
function error ($errormsg, $lvl=E_USER_WARNING) {
    global $MAGPIE_ERROR;

    // append PHP's own error message when one is visible in this scope
    // (the original comment ties this to the track_errors setting)
    if ( isset($php_errormsg) ) {
        $errormsg .= " ($php_errormsg)";
    }

    // nothing to report: leave MAGPIE_ERROR untouched
    if ( !$errormsg ) {
        return;
    }

    // remember the prefixed message, then raise it at the requested level
    $MAGPIE_ERROR = "MagpieRSS: $errormsg";
    trigger_error( $MAGPIE_ERROR, $lvl );
}
245
246
function debug ($debugmsg, $lvl=E_USER_NOTICE) {
    // raise the message through PHP's error machinery, tagged as debug output
    $notice = "MagpieRSS [debug] " . $debugmsg;
    trigger_error($notice, $lvl);
}
249
250
/*=======================================================================*\
251
    Function:   magpie_error
252
    Purpose:    accessor for the magpie error variable
253
\*=======================================================================*/
254
function magpie_error ($errormsg="") {
    global $MAGPIE_ERROR;

    // a non-empty argument replaces the stored message; otherwise this
    // is a plain read
    if ( !empty($errormsg) ) {
        $MAGPIE_ERROR = $errormsg;
    }

    return $MAGPIE_ERROR;
}
263
264
/*=======================================================================*\
265
    Function:   _fetch_remote_file
266
    Purpose:    retrieve an arbitrary remote file
267
    Input:      url of the remote file
268
                headers to send along with the request (optional)
269
    Output:     an HTTP response object (see Snoopy.class.inc)
270
\*=======================================================================*/
271
function _fetch_remote_file ($url, $headers = "" ) {
    // Snoopy is an HTTP client in PHP; the same object is handed back to
    // the caller as the response
    $snoopy = new Snoopy();
    $snoopy->agent        = MAGPIE_USER_AGENT;
    $snoopy->read_timeout = MAGPIE_FETCH_TIME_OUT;
    $snoopy->use_gzip     = MAGPIE_USE_GZIP;

    // $headers is an array of request headers; any non-array value
    // (such as the "" default) means "send no extra headers"
    if ( is_array($headers) ) {
        $snoopy->rawheaders = $headers;
    }

    // errors raised by the fetch are suppressed; callers inspect the
    // returned object instead
    @$snoopy->fetch($url);

    return $snoopy;
}
285
286
/*=======================================================================*\
287
    Function:   _response_to_rss
288
    Purpose:    parse an HTTP response object into an RSS object
289
    Input:      an HTTP response object (see Snoopy)
290
    Output:     parsed RSS object (see rss_parse)
291
\*=======================================================================*/
292
/**
 * Parse an HTTP response object into an RSS object and copy the response's
 * cache validators (ETag, Last-Modified) onto it.
 *
 * @param object $resp HTTP response object (see Snoopy); its `results` and
 *                     `headers` members are read.
 * @return MagpieRSS|false Parsed RSS object, or false (after reporting
 *                         through error()) when parsing failed.
 */
function _response_to_rss ($resp) {
    $rss = new MagpieRSS( $resp->results, MAGPIE_OUTPUT_ENCODING, MAGPIE_INPUT_ENCODING, MAGPIE_DETECT_ENCODING );

    // parse failed: report it, with the parser's own message when available
    if ( !$rss || $rss->ERROR ) {
        $errormsg = "Failed to parse RSS file.";

        if ( $rss ) {
            $errormsg .= " (" . $rss->ERROR . ")";
        }
        error($errormsg);

        return false;
    }

    // find ETag and Last-Modified; the cast keeps foreach safe when the
    // response carries no header list
    foreach ( (array) $resp->headers as $h ) {
        // lines without a colon (e.g. the status line) hold no field/value pair
        if ( strpos($h, ':') === false ) {
            continue;
        }
        list($field, $val) = explode(':', $h, 2);

        // header field names are case-insensitive, and the value must not
        // keep surrounding whitespace or a line terminator because
        // fetch_rss() sends it back as a request header
        $field = strtolower(trim($field));
        $val   = trim($val);

        // NOTE(review): static analysis reports that MagpieRSS does not
        // declare `etag` / `last_modified`; confirm in rss_parse.inc.
        if ( $field === 'etag' ) {
            $rss->etag = $val;
        }
        elseif ( $field === 'last-modified' ) {
            $rss->last_modified = $val;
        }
    }

    return $rss;
}
331
332
/*=======================================================================*\
333
    Function:   init
334
    Purpose:    setup constants with default values
335
                check for user overrides
336
\*=======================================================================*/
337
function init () {
    // run once only; the marker constant doubles as the guard
    if ( defined('MAGPIE_INITALIZED') ) {
        return;
    }
    define('MAGPIE_INITALIZED', true);

    // fixed defaults, applied only where the host script has not already
    // defined the constant
    $defaults = array(
        'MAGPIE_CACHE_ON'         => true,
        'MAGPIE_CACHE_DIR'        => './cache',
        'MAGPIE_CACHE_AGE'        => 60*60,   // one hour
        'MAGPIE_CACHE_FRESH_ONLY' => false,
        'MAGPIE_INPUT_ENCODING'   => null,
        'MAGPIE_DETECT_ENCODING'  => true,
        'MAGPIE_DEBUG'            => 0,
        'MAGPIE_FETCH_TIME_OUT'   => 5,       // 5 second timeout
        'MAGPIE_USE_GZIP'         => true,    // fetch with gzip encoding if supported
    );
    foreach ( $defaults as $name => $value ) {
        if ( !defined($name) ) {
            define($name, $value);
        }
    }

    // output encoding follows the manager charset, which itself falls back
    // to ISO-8859-1 (and is written back to the global) when empty
    if ( !defined('MAGPIE_OUTPUT_ENCODING') ) {
        global $modx_manager_charset;
        if ( empty($modx_manager_charset) ) {
            $modx_manager_charset = 'ISO-8859-1';
        }
        define('MAGPIE_OUTPUT_ENCODING', $modx_manager_charset);
    }

    // the user agent advertises whether caching is enabled
    if ( !defined('MAGPIE_USER_AGENT') ) {
        $suffix = MAGPIE_CACHE_ON ? ')' : '; No cache)';
        define('MAGPIE_USER_AGENT', 'MagpieRSS/' . MAGPIE_VERSION . ' (+http://magpierss.sf.net' . $suffix);
    }
}
401
402
// NOTE: the following code should really be in Snoopy, or at least
403
// somewhere other than rss_fetch!
404
405
/*=======================================================================*\
406
    HTTP STATUS CODE PREDICATES
407
    These functions attempt to classify an HTTP status code
408
    based on RFC 2616 and RFC 2518.
409
410
    All of them take an HTTP status code as input, and return true or false
411
412
    All this code is adapted from LWP's HTTP::Status.
413
\*=======================================================================*/
414
415
416
/*=======================================================================*\
417
    Function:   is_info
418
    Purpose:    return true if Informational status code
419
\*=======================================================================*/
420
function is_info ($sc) {
    // informational range: 100 up to, but not including, 200
    return 100 <= $sc && $sc < 200;
}
423
424
/*=======================================================================*\
425
    Function:   is_success
426
    Purpose:    return true if Successful status code
427
\*=======================================================================*/
428
function is_success ($sc) {
    // successful range: 200 up to, but not including, 300
    return 200 <= $sc && $sc < 300;
}
431
432
/*=======================================================================*\
433
    Function:   is_redirect
434
    Purpose:    return true if Redirection status code
435
\*=======================================================================*/
436
function is_redirect ($sc) {
    // redirection range: 300 up to, but not including, 400
    return 300 <= $sc && $sc < 400;
}
439
440
/*=======================================================================*\
441
    Function:   is_error
442
    Purpose:    return true if Error status code
443
\*=======================================================================*/
444
function is_error ($sc) {
    // client and server errors together: 400 up to, but not including, 600
    return 400 <= $sc && $sc < 600;
}
447
448
/*=======================================================================*\
449
    Function:   is_client_error
450
    Purpose:    return true if Error status code, and it's a client error
451
\*=======================================================================*/
452
function is_client_error ($sc) {
    // client error range: 400 up to, but not including, 500
    return 400 <= $sc && $sc < 500;
}
455
456
/*=======================================================================*\
457
    Function:   is_server_error
458
    Purpose:    return true if Error status code, and it's a server error
459
\*=======================================================================*/
460
function is_server_error ($sc) {
    // server error range: 500 up to, but not including, 600
    return 500 <= $sc && $sc < 600;
}
463