Completed
Pull Request — 1.10.x (#1154)
by
unknown
45:16
created

main/inc/lib/magpierss/rss_fetch.inc::init()   F

Complexity

Conditions 14
Paths 3073

Size

Total Lines 62
Code Lines 32

Duplication

Lines 0
Ratio 0 %
Metric Value
cc 14
eloc 32
nc 3073
nop 0
dl 0
loc 62
rs 2.9107

How to fix   Long Method    Complexity   

Long Method

Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.

For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.

Commonly applied refactorings include:

1
<?php
2
/**
3
 * Project:     MagpieRSS: a simple RSS integration tool
4
 * File:        rss_fetch.inc, a simple functional interface
5
                to fetching and parsing RSS files, via the
6
                function fetch_rss()
7
 * Author:      Kellan Elliott-McCrea <[email protected]>
8
 * License:     GPL
9
 *
10
 * The latest version of MagpieRSS can be obtained from:
11
 * http://magpierss.sourceforge.net
12
 *
13
 * For questions, help, comments, discussion, etc., please join the
14
 * Magpie mailing list:
15
 * [email protected]
16
 * @package chamilo.include.rss
17
 */
18
/**
19
 * Code
20
 */ 
21
// Setup MAGPIE_DIR for use on hosts that don't include
22
// the current path in include_path.
23
// with thanks to rajiv and smarty
24
// Path bootstrap: each constant is only defined when the including script
// has not already provided it, so a caller may override any of them before
// including this file without triggering "constant already defined" errors.
if (!defined('DIR_SEP')) {
    define('DIR_SEP', DIRECTORY_SEPARATOR);
}

if (!defined('MAGPIE_DIR')) {
    define('MAGPIE_DIR', dirname(__FILE__) . DIR_SEP);
}

require_once MAGPIE_DIR . 'rss_parse.inc';
require_once MAGPIE_DIR . 'rss_cache.inc';

// for including 3rd party libraries
if (!defined('MAGPIE_EXTLIB')) {
    define('MAGPIE_EXTLIB', MAGPIE_DIR . 'extlib' . DIR_SEP);
}
require_once MAGPIE_EXTLIB . 'Snoopy.class.inc';

// Cache location comes from the host application's api_get_path() helper
// (presumably the platform archive directory -- confirm against the API).
// Guarded so that a MAGPIE_CACHE_DIR set by the caller keeps precedence.
if (!defined('MAGPIE_CACHE_DIR')) {
    define('MAGPIE_CACHE_DIR', api_get_path(SYS_ARCHIVE_PATH));
}
39
40
/* 
41
 * CONSTANTS - redefine these in your script to change the
42
 * behaviour of fetch_rss(); currently, most options affect the cache
43
 *
44
 * MAGPIE_CACHE_ON - Should Magpie cache parsed RSS objects? 
45
 * For me a built in cache was essential to creating a "PHP-like" 
46
 * feel to Magpie, see rss_cache.inc for rationale
47
 *
48
 *
49
 * MAGPIE_CACHE_DIR - Where should Magpie cache parsed RSS objects?
50
 * This should be a location that the webserver can write to.   If this 
51
 * directory does not already exist Magpie will try to be smart and create 
52
 * it.  This will often fail for permissions reasons.
53
 *
54
 *
55
 * MAGPIE_CACHE_AGE - How long to store cached RSS objects? In seconds.
56
 *
57
 *
58
 * MAGPIE_CACHE_FRESH_ONLY - If remote fetch fails, throw error
59
 * instead of returning stale object?
60
 *
61
 * MAGPIE_DEBUG - Display debugging notices?
62
 *
63
*/
64
65
66
/*=======================================================================*\
67
    Function: fetch_rss: 
68
    Purpose:  return RSS object for the given url
69
              maintain the cache
70
    Input:    url of RSS file
71
    Output:   parsed RSS object (see rss_parse.inc)
72
73
    NOTES ON CACHING:  
74
    If caching is on (MAGPIE_CACHE_ON) fetch_rss will first check the cache.
75
    
76
    NOTES ON RETRIEVING REMOTE FILES:
77
    If conditional gets are on (MAGPIE_CONDITIONAL_GET_ON) fetch_rss will
78
    return a cached object, and touch the cache object upon receiving a
79
    304.
80
    
81
    NOTES ON FAILED REQUESTS:
82
    If there is an HTTP error while fetching an RSS object, the cached
83
    version will be returned, if it exists (and if MAGPIE_CACHE_FRESH_ONLY is off)
84
\*=======================================================================*/
85
86
define('MAGPIE_VERSION', '0.72');

// Last error message recorded by error() and read back by magpie_error().
// Both access it via `global $MAGPIE_ERROR`, so it is initialised through
// $GLOBALS: a plain assignment would create a local variable instead when
// this file is included from inside a function.
$GLOBALS['MAGPIE_ERROR'] = "";
89
90
/**
 * Return the parsed RSS object for a URL, maintaining the cache.
 *
 * With caching off the feed is fetched and parsed on every call. With caching
 * on the flow is:
 *   1. check the cache
 *   2. on a fresh hit, return the cached object
 *   3. otherwise fetch the remote file (conditionally, when the stale object
 *      carries both validators)
 *   4. if the fetch fails, fall back to the stale object unless
 *      MAGPIE_CACHE_FRESH_ONLY is set
 *
 * @param string $url URL of the RSS file.
 * @return mixed Parsed RSS object (see rss_parse.inc) or false on failure.
 */
function fetch_rss ($url) {
    // initialize constants
    init();

    if ( !isset($url) ) {
        error("fetch_rss called without a url");
        return false;
    }

    // cache disabled: fetch file, and parse it
    if ( !MAGPIE_CACHE_ON ) {
        $resp = _fetch_remote_file( $url );
        if ( is_success( $resp->status ) ) {
            return _response_to_rss( $resp );
        }

        error("Failed to fetch $url and cache is off");
        return false;
    }

    // cache is ON
    $cache = new RSSCache( MAGPIE_CACHE_DIR, MAGPIE_CACHE_AGE );

    if (MAGPIE_DEBUG and $cache->ERROR) {
        debug($cache->ERROR, E_USER_WARNING);
    }

    // Start from a string status and compare strictly below: with the former
    // integer 0 default, `0 == 'HIT'` is true before PHP 8, so a broken cache
    // was still queried as if it had reported a hit.
    $cache_status    = 'MISS';  // response of check_cache
    $request_headers = array(); // HTTP headers to send with fetch
    $rss             = false;   // parsed RSS object
    $errormsg        = '';      // errors, if any

    // store parsed XML by desired output encoding
    // as character munging happens at parse time
    $cache_key = $url . MAGPIE_OUTPUT_ENCODING;

    if (!$cache->ERROR) {
        // expected to be one of HIT, MISS, or STALE
        $cache_status = $cache->check_cache( $cache_key );
    }

    // if object cached, and cache is fresh, return cached obj
    if ( $cache_status === 'HIT' ) {
        $rss = $cache->get( $cache_key );
        if ( isset($rss) and $rss ) {
            // should be cache age
            $rss->from_cache = 1;
            if ( MAGPIE_DEBUG > 1) {
                debug("MagpieRSS: Cache HIT", E_USER_NOTICE);
            }
            return $rss;
        }
    }

    // else attempt a conditional get: replay the validators remembered from
    // the previous successful fetch
    if ( $cache_status === 'STALE' ) {
        $rss = $cache->get( $cache_key );
        if ( $rss and !empty($rss->etag) and !empty($rss->last_modified) ) {
            $request_headers['If-None-Match'] = $rss->etag;
            // The Last-Modified validator is sent back as If-Modified-Since;
            // "If-Last-Modified" is not an HTTP header and servers ignore it.
            $request_headers['If-Modified-Since'] = $rss->last_modified;
        }
    }

    $resp = _fetch_remote_file( $url, $request_headers );

    if (isset($resp) and $resp) {
        // A 304 is only usable when there is a cached object to hand back;
        // otherwise it is handled as a failed fetch below.
        if ( $resp->status == '304' and $rss ) {
            // we have the most current copy
            if ( MAGPIE_DEBUG > 1) {
                debug("Got 304 for $url");
            }
            // reset cache on 304 (at minutillo insistent prodding)
            $cache->set($cache_key, $rss);
            return $rss;
        }
        elseif ( is_success( $resp->status ) ) {
            $parsed = _response_to_rss( $resp );
            if ( $parsed ) {
                if (MAGPIE_DEBUG > 1) {
                    debug("Fetch successful");
                }
                // add object to cache
                $cache->set( $cache_key, $parsed );
                return $parsed;
            }
            // Parse failed: no usable object remains (matches the previous
            // behaviour, where the failed parse result replaced any stale
            // object held in $rss).
            $rss = false;
        }
        else {
            $errormsg = "Failed to fetch $url ";
            if ( $resp->status == '-100' ) {
                $errormsg .= "(Request timed out after " . MAGPIE_FETCH_TIME_OUT . " seconds)";
            }
            elseif ( $resp->error ) {
                // compensate for Snoopy's habit of tacking a line-end marker
                // onto its error strings (the last two characters are dropped)
                $http_error = substr($resp->error, 0, -2);
                $errormsg .= "(HTTP Error: $http_error)";
            }
            else {
                $errormsg .=  "(HTTP Response: " . $resp->response_code .')';
            }
        }
    }
    else {
        $errormsg = "Unable to retrieve RSS file for unknown reasons.";
    }

    // else fetch failed

    // attempt to return cached object, unless the caller asked for fresh
    // objects only (MAGPIE_CACHE_FRESH_ONLY, off by default)
    if ( $rss and !MAGPIE_CACHE_FRESH_ONLY ) {
        if ( MAGPIE_DEBUG ) {
            debug("Returning STALE object for $url");
        }
        return $rss;
    }

    // else we totally failed. The error is deliberately not raised through
    // error(); it is only reported when debugging is switched on.
    if ( MAGPIE_DEBUG and $errormsg ) {
        debug($errormsg, E_USER_WARNING);
    }

    return false;
} // end fetch_rss()
225
226
/*=======================================================================*\
227
    Function:   error
228
    Purpose:    set MAGPIE_ERROR, and trigger error
229
\*=======================================================================*/
230
231
/**
 * Record an error message in the global $MAGPIE_ERROR and raise it through
 * trigger_error().
 *
 * An empty (falsy) message is ignored: nothing is stored and nothing is
 * triggered.
 *
 * The former `isset($php_errormsg)` branch was removed: that variable is
 * never set in this function's own scope, so the branch could not run (and
 * the track_errors feature behind it no longer exists in PHP 8).
 *
 * @param string $errormsg Message text, without the "MagpieRSS: " prefix.
 * @param int    $lvl      E_USER_* level passed to trigger_error().
 * @return void
 */
function error ($errormsg, $lvl=E_USER_WARNING) {
    global $MAGPIE_ERROR;

    if ( !$errormsg ) {
        return;
    }

    $errormsg = "MagpieRSS: $errormsg";
    $MAGPIE_ERROR = $errormsg;
    trigger_error( $errormsg, $lvl);
}
244
245
/**
 * Raise a debugging message through trigger_error().
 *
 * @param string $debugmsg Text appended to the "MagpieRSS [debug] " prefix.
 * @param int    $lvl      E_USER_* level passed to trigger_error().
 * @return void
 */
function debug ($debugmsg, $lvl=E_USER_NOTICE) {
    $line = 'MagpieRSS [debug] ' . $debugmsg;

    trigger_error($line, $lvl);
}
248
            
249
/*=======================================================================*\
250
    Function:   magpie_error
251
    Purpose:    accessor for the magpie error variable
252
\*=======================================================================*/
253
/**
 * Accessor for the global Magpie error message.
 *
 * Called with a non-empty (truthy) message it first overwrites the stored
 * message; in every case it returns whatever is stored afterwards.
 *
 * @param string $errormsg Optional replacement message.
 * @return mixed The current content of the global $MAGPIE_ERROR.
 */
function magpie_error ($errormsg="") {
    global $MAGPIE_ERROR;

    if ( !empty($errormsg) ) {
        $MAGPIE_ERROR = $errormsg;
    }

    return $MAGPIE_ERROR;
}
262
263
/*=======================================================================*\
264
    Function:   _fetch_remote_file
265
    Purpose:    retrieve an arbitrary remote file
266
    Input:      url of the remote file
267
                headers to send along with the request (optional)
268
    Output:     an HTTP response object (see Snoopy.class.inc)  
269
\*=======================================================================*/
270
/**
 * Retrieve an arbitrary remote file through the Snoopy HTTP client.
 *
 * @param string $url     URL of the remote file.
 * @param mixed  $headers Optional request headers; they are only handed to
 *                        the client when an array is passed.
 * @return Snoopy The client object after the fetch attempt. Callers in this
 *                file read its status, results, headers, error and
 *                response_code properties.
 */
function _fetch_remote_file ($url, $headers = "" ) {
    $http = new Snoopy();

    // client settings come from the MAGPIE_* constants
    $http->agent        = MAGPIE_USER_AGENT;
    $http->read_timeout = MAGPIE_FETCH_TIME_OUT;
    $http->use_gzip     = MAGPIE_USE_GZIP;

    $hasCustomHeaders = is_array($headers);
    if ( $hasCustomHeaders ) {
        $http->rawheaders = $headers;
    }

    // PHP warnings raised during the fetch are suppressed on purpose; the
    // outcome is read from the returned client object instead
    @$http->fetch($url);

    return $http;
}
284
285
/*=======================================================================*\
286
    Function:   _response_to_rss
287
    Purpose:    parse an HTTP response object into an RSS object
288
    Input:      an HTTP response object (see Snoopy)
289
    Output:     parsed RSS object (see rss_parse)
290
\*=======================================================================*/
291
/**
 * Parse an HTTP response object into an RSS object.
 *
 * On success the response's ETag and Last-Modified header values are copied
 * onto the RSS object so that fetch_rss() can replay them in a later
 * conditional request.
 *
 * NOTE(review): static analysis reports that MagpieRSS does not declare the
 * `etag` and `last_modified` properties; they are created dynamically here.
 * Confirm the class declares them (or allows dynamic properties) before
 * running on PHP versions that deprecate dynamic properties.
 *
 * @param object $resp HTTP response object (see Snoopy); its `results` and
 *                     `headers` properties are read.
 * @return mixed Parsed RSS object (see rss_parse), or false when parsing
 *               failed (an E_USER_NOTICE is raised through error()).
 */
function _response_to_rss ($resp) {
    $rss = new MagpieRSS( $resp->results, MAGPIE_OUTPUT_ENCODING, MAGPIE_INPUT_ENCODING, MAGPIE_DETECT_ENCODING );

    // parse failure: construct error message and bail out
    if ( !$rss or $rss->ERROR ) {
        $errormsg = "Failed to parse RSS file.";

        if ($rss) {
            $errormsg .= " (" . $rss->ERROR . ")";
        }
        error($errormsg, E_USER_NOTICE);

        return false;
    }

    // find ETag and Last-Modified
    foreach ($resp->headers as $h) {
        // a line without ": " has no value part (avoids "Undefined offset: 1")
        $parts = explode(": ", $h, 2);
        if ( count($parts) == 2 ) {
            $field = $parts[0];
            $val   = $parts[1];
        }
        else {
            $field = $h;
            $val   = "";
        }

        // Stored without surrounding whitespace: the value is later sent
        // back as a request header, where a stray line ending (presumably
        // left on by the HTTP client -- TODO confirm) would corrupt the
        // request.
        $val = trim($val);

        // HTTP header field names are case-insensitive ("Etag", "etag", ...)
        if ( strcasecmp($field, 'ETag') === 0 ) {
            $rss->etag = $val;
        }

        if ( strcasecmp($field, 'Last-Modified') === 0 ) {
            $rss->last_modified = $val;
        }
    }

    return $rss;
}
330
331
/*=======================================================================*\
332
    Function:   init
333
    Purpose:    setup constants with default values
334
                check for user overrides
335
\*=======================================================================*/
336
/**
 * Set up the MAGPIE_* constants with default values, leaving alone any
 * constant the calling script has already defined.
 *
 * Runs its body only once per request: the MAGPIE_INITALIZED constant marks
 * that defaults have been applied (the spelling is part of the runtime name
 * and is kept as is).
 *
 * @return void
 */
function init () {
    if ( defined('MAGPIE_INITALIZED') ) {
        return;
    }
    define('MAGPIE_INITALIZED', true);

    // constant name => default used when the caller did not define it
    $defaults = array(
        'MAGPIE_CACHE_ON'         => true,
        'MAGPIE_CACHE_DIR'        => './cache',
        'MAGPIE_CACHE_AGE'        => 60*60,   // one hour
        'MAGPIE_CACHE_FRESH_ONLY' => false,
        'MAGPIE_OUTPUT_ENCODING'  => 'UTF-8',
        'MAGPIE_INPUT_ENCODING'   => null,
        'MAGPIE_DETECT_ENCODING'  => true,
        'MAGPIE_DEBUG'            => 0,
        'MAGPIE_FETCH_TIME_OUT'   => 5,       // 5 second timeout
        'MAGPIE_USE_GZIP'         => true,    // use gzip encoding to fetch rss files if supported?
    );

    foreach ($defaults as $name => $value) {
        if ( !defined($name) ) {
            define($name, $value);
        }
    }

    // The user agent depends on MAGPIE_CACHE_ON, so it is built after the
    // plain defaults above have been applied.
    if ( !defined('MAGPIE_USER_AGENT') ) {
        $ua = 'MagpieRSS/'. MAGPIE_VERSION . ' (+http://magpierss.sf.net';
        $ua .= MAGPIE_CACHE_ON ? ')' : '; No cache)';

        define('MAGPIE_USER_AGENT', $ua);
    }
}
398
399
// NOTE: the following code should really be in Snoopy, or at least
400
// somewhere other than rss_fetch!
401
402
/*=======================================================================*\
403
    HTTP STATUS CODE PREDICATES
404
    These functions attempt to classify an HTTP status code
405
    based on RFC 2616 and RFC 2518.
406
    
407
    All of them take an HTTP status code as input, and return true or false
408
409
    All this code is adapted from LWP's HTTP::Status.
410
\*=======================================================================*/
411
412
413
/*=======================================================================*\
414
    Function:   is_info
415
    Purpose:    return true if Informational status code
416
\*=======================================================================*/
417
/**
 * Tell whether an HTTP status code is Informational (100 up to, but not
 * including, 200).
 *
 * @param mixed $sc HTTP status code.
 * @return bool
 */
function is_info ($sc) {
    $lower = 100;
    $upper = 200;

    return $lower <= $sc && $sc < $upper;
}
420
421
/*=======================================================================*\
422
    Function:   is_success
423
    Purpose:    return true if Successful status code
424
\*=======================================================================*/
425
/**
 * Tell whether an HTTP status code is Successful (200 up to, but not
 * including, 300).
 *
 * @param mixed $sc HTTP status code.
 * @return bool
 */
function is_success ($sc) {
    $lower = 200;
    $upper = 300;

    return $lower <= $sc && $sc < $upper;
}
428
429
/*=======================================================================*\
430
    Function:   is_redirect
431
    Purpose:    return true if Redirection status code
432
\*=======================================================================*/
433
/**
 * Tell whether an HTTP status code is a Redirection (300 up to, but not
 * including, 400).
 *
 * @param mixed $sc HTTP status code.
 * @return bool
 */
function is_redirect ($sc) {
    $lower = 300;
    $upper = 400;

    return $lower <= $sc && $sc < $upper;
}
436
437
/*=======================================================================*\
438
    Function:   is_error
439
    Purpose:    return true if Error status code
440
\*=======================================================================*/
441
/**
 * Tell whether an HTTP status code is an Error of either kind (400 up to,
 * but not including, 600).
 *
 * @param mixed $sc HTTP status code.
 * @return bool
 */
function is_error ($sc) {
    $lower = 400;
    $upper = 600;

    return $lower <= $sc && $sc < $upper;
}
444
445
/*=======================================================================*\
446
    Function:   is_client_error
447
    Purpose:    return true if Error status code, and its a client error
448
\*=======================================================================*/
449
/**
 * Tell whether an HTTP status code is a client Error (400 up to, but not
 * including, 500).
 *
 * @param mixed $sc HTTP status code.
 * @return bool
 */
function is_client_error ($sc) {
    $lower = 400;
    $upper = 500;

    return $lower <= $sc && $sc < $upper;
}
452
453
/*=======================================================================*\
454
    Function:   is_server_error
455
    Purpose:    return true if Error status code, and its a server error
456
\*=======================================================================*/
457
/**
 * Tell whether an HTTP status code is a server Error (500 up to, but not
 * including, 600).
 *
 * @param mixed $sc HTTP status code.
 * @return bool
 */
function is_server_error ($sc) {
    $lower = 500;
    $upper = 600;

    return $lower <= $sc && $sc < $upper;
}
460