Completed
Push — sidebaracl ( 7a112d...7c3e4a )
by Andreas
04:38
created

SitemapItem::__construct()   A

Complexity

Conditions 1
Paths 1

Size

Total Lines 6
Code Lines 5

Duplication

Lines 0
Ratio 0 %
Metric Value
dl 0
loc 6
rs 9.4285
cc 1
eloc 5
nc 1
nop 4
<?php
/**
 * Sitemap handling functions
 *
 * @license    GPL 2 (http://www.gnu.org/licenses/gpl.html)
 * @author     Michael Hamann <[email protected]>
 */

// Abort unless the DOKU_INC constant has been defined before this file is loaded.
if(!defined('DOKU_INC')) die('meh.');

/**
 * A class for building sitemaps and pinging search engines with the sitemap URL.
 *
 * @author Michael Hamann
 */
class Sitemapper {
    /**
     * Builds a Google Sitemap of all public pages known to the indexer
     *
     * The map is placed in the cache directory, named sitemap.xml or
     * sitemap.xml.gz (see getFilePath()) - This file needs to be writable!
     *
     * Nothing is written when $conf['sitemap'] is not a number >= 1, when the
     * target is not writable, or when the existing file is non-empty and
     * younger than $conf['sitemap'] days.
     *
     * Plugins can modify the item list and the target path, or prevent the
     * default save, through the SITEMAP_GENERATE event.
     *
     * @author Michael Hamann
     * @author Andreas Gohr
     * @link   https://www.google.com/webmasters/sitemaps/docs/en/about.html
     * @link   http://www.sitemaps.org/
     *
     * @return bool false when nothing was generated; otherwise the result of
     *              the SITEMAP_GENERATE event, which is the return value of
     *              io_saveFile() when the default action ran.
     *              NOTE(review): when a handler prevents the default action the
     *              event result is returned unchanged - confirm it is a bool.
     */
    public static function generate(){
        global $conf;
        if(!is_numeric($conf['sitemap']) || $conf['sitemap'] < 1) return false;

        $sitemap = self::getFilePath();

        // an existing file must be writable itself, otherwise its directory must be
        $writeTarget = file_exists($sitemap) ? $sitemap : dirname($sitemap);
        if(!is_writable($writeTarget)) return false;

        // keep a non-empty sitemap that is younger than the configured number of days
        if(@filesize($sitemap) &&
           @filemtime($sitemap) > (time()-($conf['sitemap']*86400))){ // 60*60*24=86400
            dbglog('Sitemapper::generate(): Sitemap up to date');
            return false;
        }

        dbglog("Sitemapper::generate(): using $sitemap");

        $pages = idx_get_indexer()->getPages();
        dbglog('Sitemapper::generate(): creating sitemap using '.count($pages).' pages');
        $items = array();

        // build the sitemap items
        foreach($pages as $id){
            //skip hidden, non existing and restricted files
            if(isHiddenPage($id)) continue;
            if(auth_aclcheck($id,'',array()) < AUTH_READ) continue;
            $item = SitemapItem::createFromID($id);
            if ($item !== null) {
                $items[] = $item;
            }
        }

        // items and path are handed over by reference so event handlers can change them
        $eventData = array('items' => &$items, 'sitemap' => &$sitemap);
        $event = new Doku_Event('SITEMAP_GENERATE', $eventData);
        if ($event->advise_before(true)) {
            //save the new sitemap
            $event->result = io_saveFile($sitemap, self::getXML($items));
        }
        $event->advise_after();

        return $event->result;
    }

    /**
     * Builds the sitemap XML string from the given array of SitemapItems.
     *
     * @param $items array The SitemapItems that shall be included in the sitemap.
     * @return string The sitemap XML.
     *
     * @author Michael Hamann
     */
    private static function getXML($items) {
        // output buffering collects everything echoed below into one string
        ob_start();
        echo '<?xml version="1.0" encoding="UTF-8"?>'.NL;
        echo '<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">'.NL;
        foreach ($items as $item) {
            /** @var SitemapItem $item */
            echo $item->toXML();
        }
        echo '</urlset>'.NL;
        return ob_get_clean();
    }

    /**
     * Helper function for getting the path to the sitemap file.
     *
     * The file lives in $conf['cachedir'] and is called sitemap.xml, with
     * '.gz' appended when sitemapIsCompressed() returns true.
     *
     * @return string The path to the sitemap file.
     *
     * @author Michael Hamann
     */
    public static function getFilePath() {
        global $conf;

        $sitemap = $conf['cachedir'].'/sitemap.xml';
        if (self::sitemapIsCompressed()) {
            $sitemap .= '.gz';
        }

        return $sitemap;
    }

    /**
     * Helper function for checking if the sitemap is compressed
     *
     * True when $conf['compression'] is exactly 'bz2' or 'gz'.
     *
     * @return bool If the sitemap file is compressed
     */
    public static function sitemapIsCompressed() {
        global $conf;
        return $conf['compression'] === 'bz2' || $conf['compression'] === 'gz';
    }

    /**
     * Pings search engines with the sitemap url. Plugins can add or remove
     * urls to ping using the SITEMAP_PING event.
     *
     * Each request's error (if any) and tag-stripped response are written to
     * the debug log; failures do not change the return value.
     *
     * @author Michael Hamann
     *
     * @return bool always true
     */
    public static function pingSearchEngines() {
        //ping search engines...
        $http = new DokuHTTPClient();
        // NOTE(review): static analysis reported that the property timeout is declared
        // private in class HTTPClient and cannot be accessed here - confirm its visibility.
        $http->timeout = 8;

        $encoded_sitemap_url = urlencode(wl('', array('do' => 'sitemap'), true, '&'));
        $ping_urls = array(
            'google'    => 'http://www.google.com/webmasters/sitemaps/ping?sitemap='.$encoded_sitemap_url,
            'microsoft' => 'http://www.bing.com/webmaster/ping.aspx?siteMap='.$encoded_sitemap_url,
            'yandex'    => 'http://blogs.yandex.ru/pings/?status=success&url='.$encoded_sitemap_url
        );

        $data = array('ping_urls' => $ping_urls,
                            'encoded_sitemap_url' => $encoded_sitemap_url
        );
        $event = new Doku_Event('SITEMAP_PING', $data);
        if ($event->advise_before(true)) {
            // iterate over $data, not $ping_urls, so the list passed to the event is the one used
            foreach ($data['ping_urls'] as $name => $url) {
                dbglog("Sitemapper::PingSearchEngines(): pinging $name");
                $resp = $http->get($url);
                // NOTE(review): static analysis reported that the property error is declared
                // private in class HTTPClient and cannot be accessed here - confirm its visibility.
                if($http->error) dbglog("Sitemapper:pingSearchengines(): $http->error");
                dbglog('Sitemapper:pingSearchengines(): '.preg_replace('/[\n\r]/',' ',strip_tags($resp)));
            }
        }
        $event->advise_after();

        return true;
    }
}

/**
 * An item of a sitemap.
 *
 * @author Michael Hamann
 */
class SitemapItem {
    public $url;
    public $lastmod;
    public $changefreq;
    public $priority;

    /**
     * Create a new item.
     *
     * @param string       $url        The url of the item
     * @param int          $lastmod    Timestamp of the last modification
     * @param string       $changefreq How frequently the item is likely to change. Valid values: always, hourly, daily, weekly, monthly, yearly, never.
     * @param float|string $priority   The priority of the item relative to other URLs on your site. Valid values range from 0.0 to 1.0.
     */
    public function __construct($url, $lastmod, $changefreq = null, $priority = null) {
        $this->url = $url;
        $this->lastmod = $lastmod;
        $this->changefreq = $changefreq;
        $this->priority = $priority;
    }

    /**
     * Helper function for creating an item for a wikipage id.
     *
     * The id is trimmed and the modification time of the file returned by
     * wikiFN() becomes the item's lastmod value.
     *
     * @param string       $id         A wikipage id.
     * @param string       $changefreq How frequently the item is likely to change. Valid values: always, hourly, daily, weekly, monthly, yearly, never.
     * @param float|string $priority   The priority of the item relative to other URLs on your site. Valid values range from 0.0 to 1.0.
     * @return SitemapItem|null The sitemap item, or null when no modification time could be read for the page file.
     */
    public static function createFromID($id, $changefreq = null, $priority = null) {
        $id = trim($id);
        // errors are suppressed: a missing file simply yields a falsy value
        $date = @filemtime(wikiFN($id));
        if(!$date) return null;
        return new SitemapItem(wl($id, '', true), $date, $changefreq, $priority);
    }

    /**
     * Get the XML representation of the sitemap item.
     *
     * The changefreq and priority elements are only emitted when the
     * corresponding property is not null.
     *
     * @return string The XML representation.
     */
    public function toXML() {
        $result = '  <url>'.NL
                 .'    <loc>'.hsc($this->url).'</loc>'.NL
                 .'    <lastmod>'.date_iso8601($this->lastmod).'</lastmod>'.NL;
        if ($this->changefreq !== null) {
            $result .= '    <changefreq>'.hsc($this->changefreq).'</changefreq>'.NL;
        }
        if ($this->priority !== null) {
            $result .= '    <priority>'.hsc($this->priority).'</priority>'.NL;
        }
        $result .= '  </url>'.NL;
        return $result;
    }
}
