Completed
Push — master ( bf2414...1236f2 )
by Miguel
10s
created

getCrawlerWaitPeriod()   A

Complexity

Conditions 1
Paths 1

Size

Total Lines 3
Code Lines 2

Duplication

Lines 0
Ratio 0 %

Importance

Changes 1
Bugs 0 Features 1
Metric Value
c 1
b 0
f 1
dl 0
loc 3
rs 10
cc 1
eloc 2
nc 1
nop 0
1
<?php
2
3
/**
 * Nexcess.net Turpentine Extension for Magento
 * Copyright (C) 2012  Nexcess.net L.L.C.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License along
 * with this program; if not, write to the Free Software Foundation, Inc.,
 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 */
21
22
class Nexcessnet_Turpentine_Helper_Cron extends Mage_Core_Helper_Abstract {
0 ignored issues
show
Coding Style Compatibility introduced by
PSR1 recommends that each class must be in a namespace of at least one level to avoid collisions.

You can fix this by adding a namespace to your class:

namespace YourVendor;

class YourClass { }

When choosing a vendor namespace, try to pick something that is not too generic to avoid conflicts with other libraries.

Loading history...
23
24
    /**
25
     * Key to store the URL queue under in the cache
26
     *
27
     * @var string
28
     */
29
    const CRAWLER_URLS_CACHE_ID = 'turpentine_crawler_url_queue';
30
31
    /**
32
     * Crawler client singleton
33
     *
34
     * @var Varien_Http_Client
35
     */
36
    protected $_crawlerClient = null;
37
38
    /**
     * Report the whole seconds of user-mode CPU time consumed so far, as
     * given by the 'ru_utime.tv_sec' entry of getrusage()
     *
     * @return int
     */
    public function getRunTime() {
        $resourceUsage = getrusage();
        $userSeconds = $resourceUsage['ru_utime.tv_sec'];
        return $userSeconds;
    }
47
48
    /**
     * Read the max_execution_time ini setting as an integer number of
     * seconds (0 means there is no limit)
     *
     * @return int
     */
    public function getAllowedRunTime() {
        $configuredLimit = ini_get('max_execution_time');
        return (int) $configuredLimit;
    }
56
57
    /**
     * Add a single URL to the queue, returns whether it was actually added
     * to the queue or not (false if it was already in the queue)
     *
     * @param string $url
     * @return bool
     */
    public function addUrlToCrawlerQueue($url) {
        // addUrlsToCrawlerQueue() reports how many URLs were new to the
        // queue; convert that count into the boolean documented above
        // instead of leaking the raw integer to callers
        return $this->addUrlsToCrawlerQueue(array($url)) > 0;
    }
67
68
    /**
     * Merge a list of URLs into the stored queue and report how much the
     * queue grew (i.e. how many of the given URLs were not already queued)
     *
     * @param array $urls
     * @return int
     */
    public function addUrlsToCrawlerQueue(array $urls) {
        // TODO: remove this debug message
        if ($this->getCrawlerDebugEnabled()) {
            foreach ($urls as $queuedUrl) {
                Mage::helper('turpentine/debug')->log(
                    'Adding URL to queue: %s', $queuedUrl );
            }
        }

        $queueBefore = $this->_readUrlQueue();
        // array_unique() drops URLs that were already present in the queue
        $queueAfter = array_unique(array_merge($queueBefore, $urls));
        $this->_writeUrlQueue($queueAfter);

        return count($queueAfter) - count($queueBefore);
    }
89
90
    /**
     * Remove the first URL from the stored queue and return it; returns
     * null when the queue is empty
     *
     * @return string|null
     */
    public function getNextUrl() {
        $queue = $this->_readUrlQueue();
        // array_shift() yields null for an empty array
        $head = array_shift($queue);
        // Persist the shortened queue so the same URL is not handed out twice
        $this->_writeUrlQueue($queue);
        return $head;
    }
101
102
    /**
     * Public accessor for the URL queue as currently stored
     *
     * @return array
     */
    public function getUrlQueue() {
        $queue = $this->_readUrlQueue();
        return $queue;
    }
110
111
    /**
     * Return the shared crawler HTTP client, building it on first use
     *
     * The client is created with a Turpentine/Magento user agent string,
     * keepalive enabled, and a 'frontend' cookie set to 'crawler-session'.
     *
     * @return Varien_Http_Client
     */
    public function getCrawlerClient() {
        if ($this->_crawlerClient !== null) {
            // Already built earlier; reuse it
            return $this->_crawlerClient;
        }

        $userAgent = sprintf(
            'Nexcessnet_Turpentine/%s Magento/%s Varien_Http_Client',
            Mage::helper('turpentine/data')->getVersion(),
            Mage::getVersion()
        );
        $this->_crawlerClient = new Varien_Http_Client(null, array(
            'useragent'     => $userAgent,
            'keepalive'     => true,
        ));
        $this->_crawlerClient->setCookie('frontend', 'crawler-session');

        return $this->_crawlerClient;
    }
129
130
    /**
     * Get if the crawler is enabled
     *
     * Reads turpentine_varnish/general/crawler_enable as a flag so callers
     * receive a real boolean rather than the raw stored config string.
     *
     * @return bool
     */
    public function getCrawlerEnabled() {
        return Mage::getStoreConfigFlag(
            'turpentine_varnish/general/crawler_enable' );
    }
138
139
    /**
     * Get if crawler debugging is enabled
     *
     * Reads turpentine_varnish/general/crawler_debug as a flag so callers
     * receive a real boolean rather than the raw stored config string.
     *
     * @return bool
     */
    public function getCrawlerDebugEnabled() {
        return Mage::getStoreConfigFlag(
            'turpentine_varnish/general/crawler_debug' );
    }
147
148
    /**
     * Get number of urls to crawl per batch
     *
     * The value of turpentine_varnish/general/crawler_batchsize is cast so
     * the documented integer is returned instead of the raw config value.
     *
     * @return int
     */
    public function getCrawlerBatchSize() {
        return (int) Mage::getStoreConfig(
            'turpentine_varnish/general/crawler_batchsize' );
    }
156
157
    /**
     * Get time in seconds to wait between url batches
     *
     * The value of turpentine_varnish/general/crawler_batchwait is cast so
     * the documented integer is returned instead of the raw config value.
     *
     * @return int
     */
    public function getCrawlerWaitPeriod() {
        return (int) Mage::getStoreConfig(
            'turpentine_varnish/general/crawler_batchwait' );
    }
165
166
    /**
     * Get the list of all URLs
     *
     * For every store this collects the store's base link URL, the URL of
     * each active category, the URL of each product in those categories
     * whose visibility is "catalog" or "both", and the URL of each CMS page
     * listed by the sitemap resource model. The current store is switched
     * while collecting and put back afterwards, including when collection
     * fails with an exception.
     *
     * @return array unique URLs
     */
    public function getAllUrls() {
        $urls = array();
        $origStore = Mage::app()->getStore();
        $visibility = array(
            Mage_Catalog_Model_Product_Visibility::VISIBILITY_BOTH,
            Mage_Catalog_Model_Product_Visibility::VISIBILITY_IN_CATALOG,
        );
        try {
            foreach (Mage::app()->getStores() as $storeId => $store) {
                // URL generation below depends on the current store
                Mage::app()->setCurrentStore($store);
                $baseUrl = $store->getBaseUrl(
                    Mage_Core_Model_Store::URL_TYPE_LINK );
                $urls[] = $baseUrl;
                foreach (Mage::getModel('catalog/category')
                            ->getCollection($storeId)
                            ->addIsActiveFilter()
                                as $cat) {
                    $urls[] = $cat->getUrl();
                    foreach ($cat->getProductCollection($storeId)
                                ->addUrlRewrite($cat->getId())
                                ->addAttributeToFilter('visibility', $visibility)
                                    as $prod) {
                        $urls[] = $prod->getProductUrl();
                    }
                }
                // Use the MageWorx XSitemap resource model when the config
                // has a node for that module, the core sitemap one otherwise
                $sitemap = (Mage::getConfig()->getNode('modules/MageWorx_XSitemap') !== false) ?
                    'xsitemap/cms_page' : 'sitemap/cms_page';
                foreach (Mage::getResourceModel($sitemap)
                            ->getCollection($storeId) as $item) {
                    $urls[] = $baseUrl.$item->getUrl();
                }
            }
        } catch (Exception $e) {
            // Do not leave the application pointed at whichever store the
            // loop was on when the failure happened
            Mage::app()->setCurrentStore($origStore);
            throw $e;
        }
        Mage::app()->setCurrentStore($origStore);
        return array_unique($urls);
    }
204
205
    /**
     * Add URLs to the queue by product model
     *
     * For every store this queues the product's own URL plus one URL per
     * category the product belongs to (store base URL joined with the
     * product's URL path within that category). The current store is
     * switched while the URLs are built and put back afterwards, including
     * when building fails with an exception.
     *
     * @param Mage_Catalog_Model_Product $product
     * @return int number of URLs that were new to the queue
     */
    public function addProductToCrawlerQueue($product) {
        $productUrls = array();
        $origStore = Mage::app()->getStore();
        try {
            foreach (Mage::app()->getStores() as $store) {
                Mage::app()->setCurrentStore($store);
                // Strip the trailing slash once per store so the base URL
                // and the path are always joined by exactly one '/'
                $baseUrl = rtrim(
                    $store->getBaseUrl(Mage_Core_Model_Store::URL_TYPE_LINK),
                    '/' );
                $productUrls[] = $product->getProductUrl();
                foreach ($product->getCategoryIds() as $catId) {
                    $cat = Mage::getModel('catalog/category')->load($catId);
                    $productUrls[] = $baseUrl.'/'.
                        ltrim($product->getUrlModel()
                            ->getUrlPath($product, $cat), '/');
                }
            }
        } catch (Exception $e) {
            // Do not leave the application pointed at whichever store the
            // loop was on when the failure happened
            Mage::app()->setCurrentStore($origStore);
            throw $e;
        }
        Mage::app()->setCurrentStore($origStore);
        return $this->addUrlsToCrawlerQueue($productUrls);
    }
229
230
    /**
     * Add URLs to the queue by category model
     *
     * Asks the category for its URL once per store, with that store set as
     * the current one, and queues the results. The original current store
     * is put back afterwards, including when a URL lookup fails with an
     * exception.
     *
     * @param Mage_Catalog_Model_Category $category
     * @return int number of URLs that were new to the queue
     */
    public function addCategoryToCrawlerQueue($category) {
        $catUrls = array();
        $origStore = Mage::app()->getStore();
        try {
            foreach (Mage::app()->getStores() as $store) {
                Mage::app()->setCurrentStore($store);
                $catUrls[] = $category->getUrl();
            }
        } catch (Exception $e) {
            // Do not leave the application pointed at whichever store the
            // loop was on when the failure happened
            Mage::app()->setCurrentStore($origStore);
            throw $e;
        }
        Mage::app()->setCurrentStore($origStore);
        return $this->addUrlsToCrawlerQueue($catUrls);
    }
246
247
    /**
     * Add URLs to queue by CMS page ID
     *
     * Loads the page, then for every store builds a direct URL from the
     * page identifier with that store set as the current one. The original
     * current store is put back afterwards, including when URL building
     * fails with an exception.
     *
     * @param int $cmsPageId
     * @return int number of URLs that were new to the queue
     */
    public function addCmsPageToCrawlerQueue($cmsPageId) {
        $page = Mage::getModel('cms/page')->load($cmsPageId);
        $pageUrls = array();
        $origStore = Mage::app()->getStore();
        try {
            foreach (Mage::app()->getStores() as $storeId => $store) {
                Mage::app()->setCurrentStore($store);
                $page->setStoreId($storeId);
                $pageUrls[] = Mage::getUrl(null,
                    array('_direct' => $page->getIdentifier()));
            }
        } catch (Exception $e) {
            // Do not leave the application pointed at whichever store the
            // loop was on when the failure happened
            Mage::app()->setCurrentStore($origStore);
            throw $e;
        }
        Mage::app()->setCurrentStore($origStore);
        return $this->addUrlsToCrawlerQueue($pageUrls);
    }
266
267
    /**
     * Load the crawler URL queue from the cache, falling back to the full
     * URL list when no usable queue is stored
     *
     * @return array
     */
    protected function _readUrlQueue() {
        $cached = Mage::app()->loadCache(self::CRAWLER_URLS_CACHE_ID);
        $queue = @unserialize($cached);
        if (is_array($queue)) {
            return $queue;
        }
        // Either this is the first read since the last cache flush or the
        // stored queue is corrupt. An empty array would be the proper
        // result, but that causes the queue to not be saved on the full
        // cache flush event, so hand back every known URL instead.
        return $this->getAllUrls();
    }
285
286
    /**
     * Serialize the crawler URL queue and store it in the cache under
     * CRAWLER_URLS_CACHE_ID
     *
     * @param  array  $urls
     * @return mixed  whatever the application's saveCache() call returns
     */
    protected function _writeUrlQueue(array $urls) {
        $payload = serialize($urls);
        return Mage::app()->saveCache($payload, self::CRAWLER_URLS_CACHE_ID);
    }
296
}
297