Passed
Pull Request — master (#16)
by Chris
01:38
created

SwiftypeSiteTreeCrawlerExtension::collateUrls()   A

Complexity

Conditions 5
Paths 1

Size

Total Lines 41
Code Lines 15

Duplication

Lines 0
Ratio 0 %

Importance

Changes 1
Bugs 0 Features 0
Metric Value
cc 5
eloc 15
c 1
b 0
f 0
nc 1
nop 0
dl 0
loc 41
rs 9.4555
1
<?php
2
3
namespace Ichaber\SSSwiftype\Extensions;
4
5
use Exception;
6
use Ichaber\SSSwiftype\Service\SwiftypeCrawler;
7
use Psr\Container\NotFoundExceptionInterface;
8
use Psr\Log\LoggerInterface;
9
use SilverStripe\CMS\Model\SiteTree;
10
use SilverStripe\CMS\Model\SiteTreeExtension;
11
use SilverStripe\Control\Director;
12
use SilverStripe\Core\Config\Config;
13
use SilverStripe\Core\Injector\Injector;
14
use SilverStripe\SiteConfig\SiteConfig;
15
use SilverStripe\Versioned\Versioned;
16
17
/**
18
 * Class SwiftypeSiteTreeCrawlerExtension
19
 *
20
 * @package Ichaber\SSSwiftype\Extensions
21
 * @property SiteTree|$this $owner
22
 */
23
class SwiftypeSiteTreeCrawlerExtension extends SiteTreeExtension
24
{
25
    /**
26
     * Urls to crawl
27
     *
28
     * Array keyed by the owner key (see getOwnerKey()); each value is the list of Urls collated for that owner
29
     *
30
     * @var array
31
     */
32
    private $urlsToCrawl = [];
33
34
    /**
     * Replace the complete set of collated Urls held by this extension.
     *
     * @param array $urls Urls grouped by owner key, i.e. [ownerKey => [url, ...]]
     */
    public function setUrlsToCrawl(array $urls)
    {
        $this->urlsToCrawl = $urls;
    }
40
41
    /**
     * Fetch every Url collated so far, for all owner keys.
     *
     * @return array Urls grouped by owner key, i.e. [ownerKey => [url, ...]]
     */
    public function getUrlsToCrawl(): array
    {
        return $this->urlsToCrawl;
    }
48
49
    /**
     * Collate the current Url before the record is written.
     *
     * An author may have changed the Page's Url Segment. When that happens Swiftype needs to be asked to reindex
     * both the old Url (which Swiftype should then mark as a 404) and the new Url, so the old one has to be
     * captured before the write goes through.
     */
    public function onBeforeWrite(): void
    {
        $this->collateUrls();
    }
58
59
    /**
     * Collate and process Urls straight after a publish.
     *
     * Unlike an unpublish, there is no need to split collating and processing into separate steps here.
     *
     * @param SiteTree|mixed $original
     * @return void
     */
    public function onAfterPublish(&$original): void
    {
        $this->collateUrls();
        $this->processCollatedUrls();

        // Clearing of the cache can be disabled through config (useful for unit testing, or any other reason you
        // might have to disable it). When it is enabled, the clear has to happen only once the reindex requests to
        // Swiftype have been made.
        if (!Config::inst()->get(static::class, 'clear_cache_disabled')) {
            $this->clearCacheSingle();
        }
    }
82
83
    /**
     * Collate the Urls to be purged *before* the unpublish action completes.
     *
     * Once the page has been unpublished its LIVE Urls are no longer available, so they must be captured first.
     */
    public function onBeforeUnpublish(): void
    {
        $this->collateUrls();
    }
91
92
    /**
     * Request Swiftype to reindex the Urls collated in onBeforeUnpublish(), now that the unpublish has completed.
     */
    public function onAfterUnpublish(): void
    {
        $this->processCollatedUrls();

        // Clearing of the cache can be disabled through config (useful for unit testing, or any other reason you
        // might have to disable it). When it is enabled, the clear has to happen only once the reindex requests to
        // Swiftype have been made.
        if (!Config::inst()->get(static::class, 'clear_cache_disabled')) {
            $this->clearCacheSingle();
        }
    }
110
111
    /**
     * Drop every collated Url, for any/all owner objects.
     *
     * Extensions are singleton, so the collated Urls could be accessed via the singleton and may contain Urls for
     * many owner objects; this resets all of them. You may need to call this at some point during your particular
     * process.
     *
     * flushCache (which is called from DataObject) is deliberately not used for this: it is called between write
     * and un/publish, and the collated Urls need to persist through those states.
     */
    public function clearCacheAll(): void
    {
        $this->setUrlsToCrawl([]);
    }
124
125
    /**
     * Drop only the collated Urls that belong to this particular owner object.
     *
     * Extensions are singleton, so the collated Urls could be accessed via the singleton and may contain Urls for
     * many owner objects; entries for other owners are left untouched. You may need to call this at some point
     * during your particular process.
     *
     * flushCache (which is called from DataObject) is deliberately not used for this: it is called between write
     * and un/publish, and the collated Urls need to persist through those states.
     */
    public function clearCacheSingle(): void
    {
        $collated = $this->getUrlsToCrawl();
        $ownerKey = $this->getOwnerKey();

        // Nothing to do when there is no key, or when nothing is being tracked under that key
        if ($ownerKey === null || !array_key_exists($ownerKey, $collated)) {
            return;
        }

        // Remove this key and its Urls, then store what is left
        unset($collated[$ownerKey]);

        $this->setUrlsToCrawl($collated);
    }
154
155
    /**
     * Collate Urls to crawl
     *
     * Extensions are singleton, so we use the owner key to make sure that we're only processing Urls directly related
     * to the desired record.
     *
     * The owner's absolute live link is resolved inside a LIVE version context and appended to the list stored under
     * the owner key, unless that exact link is already being tracked. Nothing is added when the owner has no key
     * (see getOwnerKey()) or when no live link is available.
     *
     * You might need to collate more than one URL per Page (maybe you're using Fluent or another translation module).
     * This is the method you will want to override in order to add that additional logic.
     */
    public function collateUrls(): void
    {
        // Grab any existing Urls so that we can add to them
        $urls = $this->getUrlsToCrawl();

        // Set us to a LIVE stage/reading_mode while we resolve the link
        $this->withVersionContext(function () use (&$urls) {
            /** @var SiteTree $owner */
            $owner = $this->getOwner();
            $key = $this->getOwnerKey();

            // We can't do anything if we don't have a key to use
            if ($key === null) {
                return;
            }

            // Create a new container for this key
            if (!array_key_exists($key, $urls)) {
                $urls[$key] = [];
            }

            // Grab the absolute live link without ?stage=Live appended
            $link = $owner->getAbsoluteLiveLink(false);

            // If this record is not published, or we're unable to get a "Live Link" (for whatever reason), then there
            // is nothing more we can do here
            if (!$link) {
                return;
            }

            // Nothing for us to do here, the Link is already being tracked. The comparison is strict so that two
            // different links can never be treated as duplicates through PHP's loose comparison rules
            if (in_array($link, $urls[$key], true)) {
                return;
            }

            // Add our base URL to this key
            $urls[$key][] = $link;
        });

        // Update the Urls we have stored for indexing
        $this->setUrlsToCrawl($urls);
    }
206
207
    /**
     * Ask Swiftype to reindex every Url previously collated for this owner.
     *
     * Does nothing when the owner has no key, or when no Urls are stored under that key.
     */
    protected function processCollatedUrls(): void
    {
        $ownerKey = $this->getOwnerKey();

        // Without a key there is nothing we can look up
        if ($ownerKey === null) {
            return;
        }

        $collated = $this->getUrlsToCrawl();

        // Covers both "no Urls at all" and "no Urls for this particular key"
        if (!array_key_exists($ownerKey, $collated)) {
            return;
        }

        // Force the reindexing of each Url we collated
        foreach ($collated[$ownerKey] as $collatedUrl) {
            $this->forceSwiftypeIndex($collatedUrl);
        }
    }
237
238
    /**
     * Send a single Url to the Swiftype crawler, except on dev environments.
     *
     * @param string $updateUrl The Url to hand to the crawler
     * @return bool Always true on dev environments (nothing is sent); otherwise the result of the crawler's send()
     */
    protected function forceSwiftypeIndex(string $updateUrl): bool
    {
        if (!Director::isDev()) {
            return SwiftypeCrawler::create()->send($updateUrl);
        }

        // We don't reindex dev environments
        return true;
    }
253
254
    /**
     * Build the key under which this owner's Urls are tracked.
     *
     * The key is the owner's ClassName with namespace backslashes removed, followed by the owner's ID.
     *
     * @return string|null Null when the owner has not yet been written to the DB
     */
    protected function getOwnerKey(): ?string
    {
        /** @var SiteTree $owner */
        $owner = $this->owner;

        if ($owner->isInDB()) {
            return str_replace('\\', '', $owner->ClassName . $owner->ID);
        }

        // Can't generate a key if the owner has not yet been written to the DB
        return null;
    }
270
271
    /**
     * Sets the version context to Live as that's what crawlers will (normally) see
     *
     * The main function is to suppress the ?stage=Live querystring. LeftAndMain will set the default
     * reading mode to 'DRAFT' when initialising so to counter this we need to re-set the default
     * reading mode back to LIVE
     *
     * The original default reading mode, reading mode and stage are put back once the callback has run, including
     * when the callback throws.
     *
     * @param callable $callback Invoked with no arguments while the LIVE context is active
     */
    private function withVersionContext(callable $callback): void
    {
        Versioned::withVersionedMode(static function () use ($callback) {
            // Grab our current stage and reading mode
            $originalDefaultReadingMode = Versioned::get_default_reading_mode();
            $originalReadingMode = Versioned::get_reading_mode();
            $originalStage = Versioned::get_stage();

            // Set our stage and reading mode to LIVE
            Versioned::set_default_reading_mode('Stage.' . Versioned::LIVE);
            Versioned::set_reading_mode('Stage.' . Versioned::LIVE);
            Versioned::set_stage(Versioned::LIVE);

            try {
                // Process whatever callback was provided
                $callback();
            } finally {
                // The default reading mode is always restored. It is independent of whether an explicit reading
                // mode was set, so it must not be gated on $originalReadingMode (otherwise it could be left on LIVE)
                Versioned::set_default_reading_mode($originalDefaultReadingMode);

                // Set us back to the original reading mode and stage, when there were any
                if ($originalReadingMode) {
                    Versioned::set_reading_mode($originalReadingMode);
                }

                if ($originalStage) {
                    Versioned::set_stage($originalStage);
                }
            }
        });
    }
307
}
308