Passed
Pull Request — master (#28)
by
unknown
02:56
created

SwiftypeFileCrawlerExtension::withVersionContext()   A

Complexity

Conditions 3
Paths 1

Size

Total Lines 24
Code Lines 13

Duplication

Lines 0
Ratio 0 %

Importance

Changes 1
Bugs 0 Features 0
Metric Value
cc 3
eloc 13
c 1
b 0
f 0
nc 1
nop 1
dl 0
loc 24
rs 9.8333
1
<?php
2
3
namespace Ichaber\SSSwiftype\Extensions;
4
5
use Ichaber\SSSwiftype\Service\SwiftypeCrawler;
6
use Ichaber\SSSwiftype\Tests\Fake\SwiftypeFile;
7
use SilverStripe\Assets\File;
8
use SilverStripe\CMS\Model\SiteTree;
9
use SilverStripe\ORM\DataExtension;
10
use SilverStripe\Control\Director;
11
use SilverStripe\Core\Config\Config;
12
use SilverStripe\Versioned\Versioned;
13
14
/**
 * Class SwiftypeFileCrawlerExtension
 *
 * Collates the live Url(s) of the owner File around write/publish/unpublish and asks Swiftype to reindex them.
 *
 * Collated Urls are stored per owner record (keyed by getOwnerKey()), because Extensions are singleton and the same
 * instance may therefore hold Urls for many owner objects.
 *
 * @package Ichaber\SSSwiftype\Extensions
 * @property SwiftypeFile|$this $owner
 */
class SwiftypeFileCrawlerExtension extends DataExtension
{
    /**
     * Urls to crawl
     *
     * Array keyed by getOwnerKey(); each value is the list of absolute Urls collated for that owner
     *
     * @var array
     */
    private $urlsToCrawl = [];

    /**
     * Replace the complete set of collated Urls (for all owner keys)
     *
     * @param array $urls Urls keyed by owner key
     */
    public function setUrlsToCrawl(array $urls)
    {
        $this->urlsToCrawl = $urls;
    }

    /**
     * @return array The collated Urls, keyed by owner key
     */
    public function getUrlsToCrawl(): array
    {
        return $this->urlsToCrawl;
    }

    /**
     * We need to collate Urls before we write, just in case an author has changed the File's name (URL). If they
     * have, then we need to request Swiftype to reindex both the old Url (which should then be marked by Swiftype
     * as a 404), and the new Url
     *
     * @return void
     */
    public function onBeforeWrite(): void
    {
        $this->collateUrls();
    }

    /**
     * After a publish has occurred, we can collate and process immediately (no need to split things out like during
     * an unpublish)
     *
     * @return void
     */
    public function onAfterPublish(): void
    {
        $this->collateUrls();
        $this->processCollatedUrls();

        // It's important that we clear the cache after we have finished requesting reindex from Swiftype
        $this->clearCacheSingleUnlessDisabled();
    }

    /**
     * We need to collate the Urls to be purged *before* we complete the unpublish action (otherwise, the LIVE Urls
     * will no longer be available, since the page is now unpublished)
     *
     * @return void
     */
    public function onBeforeUnpublish(): void
    {
        $this->collateUrls();
    }

    /**
     * After the unpublish has completed, we can now request Swiftype to reindex the Urls that we collated
     *
     * @return void
     */
    public function onAfterUnpublish(): void
    {
        $this->processCollatedUrls();

        // It's important that we clear the cache after we have finished requesting reindex from Swiftype
        $this->clearCacheSingleUnlessDisabled();
    }

    /**
     * You may need to clear the cache at some point during your particular process
     *
     * Reset all Urls for any/all objects that might be in the cache (keeping in mind that Extensions are singleton,
     * so the urlsToCrawl could be accessed via singleton and it could contain Urls for many owner objects)
     *
     * We don't use flushCache (which is called from DataObject) because this is called between write and un/publish,
     * and we need our cache to persist through these states
     *
     * @return void
     */
    public function clearCacheAll(): void
    {
        $this->setUrlsToCrawl([]);
    }

    /**
     * You may need to clear the cache at some point during your particular process
     *
     * Reset only the Urls related to this particular owner object (keeping in mind that Extensions are singleton,
     * so the urlsToCrawl could be accessed via singleton and it could contain Urls for many owner objects)
     *
     * We don't use flushCache (which is called from DataObject) because this is called between write and un/publish,
     * and we need our cache to persist through these states
     *
     * @return void
     */
    public function clearCacheSingle(): void
    {
        $key = $this->getOwnerKey();

        // Nothing for us to do here: the owner has no key (it is not in the DB yet)
        if ($key === null) {
            return;
        }

        $urls = $this->getUrlsToCrawl();

        // Nothing for us to do here: no Urls have been collated for this owner
        if (!array_key_exists($key, $urls)) {
            return;
        }

        // Remove this key and its Urls
        unset($urls[$key]);

        $this->setUrlsToCrawl($urls);
    }

    /**
     * Collate Urls to crawl
     *
     * Extensions are singleton, so we use the owner key to make sure that we're only processing Urls directly related
     * to the desired record.
     *
     * You might need to collate more than one URL per Page (maybe you're using Fluent or another translation module).
     * This is the method you will want to override in order to add that additional logic.
     *
     * @return void
     */
    public function collateUrls(): void
    {
        // Only file types on the whitelist are ever sent for reindexing
        if (!$this->checkFileIsToBeReindexed()) {
            return;
        }

        // Grab any existing Urls so that we can add to it
        $urls = $this->getUrlsToCrawl();

        // Set us to a LIVE stage/reading_mode while we resolve the Url
        $this->withVersionContext(function () use (&$urls) {
            /** @var File $owner */
            $owner = $this->getOwner();
            $key = $this->getOwnerKey();

            // We can't do anything if we don't have a key to use
            if ($key === null) {
                return;
            }

            // Create a new container for this key
            if (!array_key_exists($key, $urls)) {
                $urls[$key] = [];
            }

            // Grab the absolute live link without ?stage=Live appended
            $link = $owner->getAbsoluteURL();

            // If this record is not published, or we're unable to get a "Live Link" (for whatever reason), then there
            // is nothing more we can do here
            if (!$link) {
                return;
            }

            // Nothing for us to do here, the Link is already being tracked
            if (in_array($link, $urls[$key], true)) {
                return;
            }

            // Add our base URL to this key
            $urls[$key][] = $link;
        });

        // Update the Urls we have stored for indexing
        $this->setUrlsToCrawl($urls);
    }

    /**
     * Send requests to Swiftype to reindex each of the Urls that we have previously collated for this owner
     *
     * @return void
     */
    protected function processCollatedUrls(): void
    {
        $key = $this->getOwnerKey();

        // We can't do anything if we don't have a key to process
        if ($key === null) {
            return;
        }

        $urls = $this->getUrlsToCrawl();

        // There are no Urls for this particular key (this also covers the case where nothing has been collated at all)
        if (!array_key_exists($key, $urls)) {
            return;
        }

        // Force the reindexing of each URL we collated
        foreach ($urls[$key] as $url) {
            $this->forceSwiftypeIndex($url);
        }
    }

    /**
     * Ask Swiftype to (re)crawl a single Url
     *
     * @param string $updateUrl The absolute Url to be crawled
     * @return bool True when running in a dev environment (no request is sent), otherwise the crawler's result
     */
    protected function forceSwiftypeIndex(string $updateUrl): bool
    {
        // We don't reindex dev environments
        if (Director::isDev()) {
            return true;
        }

        return SwiftypeCrawler::create()->send($updateUrl);
    }

    /**
     * Build the key under which this owner's Urls are stored: the owner's ClassName (backslashes removed) followed
     * by its ID
     *
     * @return string|null Null when the owner has not yet been written to the DB
     */
    protected function getOwnerKey(): ?string
    {
        $owner = $this->owner;

        // Can't generate a key if the owner has not yet been written to the DB
        if (!$owner->isInDB()) {
            return null;
        }

        return str_replace('\\', '', $owner->ClassName . $owner->ID);
    }

    /**
     * Method to check our file types whitelist since we don't want to index files that aren't required in the index
     * e.g. image files.
     *
     * The owner's file extension is compared strictly (exact string match) against the owner's
     * "reindex_files_whitelist" config value.
     *
     * @return bool False when the whitelist is not configured as an array, or the extension is not in it
     */
    protected function checkFileIsToBeReindexed()
    {
        $owner = $this->getOwner();
        $whitelist = $owner->config()->get('reindex_files_whitelist');

        // No (valid) whitelist configured means nothing is to be reindexed; passing a non-array to in_array() would
        // otherwise raise an error
        if (!is_array($whitelist)) {
            return false;
        }

        // Only reindex file types we need
        $fileType = File::get_file_extension((string) $owner->Filename);

        return in_array($fileType, $whitelist, true);
    }

    /**
     * Sets the version context to Live as that's what crawlers will (normally) see
     *
     * The main function is to suppress the ?stage=Live querystring. LeftAndMain will set the default
     * reading mode to 'DRAFT' when initialising so to counter this we need to re-set the default
     * reading mode back to LIVE
     *
     * The original default reading mode, reading mode and stage are put back once the callback has finished, even if
     * the callback throws.
     *
     * @param callable $callback Invoked with no arguments while the LIVE context is active
     * @return void
     */
    private function withVersionContext(callable $callback): void
    {
        Versioned::withVersionedMode(static function () use ($callback) {
            // Grab our current stage and reading mode
            $originalDefaultReadingMode = Versioned::get_default_reading_mode();
            $originalReadingMode = Versioned::get_reading_mode();
            $originalStage = Versioned::get_stage();

            try {
                // Set our stage and reading mode to LIVE
                Versioned::set_default_reading_mode('Stage.' . Versioned::LIVE);
                Versioned::set_reading_mode('Stage.' . Versioned::LIVE);
                Versioned::set_stage(Versioned::LIVE);

                // Process whatever callback was provided
                $callback();
            } finally {
                // Always put the default reading mode back; it must not depend on whether a (non-default) reading
                // mode happened to be set, otherwise LIVE would leak into the rest of the request
                Versioned::set_default_reading_mode($originalDefaultReadingMode);

                if ($originalReadingMode) {
                    Versioned::set_reading_mode($originalReadingMode);
                }

                // Only restore a stage if there was one (we may not have been in a stage-based reading mode)
                if ($originalStage) {
                    Versioned::set_stage($originalStage);
                }
            }
        });
    }

    /**
     * Clear the collated Urls for this owner, unless the "clear_cache_disabled" config has been set for this class
     * (useful for unit testing, or any other reason you might have to disable it)
     *
     * @return void
     */
    private function clearCacheSingleUnlessDisabled(): void
    {
        if (Config::inst()->get(static::class, 'clear_cache_disabled')) {
            return;
        }

        $this->clearCacheSingle();
    }
}
322