Passed
Pull Request — master (#104)
by
unknown
02:52
created

Job::createJobsFromData()   B

Complexity

Conditions 8
Paths 35

Size

Total Lines 57
Code Lines 25

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 8
eloc 25
nc 35
nop 4
dl 0
loc 57
rs 8.4444
c 0
b 0
f 0

How to fix   Long Method   

Long Method

Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.

For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.

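Commonly applied refactorings include Extract Method: move a cohesive group of statements out of the long method into a new, well-named method.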

1
<?php
2
3
namespace SilverStripe\StaticPublishQueue;
4
5
use SilverStripe\Core\Config\Configurable;
6
use SilverStripe\Core\Extensible;
7
use SilverStripe\Core\Injector\Injectable;
8
use SilverStripe\Core\Injector\Injector;
9
use SilverStripe\ORM\ValidationException;
10
use SilverStripe\StaticPublishQueue\Service\URLSanitisationService;
11
use stdClass;
12
use Symbiote\QueuedJobs\Services\AbstractQueuedJob;
13
use Symbiote\QueuedJobs\Services\QueuedJobService;
14
15
abstract class Job extends AbstractQueuedJob
16
{
17
    use Configurable;
18
    use Extensible;
19
    use Injectable;
20
21
    /**
22
     * Number of URLs per job allows you to split work into multiple smaller jobs instead of having one large job
23
     * this is useful if you're running a queue setup with parallel processing
24
     * if this number is too high you're limiting the parallel processing opportunity
25
     * if this number is too low you're using your resources inefficiently
26
     * as every job processing has a fixed overhead which adds up if there are too many jobs
27
     *
28
     * in case your project is complex and you are struggling to find the correct number
29
     * it's possible to move this value to a CMS setting and adjust as needed without the need of changing the code
30
     * use @see Job::getUrlsPerJob() to override the value lookup
31
     * you can subclass your jobs and implement your own getUrlsPerJob() method which will look into CMS setting
32
     *
33
     * batching capability can be disabled if urls per job is set to 0
34
     * in such case, all URLs will be put into one job
35
     *
36
     * @var int
37
     * @config
38
     */
39
    private static $urls_per_job = 0;
0 ignored issues
show
introduced by
The private property $urls_per_job is not used, and could be removed.
Loading history...
40
41
    /**
42
     * Number of URLs processed during one call of @see AbstractQueuedJob::process
43
     * this number should be set to a value which represents number of URLs which is reasonable to process in one go
44
     * this number will vary depending on project, more specifically it depends on:
45
     * - time to render your pages
46
     * - infrastructure
47
     *
48
     * if this number is too large jobs may experience performance / memory issues
49
     * if this number is too low the jobs will produce more overhead which may cause inefficiencies
50
     *
51
     * in case your project is complex and you are struggling to find the correct number
52
     * it's possible to move this value to a CMS setting and adjust as needed without the need of changing the code
53
     * use @see Job::getChunkSize() to override the value lookup
54
     * you can subclass your jobs and implement your own getChunkSize() method which will look into CMS setting
55
     *
56
     * chunking capability can be disabled if chunk size is set to 0
57
     * in such case, all URLs will be processed in one go
58
     *
59
     * @var int
60
     * @config
61
     */
62
    private static $chunk_size = 200;
0 ignored issues
show
introduced by
The private property $chunk_size is not used, and could be removed.
Loading history...
63
64
    /**
     * Static cache manipulation jobs always run without a user
     *
     * Running as a logged-in member would let session related data become part of URLs,
     * for example the stage GET param which is injected into URLs when a user is logged in.
     * Statically published URLs always refer to live content, and a stage param on a visited URL
     * is meant to bypass the static cache and redirect to admin login,
     * which is something we definitely don't want for statically cached pages.
     *
     * @return int always 0
     */
    public function getRunAsMemberID()
    {
        $noMemberId = 0;

        return $noMemberId;
    }
75
76
    /**
     * Run the parent setup and derive totalSteps from the number of URLs to be processed
     *
     * Progress is counted in URLs rather than chunks because chunk size may change during runtime
     * (if CMS setting override is used), which makes the number of completed URLs
     * the more accurate and useful measure.
     */
    public function setup()
    {
        parent::setup();

        $pendingUrls = $this->jobData->URLsToProcess;
        $this->totalSteps = count($pendingUrls);
    }
87
88
    /**
     * Build the job signature from the job class and the URLs to process
     *
     * @return string MD5 hash of the job class name and the URLsToProcess keys, all joined with dashes
     */
    public function getSignature()
    {
        $urlPart = implode('-', array_keys($this->URLsToProcess));
        $signatureSource = implode('-', [static::class, $urlPart]);

        return md5($signatureSource);
    }
92
93
    /**
     * Process one chunk of URLs and then re-evaluate whether the job is complete
     *
     * At most getChunkSize() URLs are handed to processUrl() per call,
     * a chunk size of 0 means all URLs are handled in this single call.
     */
    public function process()
    {
        $limit = $this->getChunkSize();
        $handled = 0;

        foreach ($this->jobData->URLsToProcess as $url => $priority) {
            // stop as soon as the chunk is full (a limit of 0 disables chunking)
            if ($limit > 0 && $handled >= $limit) {
                break;
            }

            $this->processUrl($url, $priority);
            $handled++;
        }

        $this->updateCompletedState();
    }
108
109
    /**
     * Generate static cache related jobs from data
     *
     * Duplicate URLs are removed, the remaining URLs are split into batches of $urlsPerJob
     * (a single batch if the value is 0 or less) and one job is created per batch.
     * Jobs are only created here, not queued, @see Job::queueJobsFromData() for queueing.
     *
     * @param array $urls URLs to be processed into jobs
     * @param string $message will be stored in job data and it's useful debug information
     * @param int|null $urlsPerJob number of URLs per job, defaults to Job specific configuration
     * @param string|null $jobClass job class used to create jobs, defaults to current class
     * @return array|Job[]
     * @throws ValidationException if the job class does not resolve to an instance of Job
     */
    public function createJobsFromData(
        array $urls,
        $message = '',
        $urlsPerJob = null,
        $jobClass = null
    ) {
        if (count($urls) === 0) {
            return [];
        }

        // remove duplicate URLs
        $urls = array_unique($urls);

        // fall back to current job class if we don't have an explicit value set
        if ($jobClass === null) {
            $jobClass = static::class;
        }

        $jobSingleton = $this->getValidatedJobSingleton($jobClass);

        // fall back to the urls_per_job of the target job class if we don't have an explicit value set
        if ($urlsPerJob === null) {
            $urlsPerJob = $jobSingleton->getUrlsPerJob();
        }

        // if no message is provided don't include it
        // the cast keeps a null message from reaching strlen(), which is deprecated since PHP 8.1
        $message = (string) $message;
        $messagePrefix = (strlen($message) > 0) ? $message . ': ' : '';

        // batch URLs, batching is disabled if urls per job is not a positive number
        $batches = ($urlsPerJob > 0) ? array_chunk($urls, $urlsPerJob) : [$urls];

        $jobs = [];
        foreach ($batches as $batch) {
            $jobs[] = $this->createJobForBatch($batch, $jobClass, $messagePrefix);
        }

        return $jobs;
    }

    /**
     * Resolve the singleton of the job class and make sure it is a Job
     *
     * @param string $jobClass
     * @return Job
     * @throws ValidationException if the resolved object is not an instance of Job
     */
    private function getValidatedJobSingleton($jobClass)
    {
        $jobSingleton = singleton($jobClass);

        if (!($jobSingleton instanceof Job)) {
            throw new ValidationException(
                sprintf('Invalid job class %s, expected instance of %s', get_class($jobSingleton), Job::class)
            );
        }

        return $jobSingleton;
    }

    /**
     * Create a single job for one batch of URLs and populate it with data
     *
     * @param array $batch URLs of this batch, sanitised before they are stored in the job
     * @param string $jobClass job class used to create the job
     * @param string $messagePrefix prepended to the exported URL list in the job message
     * @return Job
     */
    private function createJobForBatch(array $batch, $jobClass, $messagePrefix)
    {
        // sanitise the URLs
        $urlService = Injector::inst()->create(URLSanitisationService::class);
        $urlService->addURLs($batch);
        $sanitisedUrls = $urlService->getURLs(true);

        // create job and populate it with data
        $job = Injector::inst()->create($jobClass);
        $jobData = new stdClass();
        $jobData->URLsToProcess = $sanitisedUrls;

        // total steps equals the number of URLs, the job message lists the URLs (the data keys)
        $job->setJobData(count($sanitisedUrls), 0, false, $jobData, [
            $messagePrefix . var_export(array_keys($sanitisedUrls), true),
        ]);

        return $job;
    }
176
177
    /**
     * Create static cache related jobs from data and hand each of them to the queue
     *
     * @param array $urls URLs to be processed into jobs
     * @param string $message stored in job data, useful as debug information
     * @param int|null $urlsPerJob number of URLs per job, defaults to Job specific configuration
     * @param string|null $jobClass job class used to create jobs, defaults to current class
     */
    public function queueJobsFromData(
        array $urls,
        $message = '',
        $urlsPerJob = null,
        $jobClass = null
    ) {
        $pendingJobs = $this->createJobsFromData($urls, $message, $urlsPerJob, $jobClass);

        // jobs go through the default queue process
        $queue = QueuedJobService::singleton();

        foreach ($pendingJobs as $pendingJob) {
            $queue->queueJob($pendingJob);
        }
    }
200
201
    /**
     * Implement this method to process URL
     *
     * Called by process() once for every URL of the current chunk.
     *
     * @param string $url URL to process, taken from the keys of the URLsToProcess job data
     * @param int $priority value stored against the URL in the URLsToProcess job data
     */
    abstract protected function processUrl($url, $priority);
208
209
    /**
     * Record a URL as processed and advance the job by one step
     *
     * The URL is removed from the URLsToProcess job data and added to ProcessedURLs.
     * Indicating progress matters for jobs which take a long time to process:
     * jobs that do not indicate progress may be identified as stalled by the queue
     * and may end up paused.
     *
     * @param string $url
     */
    protected function markUrlAsProcessed($url)
    {
        unset($this->jobData->URLsToProcess[$url]);
        $this->jobData->ProcessedURLs[$url] = $url;

        $this->currentStep = $this->currentStep + 1;
    }
223
224
    /**
     * Flag the job as complete once there are no URLs left to process
     *
     * The completed flag is left untouched while URLs remain.
     */
    protected function updateCompletedState()
    {
        $remaining = count($this->jobData->URLsToProcess);

        if ($remaining === 0) {
            $this->isComplete = true;
        }
    }
235
236
    /**
     * Look up the urls_per_job configuration value
     *
     * @return int configured value cast to int, values below zero are returned as 0
     */
    protected function getUrlsPerJob()
    {
        $configured = (int) $this->config()->get('urls_per_job');

        return max(0, $configured);
    }
245
246
    /**
     * Look up the chunk_size configuration value
     *
     * @return int configured value cast to int, values below zero are returned as 0
     */
    protected function getChunkSize()
    {
        $configured = (int) $this->config()->get('chunk_size');

        return max(0, $configured);
    }
255
256
    /**
     * This function can be overridden to handle the case of failure of specific URL processing
     * such case is not handled by default which results in all such errors being effectively silenced
     *
     * Nothing in this class calls this method, presumably the concrete jobs invoke it
     * from their processUrl() implementation (to be confirmed against the subclasses).
     *
     * @param string $url
     * @param array $meta
     */
    protected function handleFailedUrl($url, array $meta)
    {
        // no op
    }
267
}
268