Completed
Pull Request — master (#104)
by
unknown
02:20
created

Job::queueJobsFromData()   B

Complexity

Conditions 10
Paths 99

Size

Total Lines 70
Code Lines 30

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 10
eloc 30
nc 99
nop 5
dl 0
loc 70
rs 7.6666
c 0
b 0
f 0

How to fix   Long Method    Complexity   

Long Method

Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.

For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.

Commonly applied refactorings include:

1
<?php
2
3
namespace SilverStripe\StaticPublishQueue;
4
5
use SilverStripe\Core\Config\Configurable;
6
use SilverStripe\Core\Extensible;
7
use SilverStripe\Core\Injector\Injectable;
8
use SilverStripe\Core\Injector\Injector;
9
use SilverStripe\ORM\ValidationException;
10
use SilverStripe\StaticPublishQueue\Service\URLSanitisationService;
11
use stdClass;
12
use Symbiote\QueuedJobs\Services\AbstractQueuedJob;
13
use Symbiote\QueuedJobs\Services\QueuedJobService;
14
15
abstract class Job extends AbstractQueuedJob
16
{
17
    use Configurable;
18
    use Extensible;
19
    use Injectable;
20
21
    /**
22
     * Number of URLs per job allows you to split work into multiple smaller jobs instead of having one large job
23
     * this is useful if you're running a queue setup with parallel processing
24
     * if this number is too high you're limiting the parallel processing opportunity
25
     * if this number is too low you're using your resources inefficiently
26
     * as every job processing has a fixed overhead which adds up if there are too many jobs
27
     *
28
     * in case your project is complex and you are struggling to find the correct number
29
     * it's possible to move this value to a CMS setting and adjust as needed without the need of changing the code
30
     * use @see Job::getUrlsPerJob() to override the value lookup
31
     * you can subclass your jobs and implement your own getUrlsPerJob() method which will look into CMS setting
32
     *
33
     * batching capability can be disabled if urls per job is set to 0
34
     * in such case, all URLs will be put into one job
35
     *
36
     * @var int
37
     * @config
38
     */
39
    private static $urls_per_job = 0;
0 ignored issues
show
introduced by
The private property $urls_per_job is not used, and could be removed.
Loading history...
40
41
    /**
42
     * Number of URLs processed during one call of @see AbstractQueuedJob::process
43
     * this number should be set to a value which represents number of URLs which is reasonable to process in one go
44
     * this number will vary depending on project, more specifically it depends on:
45
     * - time to render your pages
46
     * - infrastructure
47
     *
48
     * if this number is too large jobs may experience performance / memory issues
49
     * if this number is too low the jobs will produce more overhead which may cause inefficiencies
50
     *
51
     * in case your project is complex and you are struggling to find the correct number
52
     * it's possible to move this value to a CMS setting and adjust as needed without the need of changing the code
53
     * use @see Job::getChunkSize() to override the value lookup
54
     * you can subclass your jobs and implement your own getChunkSize() method which will look into CMS setting
55
     *
56
     * chunking capability can be disabled if chunk size is set to 0
57
     * in such case, all URLs will be processed in one go
58
     *
59
     * @var int
60
     * @config
61
     */
62
    private static $chunk_size = 200;
0 ignored issues
show
introduced by
The private property $chunk_size is not used, and could be removed.
Loading history...
63
64
    /**
     * Static cache manipulation jobs always run without a user
     *
     * Running as a logged in user would make session related data part of the URLs,
     * for example the stage GET param is injected into URLs when a user is logged in.
     * Statically published URLs always refer to live content, so the stage param must never
     * be present in them: a URL carrying the stage param is meant to bypass the static cache
     * and redirect to the admin login, which is not wanted for statically cached pages
     *
     * @return int always 0 (no member)
     */
    public function getRunAsMemberID()
    {
        $noMemberId = 0;

        return $noMemberId;
    }
75
76
    /**
     * Run the parent setup and use the number of URLs waiting to be processed as the total step count
     *
     * Progress is tracked per URL rather than per chunk: the chunk size may change during runtime
     * (if CMS setting override is used), so counting completed URLs is more accurate and useful
     * than counting completed chunks
     */
    public function setup()
    {
        parent::setup();

        $pendingUrls = $this->jobData->URLsToProcess;
        $this->totalSteps = count($pendingUrls);
    }
87
88
    /**
     * Build the job signature from the job class name and the keys of the URLs to process
     *
     * @return string md5 hash of the class name and the URL keys joined with '-'
     */
    public function getSignature()
    {
        // NOTE(review): URLsToProcess is read from the job itself here, while the other methods
        // read it from jobData - presumably the parent class resolves it, confirm
        $urlKeys = array_keys($this->URLsToProcess);
        $signatureParts = [static::class, implode('-', $urlKeys)];

        return md5(implode('-', $signatureParts));
    }
92
93
    /**
     * Process pending URLs, at most one chunk per call, then refresh the completed state
     *
     * A chunk size of 0 disables the limit, in which case every pending URL is handled in one call
     */
    public function process()
    {
        $limit = $this->getChunkSize();
        $handled = 0;

        foreach ($this->jobData->URLsToProcess as $url => $priority) {
            // stop as soon as the chunk is full (only applies when chunking is enabled)
            if ($limit > 0 && $handled >= $limit) {
                break;
            }

            $this->processUrl($url, $priority);
            $handled += 1;
        }

        $this->updateCompletedState();
    }
108
109
    /**
     * Generate and queue static cache related jobs from data
     *
     * URLs are de-duplicated, split into batches (one job per batch), sanitised
     * and then either handed to the callback or queued via the queued job service
     *
     * manipulationCallback details
     * if you use custom callback be sure to return the array of jobs
     * example use cases:
     * - I want to override queue settings (change the queue type, schedule jobs for specific time...)
     * - I want to add some custom data to the jobs before the jobs are queued (common job identifier...)
     * - I don't want to queue the jobs, I only want the jobs to be returned (can be used for filtering)
     * <code>
     * GenerateStaticCacheJob::singleton()->queueJobsFromData($urls, '', null, null, function (array $jobs) {
     *     // do something here with the jobs
     *     return $jobs;
     * });
     * </code>
     *
     * @param array $urls URLs to be processed into jobs
     * @param string $message will be stored in job data and it's useful debug information
     * @param null|int $urlsPerJob number of URLs per job, defaults to Job specific configuration
     * @param null|string $jobClass job class used to create jobs, defaults to current class
     * @param null|callable $manipulationCallback pass callback to handle job queue process, defaults to standard queue process
     * @return array|Job[] the created jobs, or whatever the callback returns when one is provided
     * @throws ValidationException if the job class does not resolve to an instance of Job
     */
    public function queueJobsFromData(
        array $urls,
        $message = '',
        $urlsPerJob = null,
        $jobClass = null,
        $manipulationCallback = null
    ) {
        if (count($urls) === 0) {
            return [];
        }

        // remove duplicate URLs
        $urls = array_unique($urls);

        // fall back to current job class if we don't have an explicit value set
        if ($jobClass === null) {
            $jobClass = static::class;
        }

        // validate job class
        $prototype = singleton($jobClass);
        if (!($prototype instanceof Job)) {
            throw new ValidationException(
                sprintf('Invalid job class %s, expected instance of %s', get_class($prototype), Job::class)
            );
        }

        // fall back to the job specific urls_per_job if we don't have an explicit value set
        if ($urlsPerJob === null) {
            $urlsPerJob = $prototype->getUrlsPerJob();
        }

        // if no message is provided don't include it
        // (string cast keeps a null message from reaching a string function)
        $messagePrefix = ((string) $message !== '') ? $message . ': ' : '';

        // batch URLs, batching is disabled when urls per job is not a positive number
        $batches = ($urlsPerJob > 0) ? array_chunk($urls, $urlsPerJob) : [$urls];

        $jobs = [];
        foreach ($batches as $batch) {
            $jobs[] = $this->createJobForUrls($batch, $jobClass, $messagePrefix);
        }

        if ($manipulationCallback !== null) {
            // custom queue process
            return $manipulationCallback($jobs);
        }

        // default queue process
        $service = QueuedJobService::singleton();

        foreach ($jobs as $job) {
            $service->queueJob($job);
        }

        return $jobs;
    }

    /**
     * Sanitise one batch of URLs and wrap it into a new job of the given class
     *
     * @param array $urls batch of URLs for a single job
     * @param string $jobClass job class used to create the job
     * @param string $messagePrefix prefix of the debug message stored with the job, may be empty
     * @return Job
     */
    private function createJobForUrls(array $urls, $jobClass, $messagePrefix)
    {
        // sanitise the URLs, a fresh service is created for every batch
        $urlService = Injector::inst()->create(URLSanitisationService::class);
        $urlService->addURLs($urls);
        $sanitisedUrls = $urlService->getURLs(true);

        // create job and populate it with data
        $job = Injector::inst()->create($jobClass);
        $jobData = new stdClass();
        $jobData->URLsToProcess = $sanitisedUrls;

        $job->setJobData(count($sanitisedUrls), 0, false, $jobData, [
            $messagePrefix . var_export(array_keys($sanitisedUrls), true),
        ]);

        return $job;
    }
203
204
    /**
     * Process a single URL, concrete jobs provide the actual implementation
     *
     * @param string $url URL to process
     * @param int $priority priority stored alongside the URL in the job data
     */
    abstract protected function processUrl($url, $priority);
211
212
    /**
     * Record a URL as processed and advance the job step by one
     *
     * The URL is added to the list of processed URLs and removed from the list of URLs to process.
     * Indicating progress matters for jobs which take a long time to process: jobs that do not
     * indicate progress may be identified as stalled by the queue and may end up paused
     *
     * @param string $url
     */
    protected function markUrlAsProcessed($url)
    {
        $this->jobData->ProcessedURLs[$url] = $url;
        unset($this->jobData->URLsToProcess[$url]);

        $this->currentStep = $this->currentStep + 1;
    }
226
227
    /**
     * Flag the job as complete once there are no URLs left to process
     * the state is left untouched while URLs are still pending
     */
    protected function updateCompletedState()
    {
        $remaining = count($this->jobData->URLsToProcess);

        if ($remaining === 0) {
            $this->isComplete = true;
        }
    }
238
239
    /**
     * Look up the urls_per_job configuration value
     *
     * @return int configured number of URLs per job, never lower than 0
     */
    protected function getUrlsPerJob()
    {
        $configured = (int) $this->config()->get('urls_per_job');

        // negative values are clamped to 0
        return max(0, $configured);
    }
248
249
    /**
     * Look up the chunk_size configuration value
     *
     * @return int configured chunk size, never lower than 0
     */
    protected function getChunkSize()
    {
        $configured = (int) $this->config()->get('chunk_size');

        // negative values are clamped to 0
        return max(0, $configured);
    }
258
259
    /**
     * Extension point for reacting to a URL which failed to process
     *
     * The default implementation does nothing, so such failures are effectively silenced
     * unless a subclass overrides this method
     *
     * @param string $url
     * @param array $meta
     */
    protected function handleFailedUrl($url, array $meta)
    {
        // intentionally empty
    }
270
}
271