Completed
Pull Request — master (#104)
by
unknown
12:59
created

Job::queueJobsFromData()   B

Complexity

Conditions 10
Paths 99

Size

Total Lines 70
Code Lines 30

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 10
eloc 30
nc 99
nop 5
dl 0
loc 70
rs 7.6666
c 0
b 0
f 0

How to fix   Long Method    Complexity   

Long Method

Small methods make your code easier to understand, in particular if combined with a good name. Besides, if your method is small, finding a good name is usually much easier.

For example, if you find yourself adding comments to a method's body, this is usually a good sign to extract the commented part to a new method, and use the comment as a starting point when coming up with a good name for this new method.

Commonly applied refactorings include:

1
<?php
2
3
namespace SilverStripe\StaticPublishQueue;
4
5
use SilverStripe\Core\Config\Configurable;
6
use SilverStripe\Core\Extensible;
7
use SilverStripe\Core\Injector\Injectable;
8
use SilverStripe\Core\Injector\Injector;
9
use SilverStripe\ORM\ValidationException;
10
use SilverStripe\StaticPublishQueue\Service\URLSanitisationService;
11
use stdClass;
12
use Symbiote\QueuedJobs\Services\AbstractQueuedJob;
13
use Symbiote\QueuedJobs\Services\QueuedJobService;
14
15
abstract class Job extends AbstractQueuedJob
16
{
17
    use Configurable;
18
    use Extensible;
19
    use Injectable;
20
21
    /**
22
     * Number of URLs per job allows you to split work into multiple smaller jobs instead of having one large job
23
     * this is useful if you're running a queue setup with parallel processing
24
     * if this number is too high you're limiting the parallel processing opportunity
25
     * if this number is too low you're using your resources inefficiently
26
     * as every job processing has a fixed overhead which adds up if there are too many jobs
27
     *
28
     * in case your project is complex and you are struggling to find the correct number
29
     * it's possible to move this value to a CMS setting and adjust as needed without the need of changing the code
30
     * use @see Job::getUrlsPerJob() to override the value lookup
31
     * you can subclass your jobs and implement your own getUrlsPerJob() method which will look into CMS setting
32
     *
33
     * batching capability can be disabled if urls per job is set to 0
34
     * in such case, all URLs will be put into one job
35
     *
36
     * @var int
37
     * @config
38
     */
39
    private static $urls_per_job = 0;
0 ignored issues
show
introduced by
The private property $urls_per_job is not used, and could be removed.
Loading history...
40
41
    /**
42
     * Number of URLs processed during one call of @see AbstractQueuedJob::process
43
     * this number should be set to a value which represents number of URLs which is reasonable to process in one go
44
     * this number will vary depending on project, more specifically it depends on:
45
     * - time to render your pages
46
     * - infrastructure
47
     *
48
     * if this number is too large jobs may experience performance / memory issues
49
     * if this number is too low the jobs will produce more overhead which may cause inefficiencies
50
     *
51
     * in case your project is complex and you are struggling to find the correct number
52
     * it's possible to move this value to a CMS setting and adjust as needed without the need of changing the code
53
     * use @see Job::getChunkSize() to override the value lookup
54
     * you can subclass your jobs and implement your own getChunkSize() method which will look into CMS setting
55
     *
56
     * chunking capability can be disabled if chunk size is set to 0
57
     * in such case, all URLs will be processed in one go
58
     *
59
     * @var int
60
     * @config
61
     */
62
    private static $chunk_size = 200;
0 ignored issues
show
introduced by
The private property $chunk_size is not used, and could be removed.
Loading history...
63
64
    /**
     * Initialise the job and record how many URLs it has to work through
     *
     * Progress is counted per URL rather than per chunk, because the chunk size
     * may change during runtime (if a CMS setting override is used), which makes
     * the number of completed URLs the more accurate and useful measure
     */
    public function setup()
    {
        parent::setup();

        $pendingUrls = $this->jobData->URLsToProcess;
        $this->totalSteps = count($pendingUrls);
    }
75
76
    /**
     * Build the job signature from the concrete job class name
     * and the keys of the URLs this job still has to process
     *
     * @return string md5 hash of "<class>-<url>-<url>-..."
     */
    public function getSignature()
    {
        $urlPart = implode('-', array_keys($this->URLsToProcess));

        return md5(static::class . '-' . $urlPart);
    }
80
81
    /**
     * Process one chunk of the remaining URLs and then re-evaluate the completed state
     *
     * At most getChunkSize() URLs are handed to processUrl() per call,
     * a chunk size of 0 means that all remaining URLs are processed in one go
     */
    public function process()
    {
        $limit = $this->getChunkSize();
        $handled = 0;

        foreach ($this->jobData->URLsToProcess as $url => $priority) {
            // stop as soon as the chunk is full (only when chunking is enabled)
            if ($limit > 0 && $handled >= $limit) {
                break;
            }

            $this->processUrl($url, $priority);
            $handled++;
        }

        $this->updateCompletedState();
    }
96
97
    /**
     * Generate and queue static cache related jobs from data
     *
     * manipulationCallback details
     * if you use custom callback be sure to return the array of jobs
     * example use cases:
     * - I want to override queue settings (change the queue type, schedule jobs for specific time...)
     * - I want to add some custom data to the jobs before the jobs are queued (common job identifier...)
     * - I don't want to queue the jobs, I only want the jobs to be returned (can be used for filtering)
     * <code>
     * GenerateStaticCacheJob::singleton()->queueJobsFromData($urls, '', null, null, function (array $jobs) {
     *     // do something here with the jobs
     *     return $jobs;
     * });
     * </code>
     *
     * @param array $urls URLs to be processed into jobs
     * @param string $message will be stored in job data and it's useful debug information
     * @param null|int $urlsPerJob number of URLs per job, defaults to Job specific configuration
     * @param null|string $jobClass job class used to create jobs, defaults to current class
     * @param null|callable $manipulationCallback pass callback to handle job queue process, defaults to standard queue process
     * @return array|Job[] created jobs, or whatever the manipulation callback returns
     * @throws ValidationException if the job class does not resolve to an instance of Job
     */
    public function queueJobsFromData(
        array $urls,
        $message = '',
        $urlsPerJob = null,
        $jobClass = null,
        $manipulationCallback = null
    ) {
        if (count($urls) === 0) {
            return [];
        }

        // remove duplicate URLs
        $urls = array_unique($urls);

        // fall back to current job class if we don't have an explicit value set
        if ($jobClass === null) {
            $jobClass = static::class;
        }

        // validate job class, the singleton is only used as a prototype for validation and config lookup
        $prototype = singleton($jobClass);
        if (!($prototype instanceof Job)) {
            throw new ValidationException(
                sprintf('Invalid job class %s, expected instance of %s', get_class($prototype), Job::class)
            );
        }

        // fall back to the job class specific urls_per_job if we don't have an explicit value set
        if ($urlsPerJob === null) {
            $urlsPerJob = $prototype->getUrlsPerJob();
        }

        // if no message is provided don't include it
        // the cast keeps a null message working without a deprecation notice on PHP 8.1+
        $message = (string) $message;
        $messagePrefix = (strlen($message) > 0) ? $message . ': ' : '';

        // batch URLs, batching is disabled when URLs per job is not a positive number
        $batches = ($urlsPerJob > 0) ? array_chunk($urls, $urlsPerJob) : [$urls];

        $jobs = [];
        foreach ($batches as $batch) {
            $jobs[] = $this->createJobForBatch($jobClass, $batch, $messagePrefix);
        }

        if ($manipulationCallback !== null) {
            // custom queue process
            return $manipulationCallback($jobs);
        }

        // default queue process
        $service = QueuedJobService::singleton();

        foreach ($jobs as $job) {
            $service->queueJob($job);
        }

        return $jobs;
    }

    /**
     * Create one job of the given class and populate it with a sanitised batch of URLs
     *
     * @param string $jobClass job class used to create the job
     * @param array $batch URLs which belong to this job
     * @param string $messagePrefix prefix for the job message, may be empty
     * @return Job
     */
    private function createJobForBatch($jobClass, array $batch, $messagePrefix)
    {
        // sanitise the URLs, a new service instance is created for every batch
        $urlService = Injector::inst()->create(URLSanitisationService::class);
        $urlService->addURLs($batch);

        // create job and populate it with data
        $job = Injector::inst()->create($jobClass);
        $jobData = new stdClass();
        $jobData->URLsToProcess = $urlService->getURLs();

        // total steps match the number of URLs, the job message lists the URLs for debugging
        $job->setJobData(count($jobData->URLsToProcess), 0, false, $jobData, [
            $messagePrefix . var_export(array_keys($jobData->URLsToProcess), true),
        ]);

        return $job;
    }
191
192
    /**
     * Handle a single URL, every concrete job has to provide its own implementation
     *
     * Called by process() once for each URL of the current chunk
     *
     * @param string $url key of the URLsToProcess entry
     * @param int $priority value of the URLsToProcess entry
     */
    abstract protected function processUrl($url, $priority);
199
200
    /**
     * Record the URL as processed, drop it from the pending list and advance the job step
     *
     * Advancing the step indicates progress, which matters for long running jobs:
     * jobs that do not indicate progress may be identified as stalled by the queue
     * and may end up paused
     *
     * @param string $url
     */
    protected function markUrlAsProcessed($url)
    {
        // job data is an object, so changes made through this handle are stored on the job
        $data = $this->jobData;

        $data->ProcessedURLs[$url] = $url;
        unset($data->URLsToProcess[$url]);

        $this->currentStep = $this->currentStep + 1;
    }
214
215
    /**
     * Flag the job as complete once there are no URLs left to process
     */
    protected function updateCompletedState()
    {
        $remaining = count($this->jobData->URLsToProcess);

        if ($remaining === 0) {
            $this->isComplete = true;
        }
    }
226
227
    /**
     * Look up the configured number of URLs per job
     *
     * @return int the urls_per_job config value, or 0 when it is not a positive number
     */
    protected function getUrlsPerJob()
    {
        return max(0, (int) $this->config()->get('urls_per_job'));
    }
236
237
    /**
     * Look up the configured number of URLs processed per process() call
     *
     * @return int the chunk_size config value, or 0 when it is not a positive number
     */
    protected function getChunkSize()
    {
        $configured = (int) $this->config()->get('chunk_size');

        if ($configured <= 0) {
            return 0;
        }

        return $configured;
    }
246
247
    /**
     * Hook for reacting to a URL which failed to process
     *
     * The default implementation does nothing, which means such failures are
     * effectively silenced unless a subclass overrides this method
     *
     * @param string $url
     * @param array $meta
     */
    protected function handleFailedUrl($url, array $meta)
    {
        // intentionally empty
    }
258
}
259