Passed
Pull Request — master (#104)
by
unknown
02:00
created

Job::process()   A

Complexity

Conditions 4
Paths 3

Size

Total Lines 14
Code Lines 8

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 4
eloc 8
nc 3
nop 0
dl 0
loc 14
rs 10
c 0
b 0
f 0
1
<?php
2
3
namespace SilverStripe\StaticPublishQueue;
4
5
use SilverStripe\Core\Config\Configurable;
6
use SilverStripe\Core\Extensible;
7
use SilverStripe\Core\Injector\Injector;
8
use SilverStripe\ORM\ValidationException;
9
use SilverStripe\StaticPublishQueue\Service\URLSanitisationService;
10
use stdClass;
11
use Symbiote\QueuedJobs\Services\AbstractQueuedJob;
12
use Symbiote\QueuedJobs\Services\QueuedJobService;
13
14
abstract class Job extends AbstractQueuedJob
15
{
16
    use Configurable;
17
    use Extensible;
18
19
    /**
20
     * Number of URLs per job allows you to split work into multiple smaller jobs instead of having one large job
21
     * this is useful if you're running a queue setup with parallel processing
22
     * if this number is too high you're limiting the parallel processing opportunity
23
     * if this number is too low you're using your resources inefficiently
24
     * as every job processing has a fixed overhead which adds up if there are too many jobs
25
     *
26
     * in case your project is complex and you are struggling to find the correct number
27
     * it's possible to move this value to a CMS setting and adjust as needed without the need of changing the code
28
     * use @see Job::getUrlsPerJob() to override the value lookup
29
     * you can subclass your jobs and implement your own getUrlsPerJob() method which will look into CMS setting
30
     *
31
     * batching capability can be disabled if urls per job is set to 0
32
     * in such case, all URLs will be put into one job
33
     *
34
     * @var int
35
     * @config
36
     */
37
    private static $urls_per_job = 0;
0 ignored issues
show
introduced by
The private property $urls_per_job is not used, and could be removed.
Loading history...
38
39
    /**
40
     * Number of URLs processed during one call of @see AbstractQueuedJob::process
41
     * this number should be set to a value which represents number of URLs which is reasonable to process in one go
42
     * this number will vary depending on project, more specifically it depends on:
43
     * - time to render your pages
44
     * - infrastructure
45
     *
46
     * if this number is too large jobs may experience performance / memory issues
47
     * if this number is too low the jobs will produce more overhead which may cause inefficiencies
48
     *
49
     * in case your project is complex and you are struggling to find the correct number
50
     * it's possible to move this value to a CMS setting and adjust as needed without the need of changing the code
51
     * use @see Job::getChunkSize() to override the value lookup
52
     * you can subclass your jobs and implement your own getChunkSize() method which will look into CMS setting
53
     *
54
     * chunking capability can be disabled if chunk size is set to 0
55
     * in such case, all URLs will be processed in one go
56
     *
57
     * @var int
58
     * @config
59
     */
60
    private static $chunk_size = 200;
0 ignored issues
show
introduced by
The private property $chunk_size is not used, and could be removed.
Loading history...
61
62
    /**
     * Initialise the progress total from the URLs that are waiting to be processed.
     *
     * Progress is tracked per URL instead of per chunk: the chunk size may change
     * during runtime (if a CMS setting override is used), so counting completed URLs
     * gives a much more accurate and useful total than counting completed chunks.
     */
    public function setup()
    {
        parent::setup();

        $pendingUrls = $this->jobData->URLsToProcess;
        $this->totalSteps = count($pendingUrls);
    }
73
74
    /**
     * Build the signature for this job.
     *
     * The signature is the MD5 hash of the concrete class name and the keys of
     * URLsToProcess, all joined with '-'.
     *
     * NOTE(review): URLsToProcess is read directly from $this here while the other
     * methods read it from $this->jobData - presumably the parent class resolves it, confirm.
     *
     * @return string
     */
    public function getSignature()
    {
        $urlPart = implode('-', array_keys($this->URLsToProcess));
        $signatureSource = static::class . '-' . $urlPart;

        return md5($signatureSource);
    }
78
79
    /**
     * Process one chunk of the pending URLs and then refresh the completed state.
     *
     * At most getChunkSize() URLs are handed to processUrl() per call;
     * a chunk size of 0 disables the limit so every pending URL is handled in one go.
     */
    public function process()
    {
        $limit = $this->getChunkSize();
        $handled = 0;

        foreach ($this->jobData->URLsToProcess as $url => $priority) {
            // stop once the chunk is full (only when chunking is enabled)
            if ($limit > 0 && $handled >= $limit) {
                break;
            }

            $this->processUrl($url, $priority);
            $handled++;
        }

        $this->updateCompletedState();
    }
94
95
    /**
     * Generate and queue static cache related jobs from data
     *
     * The URLs are de-duplicated, split into batches of $urlsPerJob URLs
     * (a single batch if the value is not greater than 0), run through the
     * URLSanitisationService and wrapped into one job per batch.
     *
     * manipulationCallback details
     * if you use custom callback be sure to return the array of jobs
     * example use cases:
     * - I want to override queue settings (change the queue type, schedule jobs for specific time...)
     * - I want to add some custom data to the jobs before the jobs are queued (common job identifier...)
     * - I don't want to queue the jobs, I only want the jobs to be returned (can be used for filtering)
     * <code>
     * GenerateStaticCacheJob::queueJobsFromData($urls, '', null, null, function (array $jobs) {
     *     // do something here with the jobs
     *     return $jobs;
     * });
     * </code>
     *
     * @param array $urls URLs to be processed into jobs
     * @param string $message will be stored in job data and it's useful debug information
     * @param null|int $urlsPerJob number of URLs per job, defaults to Job specific configuration
     * @param null|string $jobClass job class used to create jobs, defaults to current class
     * @param null|callable $manipulationCallback pass callback to handle job queue process, defaults to standard queue process
     * @return array|Job[] the created jobs, or the return value of the manipulation callback if one is given
     * @throws ValidationException if the job class does not resolve to an instance of Job
     */
    public static function queueJobsFromData(
        array $urls,
        $message = '',
        $urlsPerJob = null,
        $jobClass = null,
        $manipulationCallback = null
    ) {
        if (count($urls) === 0) {
            return [];
        }

        // remove duplicate URLs
        $urls = array_unique($urls);

        // fall back to current job class if we don't have an explicit value set
        if ($jobClass === null) {
            $jobClass = static::class;
        }

        // validate job class
        $prototype = singleton($jobClass);
        if (!($prototype instanceof Job)) {
            throw new ValidationException(
                sprintf('Invalid job class %s, expected instance of %s', get_class($prototype), Job::class)
            );
        }

        // fall back to current job urls_per_job if we don't have an explicit value set
        if ($urlsPerJob === null) {
            $urlsPerJob = $prototype->getUrlsPerJob();
        }

        // if no message is provided don't include it
        // (cast first: passing null to strlen() is deprecated as of PHP 8.1)
        $message = (string) $message;
        $messagePrefix = ($message !== '') ? $message . ': ' : '';

        // batch URLs
        $batches = ($urlsPerJob > 0) ? array_chunk($urls, $urlsPerJob) : [$urls];

        $jobs = [];
        foreach ($batches as $batch) {
            // sanitise the URLs, a fresh service instance is used for every batch
            $urlService = Injector::inst()->create(URLSanitisationService::class);
            $urlService->addURLs($batch);
            $sanitisedUrls = $urlService->getURLs();

            // create job and populate it with data
            $batchJob = Injector::inst()->create($jobClass);
            $jobData = new stdClass();
            $jobData->URLsToProcess = $sanitisedUrls;

            $batchJob->setJobData(count($jobData->URLsToProcess), 0, false, $jobData, [
                $messagePrefix . var_export(array_keys($jobData->URLsToProcess), true),
            ]);

            $jobs[] = $batchJob;
        }

        if ($manipulationCallback !== null) {
            // custom queue process
            return $manipulationCallback($jobs);
        }

        // default queue process
        $service = QueuedJobService::singleton();

        foreach ($jobs as $queuedJob) {
            $service->queueJob($queuedJob);
        }

        return $jobs;
    }
189
190
    /**
191
     * Implement this method to process a single URL
192
     *
193
     * @param string $url
194
     * @param int $priority
195
     */
196
    // called by process() once for every URL => priority pair of the current chunk
    abstract protected function processUrl($url, $priority);
197
198
    /**
     * Record a URL as done and advance the job progress by one step.
     *
     * The URL is stored in the ProcessedURLs job data and removed from URLsToProcess.
     * Reporting progress matters for jobs which take a long time to process:
     * jobs that do not indicate progress may be identified as stalled by the queue
     * and may end up paused.
     *
     * @param string $url
     */
    protected function markUrlAsProcessed($url)
    {
        $data = $this->jobData;

        $data->ProcessedURLs[$url] = $url;
        unset($data->URLsToProcess[$url]);

        $this->currentStep = $this->currentStep + 1;
    }
212
213
    /**
     * Flag the job as complete once there are no URLs left to process.
     * The state is left untouched while URLs are still pending.
     */
    protected function updateCompletedState()
    {
        $remaining = count($this->jobData->URLsToProcess);

        if ($remaining === 0) {
            $this->isComplete = true;
        }
    }
224
225
    /**
     * Look up the urls_per_job configuration value.
     *
     * @return int the configured value cast to int, values below 0 are returned as 0
     */
    protected function getUrlsPerJob()
    {
        $configured = (int) $this->config()->get('urls_per_job');

        return max(0, $configured);
    }
234
235
    /**
     * Look up the chunk_size configuration value.
     *
     * @return int the configured value cast to int, values below 0 are returned as 0
     */
    protected function getChunkSize()
    {
        $configured = (int) $this->config()->get('chunk_size');

        if ($configured < 0) {
            return 0;
        }

        return $configured;
    }
244
245
    /**
     * Hook for reacting to a URL which failed to process.
     *
     * The default implementation does nothing, so such failures are effectively
     * silenced unless this method is overridden.
     *
     * @param string $url
     * @param array $meta NOTE(review): the contents are not defined in this class - confirm against the caller
     */
    protected function handleFailedUrl($url, array $meta)
    {
        // intentionally empty
        return;
    }
256
}
257