1 | <?php |
||
2 | |||
3 | namespace SilverStripe\StaticPublishQueue; |
||
4 | |||
5 | use SilverStripe\Core\Config\Configurable; |
||
6 | use SilverStripe\Core\Extensible; |
||
7 | use Symbiote\QueuedJobs\Services\AbstractQueuedJob; |
||
8 | |||
/**
 * Class Job
 *
 * Base class for static cache queued jobs
 * Keeps a map of URLs which still need to be handled and works through them in chunks,
 * at most one chunk per call of process()
 *
 * @property array $URLsToProcess map of URL => priority, entries are removed as URLs get processed
 * @property array $ProcessedURLs map of URL => URL, entries are added as URLs get processed
 * @package SilverStripe\StaticPublishQueue
 */
abstract class Job extends AbstractQueuedJob
{
    use Configurable;
    use Extensible;

    /**
     * Number of URLs processed during one call of @see AbstractQueuedJob::process()
     * This number should be set to a value which represents number of URLs which is reasonable to process in one go
     * This number will vary depending on project, more specifically it depends on:
     * - time to render your pages
     * - infrastructure
     *
     * If this number is too large jobs may experience performance / memory issues
     * If this number is too low the jobs will produce more overhead which may cause inefficiencies
     *
     * In case your project is complex and you are struggling to find the correct number
     * it's possible to move this value to a CMS setting and adjust as needed without the need of changing the code
     * Use @see Job::getChunkSize() to override the value lookup
     * You can subclass your jobs and implement your own getChunkSize() method which will look into CMS setting
     *
     * Chunking capability can be disabled if chunk size is set to 0
     * In such case, all URLs will be processed in one go
     *
     * @var int
     * @config
     */
    private static $chunk_size = 200;

    /**
     * Number of URLs per job allows you to split work into multiple smaller jobs instead of having one large job
     * This is useful if you're running a queue setup with parallel processing or if you have too many URLs in general
     * If this number is too high you're limiting the parallel processing opportunity
     * If this number is too low you're using your resources inefficiently
     * as every job processing has a fixed overhead which adds up if there are too many jobs
     *
     * In case your project is complex, and you are struggling to find the correct number
     * it's possible to move this value to a CMS setting and adjust as needed without the need to change the code
     * Use @see Job::getUrlsPerJob() to override the value lookup
     * You can subclass your jobs and implement your own getUrlsPerJob() method which can read from the CMS setting
     *
     * Batching capability can be disabled if urls per job is set to 0
     * In this case, all URLs will be processed in a single job
     *
     * @var int
     * @config
     */
    private static $urls_per_job = 0;

    /**
     * Use this method to populate newly created job with data
     *
     * @param array $urls map of URL => priority
     * @param string|null $message optional note, when given (and not empty) the job messages are replaced
     *                             with a single entry made of the note and the exported list of URLs
     */
    public function hydrate(array $urls, ?string $message): void
    {
        $this->URLsToProcess = $urls;

        if (!$message) {
            return;
        }

        $this->messages = [
            sprintf('%s: %s', $message, var_export(array_keys($urls), true)),
        ];
    }

    /**
     * Static cache manipulation jobs need to run without a user
     * this is because we don't want any session related data to become part of URLs
     * For example stage GET param is injected into URLs when user is logged in
     * This is problematic as stage param must not be present in statically published URLs
     * as they always refer to live content
     * Including stage param in visiting URL is meant to bypass static cache and redirect to admin login
     * this is something we definitely don't want for statically cached pages
     *
     * @return int|null always 0 in this implementation
     */
    public function getRunAsMemberID(): ?int
    {
        return 0;
    }

    /**
     * Run the parent setup and use the number of pending URLs as the total number of steps
     * A job without any URL data ends up with zero steps instead of failing on count()
     */
    public function setup(): void
    {
        parent::setup();
        $this->totalSteps = count($this->pendingUrls());
    }

    /**
     * Signature is derived from the job class name and the pending URLs (the array keys)
     * which means two jobs of the same class holding the same URLs in the same order share a signature
     *
     * @return string md5 hash
     */
    public function getSignature(): string
    {
        $urlPart = implode('-', array_keys($this->pendingUrls()));

        return md5(implode('-', [static::class, $urlPart]));
    }

    /**
     * Process pending URLs, the number of URLs handled in one call is limited by the chunk size
     * Completed state is only evaluated when the loop reached the end of the pending URLs
     */
    public function process(): void
    {
        $chunkSize = $this->getChunkSize();
        $count = 0;

        // iterate over a snapshot of the pending URLs, entries removed
        // from the job data during processing do not disturb the loop
        foreach ($this->pendingUrls() as $url => $priority) {
            $count += 1;

            if ($chunkSize > 0 && $count > $chunkSize) {
                // chunk is full, remaining URLs are left for the next call
                return;
            }

            $this->processUrl($url, $priority);
        }

        $this->updateCompletedState();
    }

    /**
     * Look up the configured number of URLs per job, negative values are treated as 0 (batching disabled)
     *
     * @return int
     */
    public function getUrlsPerJob(): int
    {
        return max(0, (int) $this->config()->get('urls_per_job'));
    }

    /**
     * Implement this method to process URL
     *
     * @param string $url
     * @param int $priority
     */
    abstract protected function processUrl(string $url, int $priority): void;

    /**
     * Move URL to list of processed URLs and update job step to indicate progress
     * indication of progress is important for jobs which take long time to process
     * jobs that do not indicate progress may be identified as stalled by the queue
     * and may end up paused
     *
     * @param string $url
     */
    protected function markUrlAsProcessed(string $url): void
    {
        // This operation has to be done directly on the job data properties
        // as the magic methods won't cover array access write
        $this->jobData->ProcessedURLs[$url] = $url;
        unset($this->jobData->URLsToProcess[$url]);
        $this->currentStep += 1;
    }

    /**
     * Check if job is complete and update the job state if needed
     * Job is complete once there are no pending URLs left
     */
    protected function updateCompletedState(): void
    {
        if (count($this->pendingUrls()) > 0) {
            return;
        }

        $this->isComplete = true;
    }

    /**
     * Look up the configured chunk size, negative values are treated as 0 (chunking disabled)
     *
     * @return int
     */
    protected function getChunkSize(): int
    {
        return max(0, (int) $this->config()->get('chunk_size'));
    }

    /**
     * This function can be overridden to handle the case of failure of specific URL processing
     * such case is not handled by default which results in all such errors being effectively silenced
     *
     * No native return type is declared on purpose so existing overrides without one stay compatible
     *
     * @param string $url
     * @param array $meta
     * @return void
     */
    protected function handleFailedUrl(string $url, array $meta)
    {
        // no op - override this on your job classes if needed
    }

    /**
     * Read the pending URLs and normalise the result to an array
     * The value may not be an array (for example job which was never hydrated),
     * such case is treated as nothing to process so count() / array_keys() / foreach stay safe
     *
     * The value is read into a variable first instead of using the null coalescing operator
     * on the property, that operator relies on isset() semantics of the magic accessors
     *
     * @return array map of URL => priority
     */
    private function pendingUrls(): array
    {
        $urls = $this->URLsToProcess;

        return is_array($urls) ? $urls : [];
    }
}
||
197 |