silverstripe /
silverstripe-staticpublishqueue
| 1 | <?php |
||
| 2 | |||
| 3 | namespace SilverStripe\StaticPublishQueue; |
||
| 4 | |||
| 5 | use SilverStripe\Core\Config\Configurable; |
||
| 6 | use SilverStripe\Core\Extensible; |
||
| 7 | use Symbiote\QueuedJobs\Services\AbstractQueuedJob; |
||
| 8 | |||
/**
 * Class Job
 *
 * Base class for static cache queued jobs. A job carries a map of URLs (URL => priority)
 * and works through it in chunks, one chunk per call of process().
 *
 * @property array $URLsToProcess map of URL => priority which still needs to be processed
 * @property array $ProcessedURLs map of URL => URL which was already processed
 * @package SilverStripe\StaticPublishQueue
 */
abstract class Job extends AbstractQueuedJob
{
    use Configurable;
    use Extensible;

    /**
     * Number of URLs processed during one call of @see AbstractQueuedJob::process()
     * This number should be set to a value which represents the number of URLs that is reasonable to process in one go
     * This number will vary depending on the project, more specifically it depends on:
     * - time to render your pages
     * - infrastructure
     *
     * If this number is too large, jobs may experience performance / memory issues
     * If this number is too low, the jobs will produce more overhead which may cause inefficiencies
     *
     * In case your project is complex and you are struggling to find the correct number,
     * it's possible to move this value to a CMS setting and adjust it as needed without changing the code
     * Use @see Job::getChunkSize() to override the value lookup
     * You can subclass your jobs and implement your own getChunkSize() method which will look into the CMS setting
     *
     * Chunking capability can be disabled if chunk size is set to 0
     * In such a case, all URLs will be processed in one go
     *
     * @var int
     * @config
     */
    private static $chunk_size = 200;

    /**
     * Number of URLs per job allows you to split work into multiple smaller jobs instead of having one large job
     * This is useful if you're running a queue setup with parallel processing or if you have too many URLs in general
     * If this number is too high, you're limiting the parallel processing opportunity
     * If this number is too low, you're using your resources inefficiently
     * as every job processing has a fixed overhead which adds up if there are too many jobs
     *
     * In case your project is complex and you are struggling to find the correct number,
     * it's possible to move this value to a CMS setting and adjust it as needed without changing the code
     * Use @see Job::getUrlsPerJob() to override the value lookup
     * You can subclass your jobs and implement your own getUrlsPerJob() method which can read from the CMS setting
     *
     * Batching capability can be disabled if urls per job is set to 0
     * In this case, all URLs will be processed in a single job
     *
     * @var int
     * @config
     */
    private static $urls_per_job = 0;

    /**
     * Use this method to populate a newly created job with data
     *
     * When a message is provided, the job messages are replaced by a single entry
     * combining the message with an export of the URL list (the keys of $urls).
     *
     * @param array $urls map of URL => priority
     * @param string|null $message optional message; nothing is recorded when it is empty
     */
    public function hydrate(array $urls, ?string $message): void
    {
        $this->URLsToProcess = $urls;

        if (!$message) {
            return;
        }

        $this->messages = [
            sprintf('%s: %s', $message, var_export(array_keys($urls), true)),
        ];
    }

    /**
     * Static cache manipulation jobs need to run without a user
     * this is because we don't want any session related data to become part of URLs
     * For example stage GET param is injected into URLs when user is logged in
     * This is problematic as stage param must not be present in statically published URLs
     * as they always refer to live content
     * Including stage param in visiting URL is meant to bypass static cache and redirect to admin login
     * this is something we definitely don't want for statically cached pages
     *
     * @return int|null always 0 in this base implementation
     */
    public function getRunAsMemberID(): ?int
    {
        return 0;
    }

    /**
     * Prepare the job: the total number of steps equals the number of URLs waiting to be processed
     */
    public function setup(): void
    {
        parent::setup();

        // A job which was never hydrated has no URL list, treat that as an empty list
        $this->totalSteps = count($this->URLsToProcess ?? []);
    }

    /**
     * Signature derived from the job class name and the list of URLs waiting to be processed
     *
     * @return string
     */
    public function getSignature(): string
    {
        // A job which was never hydrated has no URL list, treat that as an empty list
        $urls = array_keys($this->URLsToProcess ?? []);

        return md5(implode('-', [static::class, implode('-', $urls)]));
    }

    /**
     * Process one chunk of URLs
     *
     * At most @see Job::getChunkSize() URLs are handed over to processUrl() during one call,
     * a chunk size of 0 means all URLs are processed in one go.
     * The completed state is only evaluated when the loop ran through the whole list.
     */
    public function process(): void
    {
        // A job which was never hydrated has no URL list, treat that as an empty list
        $urls = $this->URLsToProcess ?? [];
        $chunkSize = $this->getChunkSize();
        $handled = 0;

        foreach ($urls as $url => $priority) {
            // Chunk limit reached - the remaining URLs are left for the next call
            if ($chunkSize > 0 && $handled >= $chunkSize) {
                return;
            }

            $this->processUrl($url, $priority);
            $handled += 1;
        }

        $this->updateCompletedState();
    }

    /**
     * Configured number of URLs per job, negative values are normalised to 0 (batching disabled)
     *
     * @return int
     */
    public function getUrlsPerJob(): int
    {
        return max(0, (int) $this->config()->get('urls_per_job'));
    }

    /**
     * Implement this method to process URL
     *
     * @param string $url
     * @param int $priority
     */
    abstract protected function processUrl(string $url, int $priority): void;

    /**
     * Move URL to list of processed URLs and update job step to indicate progress
     * indication of progress is important for jobs which take long time to process
     * jobs that do not indicate progress may be identified as stalled by the queue
     * and may end up paused
     *
     * @param string $url
     */
    protected function markUrlAsProcessed(string $url): void
    {
        // These operations have to be done directly on the job data properties
        // as the magic methods won't cover array access write
        $this->jobData->ProcessedURLs[$url] = $url;
        unset($this->jobData->URLsToProcess[$url]);
        $this->currentStep += 1;
    }

    /**
     * Check if job is complete and update the job state if needed
     *
     * The job is flagged as complete once no URLs are left to process
     */
    protected function updateCompletedState(): void
    {
        // A job which was never hydrated has no URL list, treat that as an empty list
        if (count($this->URLsToProcess ?? []) > 0) {
            return;
        }

        $this->isComplete = true;
    }

    /**
     * Configured chunk size, negative values are normalised to 0 (chunking disabled)
     *
     * @return int
     */
    protected function getChunkSize(): int
    {
        return max(0, (int) $this->config()->get('chunk_size'));
    }

    /**
     * This function can be overridden to handle the case of failure of specific URL processing
     * such case is not handled by default which results in all such errors being effectively silenced
     *
     * No native return type is declared here so existing overrides stay signature compatible
     *
     * @param string $url
     * @param array $meta
     * @return void
     */
    protected function handleFailedUrl(string $url, array $meta)
    {
        // no op - override this on your job classes if needed
    }
}
||
| 197 |