1 | <?php |
||||||
2 | |||||||
3 | namespace PhpTek\Exodus\Model; |
||||||
4 | |||||||
5 | use ExternalContentSource; |
||||||
6 | use PhpTek\Exodus\Transform\StaticSiteImporter; |
||||||
7 | use PhpTek\Exodus\Tool\StaticSiteUtils; |
||||||
8 | use PhpTek\Exodus\Tool\StaticSiteUrlList; |
||||||
9 | use PhpTek\Exodus\Tool\StaticSiteMimeProcessor; |
||||||
10 | use PhpTek\Exodus\Tool\StaticSiteUrlProcessor; |
||||||
11 | use SilverStripe\Forms\HeaderField; |
||||||
12 | use SilverStripe\Core\ClassInfo; |
||||||
13 | use SilverStripe\ORM\DataObject; |
||||||
14 | use SilverStripe\ORM\ArrayList; |
||||||
15 | use SilverStripe\Forms\FormAction; |
||||||
16 | use SilverStripe\Forms\ReadonlyField; |
||||||
17 | use SilverStripe\Forms\LiteralField; |
||||||
18 | use SilverStripe\Forms\CheckboxField; |
||||||
19 | use SilverStripe\Forms\OptionsetField; |
||||||
20 | use SilverStripe\Forms\ListboxField; |
||||||
21 | use SilverStripe\Forms\GridField\GridFieldAddNewButton; |
||||||
22 | use SilverStripe\Assets\File; |
||||||
23 | use SilverStripe\CMS\Model\SiteTree; |
||||||
24 | use SilverStripe\Forms\DropdownField; |
||||||
25 | use SilverStripe\ORM\FieldType\DBText; |
||||||
26 | use SilverStripe\ORM\FieldType\DBVarchar; |
||||||
27 | use SilverStripe\Forms\ToggleCompositeField; |
||||||
28 | use SilverStripe\ORM\FieldType\DBField; |
||||||
29 | use SilverStripe\Forms\TextField; |
||||||
30 | use SilverStripe\ORM\FieldType\DBBoolean; |
||||||
31 | use SilverStripe\ORM\FieldType\DBDatetime; |
||||||
32 | |||||||
// Silence E_DEPRECATED notices, otherwise PHP 8+ complains about the ageing phpcrawl lib.
// error_reporting() is used rather than ini_set(): ini_set() receives its value as a plain
// string and does not evaluate constant expressions, so 'E_ALL & ~E_DEPRECATED' would be
// read as the integer 0 and switch error reporting off entirely.
error_reporting(E_ALL & ~E_DEPRECATED);
||||||
35 | |||||||
36 | /** |
||||||
37 | * Define the overarching content-sources, schemas etc. Probably better named a "Migration Profile". |
||||||
38 | * |
||||||
39 | * @package phptek/silverstripe-exodus |
||||||
40 | * @author Sam Minee <[email protected]> |
||||||
41 | * @author Russell Michell <[email protected]> |
||||||
42 | */ |
||||||
43 | class StaticSiteContentSource extends ExternalContentSource |
||||||
44 | { |
||||||
/**
 * Default cache directory name. The constructor swaps the trailing digits for the
 * record's ID (the zero suffix is used by the test-suite).
 *
 * @var string
 */
public const CACHE_DIR_PREFIX = 'static-site-0';

/**
 * Database table name for this class.
 *
 * @var string
 */
private static $table_name = 'StaticSiteContentSource';

/**
 * Singular display name of this class.
 *
 * @var string
 */
private static $singular_name = 'Migration Profile';

/**
 * Plural display name of this class.
 *
 * @var string
 */
private static $plural_name = 'Migration Profiles';

/**
 * Database fields, keyed by field name.
 *
 * @var array
 */
private static $db = [
    'BaseUrl'            => DBVarchar::class,
    'UrlProcessor'       => DBVarchar::class,
    'ExtraCrawlUrls'     => DBText::class,
    'UrlExcludePatterns' => DBText::class,
    'ParseCSS'           => DBBoolean::class,
    'AutoRunTask'        => DBBoolean::class,
];

/**
 * One-to-many relations, keyed by relation name.
 *
 * @var array
 */
private static $has_many = [
    'Schemas' => StaticSiteContentSourceImportSchema::class,
    'Pages'   => SiteTree::class,
    'Files'   => File::class,
];

/**
 * Import-schema columns listed for export.
 *
 * @var array
 */
private static $export_columns = [
    'StaticSiteContentSourceImportSchema.DataType',
    'StaticSiteContentSourceImportSchema.Order',
    'StaticSiteContentSourceImportSchema.AppliesTo',
    'StaticSiteContentSourceImportSchema.MimeTypes',
];

/**
 * Not assigned anywhere in this class; public so calling logic can set it.
 *
 * @var string|null
 */
public $absoluteURL = null;

/**
 * Where do we store our items for caching?
 * Set in the constructor; also used by calling logic.
 *
 * @var string|null
 */
public $cacheDir = null;

/**
 * Holds the StaticSiteUtils object obtained on construct.
 *
 * @var StaticSiteUtils
 */
protected $utils;
||||||
119 | |||||||
/**
 * Builds the record through the parent constructor, then derives this profile's cache
 * directory name and fetches the StaticSiteUtils singleton.
 *
 * @param array|null $record      This will be null for a new database record.
 * @param bool       $isSingleton
 * @param mixed      $model       Handed to the parent constructor unchanged.
 * @return void
 */
public function __construct($record = null, $isSingleton = false, $model = null)
{
    parent::__construct($record, $isSingleton, $model);

    // Replace the trailing digits of the default prefix with this record's ID
    $trailingDigits = '#[0-9]+$#';
    $this->cacheDir = preg_replace($trailingDigits, $this->ID, self::CACHE_DIR_PREFIX);

    $this->utils = singleton(StaticSiteUtils::class);
}
||||||
133 | |||||||
/**
 * Template method used to display the results of a successful crawl into the central
 * column of the CMS.
 *
 * Returns an empty string unless the crawl status is "complete". Otherwise returns an
 * HTML <ul> with one <li> per unique processed URL; where the processed URL differs from
 * the raw one, the raw URL is appended as "(was: ...)".
 *
 * URLs come from the crawled (external) site, so they are HTML-escaped before being
 * embedded in the markup.
 *
 * @return string
 */
public function listofCrawledItems(): string
{
    $urlList = $this->urlList();

    if ($urlList->getSpiderStatus() !== StaticSiteUrlList::CRAWL_STATUS_COMPLETE) {
        return '';
    }

    $escape = static function ($value): string {
        return htmlspecialchars((string) $value, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8');
    };

    $items = '';

    foreach (array_unique($urlList->getProcessedURLs()) as $raw => $processed) {
        // Compare the unescaped values; escape only for output
        if ($raw != $processed) {
            $items .= '<li>' . sprintf('%s (was: %s)', $escape($processed), $escape($raw)) . '</li>';
        } else {
            $items .= '<li>' . $escape($processed) . '</li>';
        }
    }

    return '<ul>' . $items . '</ul>';
}
||||||
159 | |||||||
/**
 * Builds the CMS form for a migration profile.
 *
 * Starting from the parent's fields, this arranges:
 *  - Root.Main: profile name, base URL, URL processor, options and the schema grid
 *  - Root.Crawl: crawl status, URI count, the crawl button and (once complete) the URL list
 *  - Root.Import: import intro and, when import records exist, the "clear imports" controls
 *  - Root.Environment: PHP / webserver information
 *
 * @return \SilverStripe\Forms\FieldList
 * @throws \LogicException If the URL list reports a crawl status other than the three known constants.
 */
public function getCMSFields()
{
    $fields = parent::getCMSFields();

    $fields->removeFieldsFromTab('Root', [
        'Pages',
        'Files',
        'ShowContentInMenu',
        'Name'
    ]);

    // Because we can't pass arrays to FieldList::insertBefore
    foreach (
        [
            HeaderField::create('ProfileHeading', 'Migration Profile Configuration'),
            LiteralField::create('ProfileIntro', ''
                . '<p class="message notice">'
                . 'This is where the basics of your migration profile are configured.'
                . '</p>')] as $introField
    ) {
        $fields->insertBefore('BaseUrl', $introField);
    }

    // Processing Options
    $processingOptions = ['' => "No Processing"];

    foreach (ClassInfo::implementorsOf(StaticSiteUrlProcessor::class) as $processor) {
        $processorObj = singleton($processor);
        $processingOptions[$processor] = $processorObj->getName();
    }

    $fields->addFieldsToTab(
        'Root.Main',
        [
            TextField::create("BaseUrl", "Base URL")
                ->setDescription('The base URL of the site to be crawled and imported.'),
            DropdownField::create("UrlProcessor", "URL Transformation", $processingOptions)
                ->setDescription('Select the way in which crawled URLs should be transformed and cleaned-up.'),
            CheckboxField::create("ParseCSS", "Fetch external CSS")
                ->setDescription("Fetch images defined as CSS <strong>background-image</strong> which are not ordinarily reachable by crawling alone."),
            CheckboxField::create("AutoRunTask", "Automatically rewrite links into Silverstripe-aware links")
                ->setDescription("This will run a link-rewrite task automatically once an import has completed.")
        ]
    );
    $fields->fieldByName('Root.Main')->setTitle('Profile');
    $fields->insertBefore('BaseUrl', TextField::create('Name', 'Name')
        ->setDescription('Allows you to differentiate between profiles.'));

    // Schema Gridfield
    $fields->addFieldToTab('Root.Main', HeaderField::create('ImportConfigHeader', 'Import Schema Configuration'));
    $addNewButton = (new GridFieldAddNewButton('before'))->setButtonName("Add Schema");
    $importRules = $fields->dataFieldByName('Schemas');
    $importRules->getConfig()->removeComponentsByType(GridFieldAddNewButton::class);
    $importRules->getConfig()->addComponent($addNewButton);
    $fields->removeFieldFromTab("Root", "Schemas");
    $fields->addFieldToTab('Root.Main', LiteralField::create(
        'SchemaIntro',
        ''
        . '<p class="message notice">Schema map MIME-Types to Silverstripe content classes and'
        . ' are related to one or more Import Rules. Each rule determines how content located at crawled URLs'
        . ' should be imported into a content classes\' fields with the use of CSS selectors.'
        . ' Where more than one schema exists for a field, they\'ll be processed in the order of Priority:'
        . ' The first Schema to match a URI Pattern will be the one used for that field.</p>'
    ));
    $fields->addFieldToTab("Root.Main", $importRules);

    // Read the crawl status once; it is reused for the button label, the status field
    // and the "crawl complete" section below.
    $spiderStatus = $this->urlList()->getSpiderStatus();

    switch ($spiderStatus) {
        case StaticSiteUrlList::CRAWL_STATUS_NOTSTARTED:
            $crawlButtonText = _t('StaticSiteContentSource.CRAWL_SITE', 'Crawl');
            break;
        case StaticSiteUrlList::CRAWL_STATUS_PARTIAL:
            $crawlButtonText = _t('StaticSiteContentSource.RESUME_CRAWLING', 'Resume Crawl');
            break;
        case StaticSiteUrlList::CRAWL_STATUS_COMPLETE:
            $crawlButtonText = _t('StaticSiteContentSource.RECRAWL_SITE', 'Re-Crawl');
            break;
        default:
            throw new \LogicException("Invalid getSpiderStatus() value '" . $spiderStatus . "'");
    }

    $crawlButton = FormAction::create('crawlsite', $crawlButtonText)
        ->setAttribute('data-icon', 'arrow-circle-double')
        ->setUseButtonTag(true)
        ->addExtraClass('btn action btn btn-primary tool-button font-icon-plus');
    $crawlMsg = '';

    // Disable crawl-button if assets dir isn't writable
    // TODO this will need to change if change the default location of crawl data. Like _why_ is it in assets?
    if (!file_exists(ASSETS_PATH) || !is_writable(ASSETS_PATH)) {
        $crawlMsg = '<p class="message warning">Warning: Assets directory is not writable.</p>';
        $crawlButton->setDisabled(true);
    }

    $fields->addFieldsToTab('Root.Crawl', [
        ReadonlyField::create("CrawlStatus", "Crawl Status", $spiderStatus),
        ReadonlyField::create("NumURIs", "Number of URIs Crawled", $this->urlList()->getNumURIs()),
        // The warning (already a complete <p>, possibly empty) is followed by the toolbar in
        // every case. A ternary here would bind looser than "." and swallow the toolbar.
        LiteralField::create(
            'CrawlActions',
            $crawlMsg . '<div class="btn-toolbar">' . $crawlButton->forTemplate() . '</div>'
        )
    ]);

    // Because we can't pass arrays to FieldList::insertBefore
    foreach (
        [
            HeaderField::create('CrawlHeading', 'Source Site Crawling'),
            LiteralField::create('CrawlIntro', ''
                . '<p class="message notice">'
                . 'Before you can load any content into Silverstripe, all source URLs must first be crawled.'
                . ' Select the button below to start or resume a crawl as applicable.'
                . '</p>')] as $introField
    ) {
        $fields->insertBefore('CrawlStatus', $introField);
    }

    /*
     * @todo use customise() and arrange this using an includes .ss template fragment
     */
    if ($spiderStatus == StaticSiteUrlList::CRAWL_STATUS_COMPLETE) {
        // addFieldsToTab() (plural) so that both the intro and the list are added;
        // addFieldToTab()'s third argument is $insertBefore, not a second field.
        $fields->addFieldsToTab('Root.Crawl', [
            LiteralField::create(
                'CrawlURLListUIntro',
                '<p class="message notice">Review the list of crawled URIs below. When you\'re happy with the import'
                . ' you can proceed to the "Import" tab and follow the instructions there.</p>'
            ),
            LiteralField::create('CrawlURLList', $this->listofCrawledItems())
        ]);
    }

    $fields->dataFieldByName("ExtraCrawlUrls")
        ->setDescription("Add URIs that are not reachable via links when content scraping, eg: '/about/team'. One per line")
        ->setTitle('Additional URIs');
    $fields->dataFieldByName("UrlExcludePatterns")
        ->setDescription("URLs that should be excluded. (Supports regular expressions e.g. '/about/.*'). One per line")
        ->setTitle('Excluded URLs');

    // Build the "<time> (Import #<id>)" option list from all stored import records
    $hasImports = DataObject::get(StaticSiteImportDataObject::class);
    $_source = [];

    foreach ($hasImports as $import) {
        $date = DBField::create_field(DBDatetime::class, $import->Created)->Time24();
        $_source[$import->ID] = $date . ' (Import #' . $import->ID . ')';
    }

    $fields->addFieldsToTab('Root.Import', [
        HeaderField::create('ImportHeading', 'Source Site Import'),
        LiteralField::create('ImportIntro', ''
            . '<p class="message notice">'
            . 'Use this area to configure where in the current IA imported page content should appear.'
            . ' The same goes for imported files and images.'
            . '</p>')]);

    if ($importCount = $hasImports->count()) {
        $clearImportButton = FormAction::create('clearimports', 'Clear selected imports')
            ->setAttribute('data-icon', 'arrow-circle-double')
            ->addExtraClass('btn action btn btn-primary tool-button font-icon-plus')
            ->setUseButtonTag(true);

        $clearImportField = ToggleCompositeField::create('ClearImports', 'Clear Import Metadata', [
            LiteralField::create('ImportCountText', '<p>Each time an import is run, some meta information is stored such as an import identifier and failed-link records.<br/><br/></p>'),
            LiteralField::create('ImportCount', '<p>Total imports: ' . $importCount . '</p>'),
            ListboxField::create('ShowImports', 'Select import(s) to clear:', $_source, '', null, true),
            CheckboxField::create('ClearAllImports', 'Clear all import meta-data', 0),
            LiteralField::create('ImportActions', '<div class="btn-toolbar">' . $clearImportButton->forTemplate() . '</div>')
        ])->addExtraClass('clear-imports');

        $fields->addFieldToTab('Root.Import', $clearImportField);
    }

    // SERVER_SOFTWARE is not set in every SAPI (e.g. CLI), so fall back to a placeholder
    $serverSoftware = $_SERVER['SERVER_SOFTWARE'] ?? 'unknown';

    $fields->addFieldsToTab('Root.Environment', [
        HeaderField::create('EnvHeading', 'Webserver Environment'),
        LiteralField::create('EnvIntro', ''
            . '<p class="message notice">'
            . 'Refer to this area for information related to the PHP and Webserver environment'
            . ' which may affect the proper function and performance of this tool.'
            . '</p>'),
        LiteralField::create('EnvInfo', ''
            . '<ul>'
            // PHP_VERSION is a core constant; $_SERVER has no 'PHP_VERSION' key by default
            . '<li>PHP Info: ' . PHP_VERSION . '</li>'
            . '<li>Webserver Info: ' . $serverSoftware . '</li>'
            . '<li>max_execution_time: ' . sprintf('%s seconds', ini_get('max_execution_time')) . '</li>'
            // Shown verbatim: the setting may carry a K/M/G suffix or be -1 (unlimited)
            . '<li>memory_limit: ' . ini_get('memory_limit') . '</li>'
            . '</ul>')
    ]);

    return $fields;
}
||||||
354 | |||||||
/**
 * After a write, re-runs URL processing when the "UrlProcessor" field has changed on a
 * profile that has already been crawled, so that stored URLs reflect the newly selected
 * processor (or no processor at all when the field was cleared).
 *
 * @return void
 */
public function onAfterWrite()
{
    parent::onAfterWrite();

    $list = $this->urlList();

    // Nothing to do unless the processor changed AND there is crawl data to reprocess
    if (!$this->isChanged('UrlProcessor') || !$list->hasCrawled()) {
        return;
    }

    $processorClass = $this->UrlProcessor;
    $list->setUrlProcessor($processorClass ? $processorClass::create() : null);

    $list->reprocessUrls();
}
||||||
376 | |||||||
/**
 * Returns the StaticSiteUrlList for this profile, creating and configuring it on first use.
 *
 * On creation the list is pointed at ASSETS_PATH/<cacheDir> and given the selected URL
 * processor, the extra crawl URLs and the exclude patterns (the latter two are split on
 * whitespace) where those fields are set.
 *
 * NOTE(review): no $urlList property is declared on this class, so the instance is
 * presumably stored through the parent's magic accessors - confirm.
 *
 * @return StaticSiteUrlList
 */
public function urlList()
{
    // Already built: hand back the stored instance
    if ($this->urlList) {
        return $this->urlList;
    }

    $this->urlList = StaticSiteUrlList::create($this, ASSETS_PATH . '/' . $this->cacheDir);

    $processorClass = $this->UrlProcessor;
    if ($processorClass) {
        $this->urlList->setUrlProcessor($processorClass::create());
    }

    if ($this->ExtraCrawlUrls) {
        $this->urlList->setExtraCrawlUrls(
            preg_split('/\s+/', trim($this->ExtraCrawlUrls))
        );
    }

    if ($this->UrlExcludePatterns) {
        $this->urlList->setExcludePatterns(
            preg_split('/\s+/', trim($this->UrlExcludePatterns))
        );
    }

    return $this->urlList;
}
||||||
403 | |||||||
/**
 * Crawl the target site by delegating to this profile's URL list.
 *
 * @param mixed   $limit   Passed straight through to StaticSiteUrlList::crawl().
 * @param boolean $verbose Passed straight through to StaticSiteUrlList::crawl().
 * @return mixed Whatever StaticSiteUrlList::crawl() returns.
 * @throws \LogicException When the "Base URL" field is empty.
 */
public function crawl($limit = false, $verbose = false)
{
    if ($this->BaseUrl) {
        return $this->urlList()->crawl($limit, $verbose);
    }

    throw new \LogicException('Can\'t crawl a site until the "Base URL" field is set.');
}
||||||
420 | |||||||
/**
 * Fetch an appropriate schema for a given URL and/or Mime-Type.
 *
 * Schemas are tried in ascending "Order". The first one whose URL pattern matches is
 * returned, unless a Mime-Type was given and it is not among the schema's Mime-Types
 * (the configured "undefined" Mime-Type is always accepted).
 *
 * @param string $absoluteURL
 * @param string $mimeType (Optional)
 * @return mixed StaticSiteContentSourceImportSchema $schema or boolean false if no schema matches are found
 */
public function getSchemaForURL($absoluteURL, $mimeType = null)
{
    $mimeType = StaticSiteMimeProcessor::cleanse($mimeType);

    // Ensure the "Order" (Priority) setting is respected
    foreach ($this->Schemas()->sort('Order') as $index => $schema) {
        $urlMatches = $this->schemaCanParseURL($schema, $absoluteURL);
        $acceptedMimes = StaticSiteMimeProcessor::get_mimetypes_from_text($schema->MimeTypes);

        // Log the schema's own Mime-Types, before the "undefined" type is appended
        $this->utils->log(
            ' - Schema: ' . ($index + 1)
            . ', DataType: ' . $schema->DataType
            . ', AppliesTo: ' . $schema->AppliesTo
            . ' mimetypes: ' . implode(', ', $acceptedMimes)
        );
        array_push($acceptedMimes, StaticSiteUrlList::config()->get('undefined_mime_type'));

        if (!$urlMatches) {
            continue;
        }

        $mimeRejected = $mimeType && $acceptedMimes && (!in_array($mimeType, $acceptedMimes));
        if ($mimeRejected) {
            continue;
        }

        return $schema;
    }

    return false;
}
||||||
453 | |||||||
/**
 * Performs a match on the Schema->AppliesTo field with reference to the URL
 * of the current iteration within getSchemaForURL().
 *
 * The pattern (or the schema's configured "default_applies_to" when the field is empty)
 * is anchored at the start and matched case-insensitively against the URL with the
 * profile's Base URL removed.
 *
 * @param StaticSiteContentSourceImportSchema $schema
 * @param string $url
 * @return boolean
 */
public function schemaCanParseURL(StaticSiteContentSourceImportSchema $schema, $url)
{
    // Cast to string: passing null to strlen()/rtrim()/str_replace() is deprecated since PHP 8.1
    $appliesTo = (string) $schema->AppliesTo;
    if ($appliesTo === '') {
        $appliesTo = (string) $schema::config()->get('default_applies_to');
    }

    // Use (escaped) pipes for delimeters as pipes themselves are unlikely to appear in legit URLs
    $appliesTo = str_replace('|', '\|', $appliesTo);
    $baseUrl = rtrim((string) $this->BaseUrl, '/');
    $urlToTest = str_replace($baseUrl, '', (string) $url);

    if (preg_match("|^$appliesTo|i", $urlToTest)) {
        $this->utils->log(' - ' . __FUNCTION__ . ' matched: ' . $appliesTo . ', Url: ' . $url);
        return true;
    }

    return false;
}
||||||
479 | |||||||
/**
 * Returns a StaticSiteContentItem for the given URL.
 * Relative URLs are used as the unique identifiers by this importer.
 *
 * An $id that does not start with "/" is first passed through decodeId(); if the result
 * still does not start with "/" the call is rejected.
 *
 * @param string $id The URL, relative to BaseURL, starting with "/".
 * @return StaticSiteContentItem
 * @throws \InvalidArgumentException When $id does not start with "/", even after decoding.
 */
public function getObject($id)
{
    // substr() rather than $id[0]: an empty or null $id must not raise an
    // "uninitialized string offset" warning before it can be rejected.
    if (substr((string) $id, 0, 1) !== '/') {
        $id = $this->decodeId($id);

        if (substr((string) $id, 0, 1) !== '/') {
            throw new \InvalidArgumentException("\$id must start with /");
        }
    }

    return StaticSiteContentItem::create($this, $id);
}
||||||
498 | |||||||
/**
 * Returns the content item for the site root ("/").
 *
 * @return StaticSiteContentItem
 */
public function getRoot()
{
    $rootId = '/';

    return $this->getObject($rootId);
}
||||||
507 | |||||||
/**
 * Signals external-content module that we wish to operate on `SiteTree` and `File` objects.
 *
 * @return array Map of target name => true for "sitetree" and "file".
 */
public function allowedImportTargets()
{
    return array_fill_keys(['sitetree', 'file'], true);
}
||||||
520 | |||||||
/**
 * Return the root node, or nothing when the site has not been crawled yet.
 *
 * @param boolean $showAll Not used by this implementation.
 * @return ArrayList A list containing the root node, or an empty list before any crawl
 */
public function stageChildren($showAll = false)
{
    $crawled = $this->urlList()->hasCrawled();
    $children = $crawled ? [$this->getObject('/')] : [];

    return ArrayList::create($children);
}
||||||
537 | |||||||
/**
 * Creates the importer used for this content source.
 *
 * @param mixed $target Not used; a StaticSiteImporter is created for every target.
 * @return StaticSiteImporter
 */
public function getContentImporter($target = null)
{
    $importer = StaticSiteImporter::create();

    return $importer;
}
||||||
547 | |||||||
/**
 * A profile is valid once its "BaseUrl" field holds a truthy value.
 *
 * @return boolean
 */
public function isValid()
{
    return $this->BaseUrl ? true : false;
}
||||||
556 | |||||||
/**
 * Importing is allowed whenever the profile is valid (see isValid()).
 *
 * @param \SilverStripe\Security\Member|null $member  Not used by this implementation.
 * @param array                              $context Not used by this implementation.
 * @return boolean
 */
public function canImport($member = null, $context = [])
{
    $profileIsValid = $this->isValid();

    return $profileIsValid;
}
||||||
567 | |||||||
/**
 * Creation is always permitted; neither argument is consulted.
 *
 * @param \SilverStripe\Security\Member|null $member  Not used by this implementation.
 * @param array                              $context Not used by this implementation.
 * @return boolean Always true.
 */
public function canCreate($member = null, $context = [])
{
    $allowed = true;

    return $allowed;
}
||||||
578 | } |
||||||
579 |