| 1 | <?php |
||||||
| 2 | |||||||
| 3 | namespace PhpTek\Exodus\Model; |
||||||
| 4 | |||||||
| 5 | use ExternalContentSource; |
||||||
| 6 | use PhpTek\Exodus\Transform\StaticSiteImporter; |
||||||
| 7 | use PhpTek\Exodus\Tool\StaticSiteUtils; |
||||||
| 8 | use PhpTek\Exodus\Tool\StaticSiteUrlList; |
||||||
| 9 | use PhpTek\Exodus\Tool\StaticSiteMimeProcessor; |
||||||
| 10 | use PhpTek\Exodus\Tool\StaticSiteUrlProcessor; |
||||||
| 11 | use SilverStripe\Forms\HeaderField; |
||||||
| 12 | use SilverStripe\Core\ClassInfo; |
||||||
| 13 | use SilverStripe\ORM\DataObject; |
||||||
| 14 | use SilverStripe\ORM\ArrayList; |
||||||
| 15 | use SilverStripe\Forms\FormAction; |
||||||
| 16 | use SilverStripe\Forms\ReadonlyField; |
||||||
| 17 | use SilverStripe\Forms\LiteralField; |
||||||
| 18 | use SilverStripe\Forms\CheckboxField; |
||||||
| 19 | use SilverStripe\Forms\OptionsetField; |
||||||
| 20 | use SilverStripe\Forms\ListboxField; |
||||||
| 21 | use SilverStripe\Forms\GridField\GridFieldAddNewButton; |
||||||
| 22 | use SilverStripe\Assets\File; |
||||||
| 23 | use SilverStripe\CMS\Model\SiteTree; |
||||||
| 24 | use SilverStripe\Forms\DropdownField; |
||||||
| 25 | use SilverStripe\ORM\FieldType\DBText; |
||||||
| 26 | use SilverStripe\ORM\FieldType\DBVarchar; |
||||||
| 27 | use SilverStripe\Forms\ToggleCompositeField; |
||||||
| 28 | use SilverStripe\ORM\FieldType\DBField; |
||||||
| 29 | use SilverStripe\Forms\TextField; |
||||||
| 30 | use SilverStripe\ORM\FieldType\DBBoolean; |
||||||
| 31 | use SilverStripe\ORM\FieldType\DBDatetime; |
||||||
| 32 | |||||||
// The ageing phpcrawl library triggers deprecation notices under PHP 8+, so E_DEPRECATED is masked.
// error_reporting() is used rather than ini_set() with a string: ini_set() does not evaluate
// constant expressions such as 'E_ALL & ~E_DEPRECATED', which would resolve to level 0.
error_reporting(E_ALL & ~E_DEPRECATED);
||||||
| 35 | |||||||
| 36 | /** |
||||||
| 37 | * Define the overarching content-sources, schemas etc. Probably better named a "Migration Profile". |
||||||
| 38 | * |
||||||
| 39 | * @package phptek/silverstripe-exodus |
||||||
| 40 | * @author Sam Minee <[email protected]> |
||||||
| 41 | * @author Russell Michell <[email protected]> |
||||||
| 42 | */ |
||||||
| 43 | class StaticSiteContentSource extends ExternalContentSource |
||||||
| 44 | { |
||||||
| 45 | /** |
||||||
| 46 | * @var string |
||||||
| 47 | */ |
||||||
| 48 | public const CACHE_DIR_PREFIX = 'static-site-0'; // Default (The zero-suffix is used by test-suite) |
||||||
| 49 | |||||||
| 50 | /** |
||||||
| 51 | * @var string |
||||||
| 52 | */ |
||||||
| 53 | private static $table_name = 'StaticSiteContentSource'; |
||||||
|
0 ignored issues
–
show
introduced
by
Loading history...
|
|||||||
| 54 | |||||||
| 55 | /** |
||||||
| 56 | * @var string |
|||||||
| 57 | */ |
||||||
| 58 | private static $singular_name = 'Migration Profile'; |
||||||
|
0 ignored issues
–
show
|
|||||||
| 59 | |||||||
| 60 | /** |
||||||
| 61 | * @var string |
||||||
| 62 | */ |
||||||
| 63 | private static $plural_name = 'Migration Profiles'; |
||||||
|
0 ignored issues
–
show
|
|||||||
| 64 | |||||||
| 65 | /** |
||||||
| 66 | * |
||||||
| 67 | * @var array |
||||||
| 68 | */ |
||||||
| 69 | private static $db = [ |
||||||
|
0 ignored issues
–
show
|
|||||||
| 70 | 'BaseUrl' => DBVarchar::class, |
||||||
| 71 | 'UrlProcessor' => DBVarchar::class, |
||||||
| 72 | 'ExtraCrawlUrls' => DBText::class, |
||||||
| 73 | 'UrlExcludePatterns' => DBText::class, |
||||||
| 74 | 'ParseCSS' => DBBoolean::class, |
||||||
| 75 | 'AutoRunTask' => DBBoolean::class, |
||||||
| 76 | ]; |
||||||
| 77 | |||||||
| 78 | /** |
||||||
| 79 | * |
||||||
| 80 | * @var array |
||||||
| 81 | */ |
||||||
| 82 | private static $has_many = [ |
||||||
|
0 ignored issues
–
show
|
|||||||
| 83 | "Schemas" => StaticSiteContentSourceImportSchema::class, |
||||||
| 84 | "Pages" => SiteTree::class, |
||||||
| 85 | "Files" => File::class, |
||||||
| 86 | ]; |
||||||
| 87 | |||||||
| 88 | /** |
||||||
| 89 | * |
||||||
| 90 | * @var array |
||||||
| 91 | */ |
||||||
| 92 | private static $export_columns = [ |
||||||
|
0 ignored issues
–
show
|
|||||||
| 93 | "StaticSiteContentSourceImportSchema.DataType", |
||||||
| 94 | "StaticSiteContentSourceImportSchema.Order", |
||||||
| 95 | "StaticSiteContentSourceImportSchema.AppliesTo", |
||||||
| 96 | "StaticSiteContentSourceImportSchema.MimeTypes", |
||||||
| 97 | ]; |
||||||
| 98 | |||||||
| 99 | /** |
||||||
| 100 | * |
||||||
| 101 | * @var string |
||||||
| 102 | */ |
||||||
| 103 | public $absoluteURL = null; |
||||||
| 104 | |||||||
| 105 | /** |
||||||
| 106 | * Where do we store our items for caching? |
||||||
| 107 | * Also used by calling logic |
||||||
| 108 | * |
||||||
| 109 | * @var string |
||||||
| 110 | */ |
||||||
| 111 | public $cacheDir = null; |
||||||
| 112 | |||||||
| 113 | /** |
||||||
| 114 | * Holds the StaticSiteUtils object on construct |
||||||
| 115 | * |
||||||
| 116 | * @var StaticSiteUtils $utils |
||||||
| 117 | */ |
||||||
| 118 | protected $utils; |
||||||
| 119 | |||||||
/**
 * Builds the content source, then prepares the two helpers the rest of the class relies on:
 * the per-record cache directory name and the shared StaticSiteUtils instance.
 *
 * The cache directory name is CACHE_DIR_PREFIX with its trailing digits replaced by this
 * record's ID.
 *
 * @param array|null $record      Forwarded unchanged to the parent constructor.
 * @param bool       $isSingleton Forwarded unchanged to the parent constructor.
 * @param mixed      $model       Forwarded unchanged to the parent constructor.
 */
public function __construct($record = null, $isSingleton = false, $model = null)
{
    parent::__construct($record, $isSingleton, $model);

    $this->utils = singleton(StaticSiteUtils::class);
    $this->cacheDir = preg_replace('#[0-9]+$#', $this->ID, self::CACHE_DIR_PREFIX);
}
||||||
| 133 | |||||||
/**
 * Renders the de-duplicated list of processed URLs as an HTML unordered list, for display
 * in the CMS once a crawl has completed.
 *
 * Entries whose processed URL differs from the key it is stored under are shown as
 * "processed (was: original)".
 *
 * @return string The `<ul>` markup, or an empty string unless the crawl status is "complete".
 */
public function listofCrawledItems(): string
{
    $urlList = $this->urlList();

    if ($urlList->getSpiderStatus() !== StaticSiteUrlList::CRAWL_STATUS_COMPLETE) {
        return '';
    }

    $items = [];

    foreach (array_unique($urlList->getProcessedURLs()) as $raw => $processed) {
        $items[] = $raw != $processed
            ? '<li>' . sprintf('%s (was: %s)', $processed, $raw) . '</li>'
            : '<li>' . $processed . '</li>';
    }

    return '<ul>' . implode('', $items) . '</ul>';
}
||||||
| 159 | |||||||
/**
 * Builds the CMS edit form for a migration profile.
 *
 * Starting from the parent's field list, this arranges four areas:
 *  - "Main" (retitled "Profile"): name, base URL, URL processing options and the schema grid.
 *  - "Crawl": crawl status, number of crawled URIs, the crawl button and, once a crawl has
 *    completed, the list of crawled URIs.
 *  - "Import": introductory text and, when import records exist, controls for clearing them.
 *  - "Environment": PHP and webserver details.
 *
 * @return \SilverStripe\Forms\FieldList
 * @throws \LogicException When the URL list reports a crawl status this form does not recognise.
 */
public function getCMSFields()
{
    $fields = parent::getCMSFields();

    $fields->removeFieldsFromTab('Root', [
        'Pages',
        'Files',
        'ShowContentInMenu',
        'Name',
    ]);

    // Because we can't pass arrays to FieldList::insertBefore
    $profileIntroFields = [
        HeaderField::create('ProfileHeading', 'Migration Profile Configuration'),
        LiteralField::create('ProfileIntro', ''
            . '<p class="message notice">'
            . 'This is where the basics of your migration profile are configured.'
            . '</p>'),
    ];

    foreach ($profileIntroFields as $introField) {
        $fields->insertBefore('BaseUrl', $introField);
    }

    // Processing Options
    $processingOptions = ['' => "No Processing"];

    foreach (ClassInfo::implementorsOf(StaticSiteUrlProcessor::class) as $processor) {
        $processorObj = singleton($processor);
        $processingOptions[$processor] = $processorObj->getName();
    }

    $fields->addFieldsToTab(
        'Root.Main',
        [
            TextField::create("BaseUrl", "Base URL")
                ->setDescription('The base URL of the site to be crawled and imported.'),
            DropdownField::create("UrlProcessor", "URL Transformation", $processingOptions)
                ->setDescription('Select the way in which crawled URLs should be transformed and cleaned-up.'),
            CheckboxField::create("ParseCSS", "Fetch external CSS")
                ->setDescription("Fetch images defined as CSS <strong>background-image</strong> which are not ordinarily reachable by crawling alone."),
            CheckboxField::create("AutoRunTask", "Automatically rewrite links into Silverstripe-aware links")
                ->setDescription("This will run a link-rewrite task automatically once an import has completed."),
        ]
    );
    $fields->fieldByName('Root.Main')->setTitle('Profile');
    $fields->insertBefore('BaseUrl', TextField::create('Name', 'Name')
        ->setDescription('Allows you to differentiate between profiles.'));

    // Schema Gridfield
    $fields->addFieldToTab('Root.Main', HeaderField::create('ImportConfigHeader', 'Import Schema Configuration'));
    $addNewButton = (new GridFieldAddNewButton('before'))->setButtonName("Add Schema");
    $importRules = $fields->dataFieldByName('Schemas');
    $importRules->getConfig()->removeComponentsByType(GridFieldAddNewButton::class);
    $importRules->getConfig()->addComponent($addNewButton);
    $fields->removeFieldFromTab("Root", "Schemas");
    $fields->addFieldToTab('Root.Main', LiteralField::create(
        'SchemaIntro',
        ''
        . '<p class="message notice">Schema map MIME-Types to Silverstripe content classes and'
        . ' are related to one or more Import Rules. Each rule determines how content located at crawled URLs'
        . ' should be imported into a content classes\' fields with the use of CSS selectors.'
        . ' Where more than one schema exists for a field, they\'ll be processed in the order of Priority:'
        . ' The first Schema to match a URI Pattern will be the one used for that field.</p>'
    ));
    $fields->addFieldToTab("Root.Main", $importRules);

    // The crawl status drives the button label, the status field and the URL listing below,
    // so it is read once here.
    $urlList = $this->urlList();
    $spiderStatus = $urlList->getSpiderStatus();

    switch ($spiderStatus) {
        case StaticSiteUrlList::CRAWL_STATUS_NOTSTARTED:
            $crawlButtonText = _t('StaticSiteContentSource.CRAWL_SITE', 'Crawl');
            break;
        case StaticSiteUrlList::CRAWL_STATUS_PARTIAL:
            $crawlButtonText = _t('StaticSiteContentSource.RESUME_CRAWLING', 'Resume Crawl');
            break;
        case StaticSiteUrlList::CRAWL_STATUS_COMPLETE:
            $crawlButtonText = _t('StaticSiteContentSource.RECRAWL_SITE', 'Re-Crawl');
            break;
        default:
            throw new \LogicException("Invalid getSpiderStatus() value '" . $spiderStatus . "'");
    }

    $crawlButton = FormAction::create('crawlsite', $crawlButtonText)
        ->setAttribute('data-icon', 'arrow-circle-double')
        ->setUseButtonTag(true)
        ->addExtraClass('btn action btn btn-primary tool-button font-icon-plus');
    $crawlMsg = '';

    // Disable crawl-button if assets dir isn't writable
    // TODO this will need to change if change the default location of crawl data. Like _why_ is it in assets?
    if (!file_exists(ASSETS_PATH) || !is_writable(ASSETS_PATH)) {
        $crawlMsg = '<p class="message warning">Warning: Assets directory is not writable.</p>';
        $crawlButton->setDisabled(true);
    }

    $fields->addFieldsToTab('Root.Crawl', [
        ReadonlyField::create("CrawlStatus", "Crawl Status", $spiderStatus),
        ReadonlyField::create("NumURIs", "Number of URIs Crawled", $urlList->getNumURIs()),
        // The warning (if any) is followed by the toolbar in every case, so the disabled
        // button stays visible. $crawlMsg already carries its own <p> wrapper.
        LiteralField::create(
            'CrawlActions',
            $crawlMsg . '<div class="btn-toolbar">' . $crawlButton->forTemplate() . '</div>'
        ),
    ]);

    // Because we can't pass arrays to FieldList::insertBefore
    $crawlIntroFields = [
        HeaderField::create('CrawlHeading', 'Source Site Crawling'),
        LiteralField::create('CrawlIntro', ''
            . '<p class="message notice">'
            . 'Before you can load any content into Silverstripe, all source URLs must first be crawled.'
            . ' Select the button below to start or resume a crawl as applicable.'
            . '</p>'),
    ];

    foreach ($crawlIntroFields as $introField) {
        $fields->insertBefore('CrawlStatus', $introField);
    }

    /*
     * @todo use customise() and arrange this using an includes .ss template fragment
     */
    if ($spiderStatus === StaticSiteUrlList::CRAWL_STATUS_COMPLETE) {
        // addFieldsToTab() is required to append both fields: the third argument of
        // addFieldToTab() is an "insert before" reference, not a second field.
        $fields->addFieldsToTab('Root.Crawl', [
            LiteralField::create(
                'CrawlURLListUIntro',
                '<p class="message notice">Review the list of crawled URIs below. When you\'re happy with the import'
                . ' you can proceed to the "Import" tab and follow the instructions there.</p>'
            ),
            LiteralField::create('CrawlURLList', $this->listofCrawledItems()),
        ]);
    }

    $fields->dataFieldByName("ExtraCrawlUrls")
        ->setDescription("Add URIs that are not reachable via links when content scraping, eg: '/about/team'. One per line")
        ->setTitle('Additional URIs');
    $fields->dataFieldByName("UrlExcludePatterns")
        ->setDescription("URLs that should be excluded. (Supports regular expressions e.g. '/about/.*'). One per line")
        ->setTitle('Excluded URLs');

    $hasImports = DataObject::get(StaticSiteImportDataObject::class);
    $_source = [];

    foreach ($hasImports as $import) {
        $date = DBField::create_field(DBDatetime::class, $import->Created)->Time24();
        $_source[$import->ID] = $date . ' (Import #' . $import->ID . ')';
    }

    $fields->addFieldsToTab('Root.Import', [
        HeaderField::create('ImportHeading', 'Source Site Import'),
        LiteralField::create('ImportIntro', ''
            . '<p class="message notice">'
            . 'Use this area to configure where in the current IA imported page content should appear.'
            . ' The same goes for imported files and images.'
            . '</p>'),
    ]);

    if ($importCount = $hasImports->count()) {
        $clearImportButton = FormAction::create('clearimports', 'Clear selected imports')
            ->setAttribute('data-icon', 'arrow-circle-double')
            ->addExtraClass('btn action btn btn-primary tool-button font-icon-plus')
            ->setUseButtonTag(true);

        $clearImportField = ToggleCompositeField::create('ClearImports', 'Clear Import Metadata', [
            LiteralField::create('ImportCountText', '<p>Each time an import is run, some meta information is stored such as an import identifier and failed-link records.<br/><br/></p>'),
            LiteralField::create('ImportCount', '<p>Total imports: ' . $importCount . '</p>'),
            ListboxField::create('ShowImports', 'Select import(s) to clear:', $_source, '', null, true),
            CheckboxField::create('ClearAllImports', 'Clear all import meta-data', 0),
            LiteralField::create('ImportActions', '<div class="btn-toolbar">' . $clearImportButton->forTemplate() . '</div>'),
        ])->addExtraClass('clear-imports');

        $fields->addFieldToTab('Root.Import', $clearImportField);
    }

    // PHP_VERSION is a core constant; $_SERVER has no standard 'PHP_VERSION' key.
    // SERVER_SOFTWARE is not guaranteed to be set, so fall back to a placeholder.
    $serverSoftware = htmlspecialchars((string) ($_SERVER['SERVER_SOFTWARE'] ?? 'Unknown'), ENT_QUOTES);

    $fields->addFieldsToTab('Root.Environment', [
        HeaderField::create('EnvHeading', 'Webserver Environment'),
        LiteralField::create('EnvIntro', ''
            . '<p class="message notice">'
            . 'Refer to this area for information related to the PHP and Webserver environment'
            . ' which may affect the proper function and performance of this tool.'
            . '</p>'),
        LiteralField::create('EnvInfo', ''
            . '<ul>'
            . '<li>PHP Info: ' . PHP_VERSION . '</li>'
            . '<li>Webserver Info: ' . $serverSoftware . '</li>'
            . '<li>max_execution_time: ' . sprintf('%s seconds', ini_get('max_execution_time')) . '</li>'
            // Shown verbatim: memory_limit uses shorthand such as "128M", "1G" or "-1",
            // which an integer "%d Mb" conversion would misreport.
            . '<li>memory_limit: ' . ini_get('memory_limit') . '</li>'
            . '</ul>'),
    ]);

    return $fields;
}
||||||
| 354 | |||||||
/**
 * After a write, re-processes already-crawled URLs when the "UrlProcessor" field has changed.
 *
 * The newly selected processor (or null when none is selected) is handed to the URL list
 * before its URLs are re-processed. Nothing happens when the field is unchanged or the
 * URL list reports that no crawl has taken place.
 *
 * @return void
 */
public function onAfterWrite()
{
    parent::onAfterWrite();

    $urlList = $this->urlList();

    if (!$this->isChanged('UrlProcessor') || !$urlList->hasCrawled()) {
        return;
    }

    $processorClass = $this->UrlProcessor;
    $processor = $processorClass ? $processorClass::create() : null;

    $urlList->setUrlProcessor($processor);
    $urlList->reprocessUrls();
}
||||||
| 376 | |||||||
/**
 * Returns the StaticSiteUrlList for this profile, building and configuring it on first use.
 *
 * The list is created against ASSETS_PATH plus this record's cache directory and stored
 * on `$this->urlList` for subsequent calls. On creation it is given, when set on the record:
 * the selected URL processor, the extra crawl URLs and the exclude patterns. Both of the
 * latter are split on any run of whitespace.
 *
 * @return StaticSiteUrlList
 */
public function urlList()
{
    if ($this->urlList) {
        return $this->urlList;
    }

    $this->urlList = StaticSiteUrlList::create($this, ASSETS_PATH . '/' . $this->cacheDir);

    $processorClass = $this->UrlProcessor;
    if ($processorClass) {
        $this->urlList->setUrlProcessor($processorClass::create());
    }

    if ($this->ExtraCrawlUrls) {
        $this->urlList->setExtraCrawlUrls(
            preg_split('/\s+/', trim($this->ExtraCrawlUrls))
        );
    }

    if ($this->UrlExcludePatterns) {
        $this->urlList->setExcludePatterns(
            preg_split('/\s+/', trim($this->UrlExcludePatterns))
        );
    }

    return $this->urlList;
}
||||||
| 403 | |||||||
/**
 * Starts a crawl of the target site by delegating to the URL list.
 *
 * @param mixed $limit   Passed straight through to StaticSiteUrlList::crawl(); defaults to false here.
 * @param bool  $verbose Passed straight through to StaticSiteUrlList::crawl().
 * @return mixed Whatever StaticSiteUrlList::crawl() returns.
 * @throws \LogicException When no "Base URL" has been set on this profile.
 */
public function crawl($limit = false, $verbose = false)
{
    if ($this->BaseUrl) {
        return $this->urlList()->crawl($limit, $verbose);
    }

    throw new \LogicException('Can\'t crawl a site until the "Base URL" field is set.');
}
||||||
| 420 | |||||||
/**
 * Finds the first schema, in "Order" sequence, that applies to the given URL and Mime-Type.
 *
 * A schema applies when schemaCanParseURL() accepts the URL and, if a Mime-Type was given,
 * that type is among the schema's Mime-Types. The configured "undefined" Mime-Type is
 * always added to that set. Each inspected schema is logged.
 *
 * @param string      $absoluteURL
 * @param string|null $mimeType (Optional)
 * @return mixed The matching StaticSiteContentSourceImportSchema, or boolean false when none matches.
 */
public function getSchemaForURL($absoluteURL, $mimeType = null)
{
    $mimeType = StaticSiteMimeProcessor::cleanse($mimeType);

    // Sorting by "Order" makes the Priority setting decide which schema wins
    foreach ($this->Schemas()->sort('Order') as $index => $schema) {
        $urlMatches = $this->schemaCanParseURL($schema, $absoluteURL);
        $allowedMimeTypes = StaticSiteMimeProcessor::get_mimetypes_from_text($schema->MimeTypes);
        $mimeTypesForLog = implode(', ', $allowedMimeTypes);

        $this->utils->log(
            ' - Schema: ' . ($index + 1)
            . ', DataType: ' . $schema->DataType
            . ', AppliesTo: ' . $schema->AppliesTo
            . ' mimetypes: ' . $mimeTypesForLog
        );

        $allowedMimeTypes[] = StaticSiteUrlList::config()->get('undefined_mime_type');

        if (!$urlMatches) {
            continue;
        }

        $mimeTypeRejected = $mimeType && $allowedMimeTypes && !in_array($mimeType, $allowedMimeTypes);

        if (!$mimeTypeRejected) {
            return $schema;
        }
    }

    return false;
}
||||||
| 453 | |||||||
/**
 * Tests whether a schema's "AppliesTo" pattern matches the given URL.
 *
 * The base URL (minus any trailing slash) is removed from $url and the remainder is
 * matched, case-insensitively and anchored at its start, against the pattern. When the
 * schema has no "AppliesTo" value, the schema class's `default_applies_to` config value
 * is used instead. A successful match is logged.
 *
 * Unset (null) values for AppliesTo and BaseUrl are cast to empty strings before being
 * handed to string functions, since passing null to them is deprecated as of PHP 8.1.
 *
 * @param StaticSiteContentSourceImportSchema $schema
 * @param string $url
 * @return bool
 */
public function schemaCanParseURL(StaticSiteContentSourceImportSchema $schema, $url)
{
    $appliesTo = (string) $schema->AppliesTo;
    if ($appliesTo === '') {
        $appliesTo = (string) $schema::config()->get('default_applies_to');
    }

    // Use (escaped) pipes for delimiters as pipes themselves are unlikely to appear in legit URLs
    $appliesTo = str_replace('|', '\|', $appliesTo);

    // With no base URL there is nothing to strip, so the URL is tested as given
    $baseUrl = rtrim((string) $this->BaseUrl, '/');
    $urlToTest = $baseUrl === '' ? (string) $url : str_replace($baseUrl, '', (string) $url);

    if (preg_match("|^$appliesTo|i", $urlToTest)) {
        $this->utils->log(' - ' . __FUNCTION__ . ' matched: ' . $appliesTo . ', Url: ' . $url);
        return true;
    }

    return false;
}
||||||
| 479 | |||||||
/**
 * Returns a StaticSiteContentItem for the given URL.
 * Relative URLs are used as the unique identifiers by this importer.
 *
 * An $id that does not start with "/" is first passed through decodeId(); if the result
 * still does not start with "/", the ID is rejected. The prefix is checked with substr()
 * so that an empty or non-string ID is rejected cleanly instead of raising an
 * "Uninitialized string offset" warning first.
 *
 * @param string $id The URL, relative to BaseURL, starting with "/" (or an encoded form of it).
 * @return StaticSiteContentItem
 * @throws \InvalidArgumentException When the (decoded) ID does not start with "/".
 */
public function getObject($id)
{
    if (substr((string) $id, 0, 1) !== '/') {
        $id = $this->decodeId($id);

        if (substr((string) $id, 0, 1) !== '/') {
            throw new \InvalidArgumentException("\$id must start with /");
        }
    }

    return StaticSiteContentItem::create($this, $id);
}
||||||
| 498 | |||||||
/**
 * Returns the content item for the site root, i.e. the relative URL "/".
 *
 * @return StaticSiteContentItem
 */
public function getRoot()
{
    $rootItem = $this->getObject('/');

    return $rootItem;
}
||||||
| 507 | |||||||
/**
 * Tells the external-content module that this source imports into both `SiteTree`
 * and `File` objects.
 *
 * @return array Map of target name to true, for "sitetree" and "file".
 */
public function allowedImportTargets()
{
    $targets = array_fill_keys(['sitetree', 'file'], true);

    return $targets;
}
||||||
| 520 | |||||||
/**
 * Returns this source's child nodes: just the root item once the site has been crawled.
 *
 * @param bool $showAll Not used by this implementation.
 * @return ArrayList Contains the root node, or is empty when no crawl has taken place.
 */
public function stageChildren($showAll = false)
{
    $rootNodes = $this->urlList()->hasCrawled()
        ? [$this->getObject("/")]
        : [];

    return ArrayList::create($rootNodes);
}
||||||
| 537 | |||||||
/**
 * Supplies the importer used for this content source.
 *
 * @param mixed $target Not used by this implementation.
 * @return StaticSiteImporter A newly created importer.
 */
public function getContentImporter($target = null)
{
    $importer = StaticSiteImporter::create();

    return $importer;
}
||||||
| 547 | |||||||
/**
 * Reports whether this profile is usable, which here means its "BaseUrl" field is truthy.
 *
 * @return bool
 */
public function isValid()
{
    $baseUrl = $this->BaseUrl;

    return $baseUrl ? true : false;
}
||||||
| 556 | |||||||
/**
 * Import permission check: importing is allowed whenever the profile is valid.
 *
 * @param mixed $member  Not consulted by this implementation.
 * @param array $context Not consulted by this implementation.
 * @return bool The result of isValid().
 */
public function canImport($member = null, $context = [])
{
    $profileIsValid = $this->isValid();

    return $profileIsValid;
}
||||||
| 567 | |||||||
/**
 * Create permission check: creation is always allowed.
 *
 * @param mixed $member  Not consulted by this implementation.
 * @param array $context Not consulted by this implementation.
 * @return bool Always true.
 */
public function canCreate($member = null, $context = [])
{
    $allowed = true;

    return $allowed;
}
||||||
| 578 | } |
||||||
| 579 |