1
|
|
|
<?php |
2
|
|
|
|
3
|
|
|
namespace PhpTek\Exodus\Transform; |
4
|
|
|
|
5
|
|
|
use SilverStripe\Assets\File; |
6
|
|
|
use SilverStripe\Assets\Folder; |
7
|
|
|
use SilverStripe\Control\Controller; |
8
|
|
|
use SilverStripe\ORM\DataObject; |
9
|
|
|
use SilverStripe\AssetAdmin\Helper\ImageThumbnailHelper; |
|
|
|
|
10
|
|
|
|
11
|
|
|
/**
 * URL transformer specific to SilverStripe's `File` class for use with the module's
 * import content feature. It will re-create all available data of the scraped file into SilverStripe's
 * database and re-create a copy of the file itself on the filesystem.
 * If enabled in the CMS UI, links to imported images and documents in imported page-content will also be automatically
 * re-written.
 *
 * @todo write unit-test for unwritable assets dir.
 *
 * @package phptek/silverstripe-exodus
 * @author Sam Minee <[email protected]>
 * @author Russell Michell <[email protected]>
 * @see {@link StaticSiteDataTypeTransformer}
 */
class StaticSiteFileTransformer extends StaticSiteDataTypeTransformer
{
    /**
     * Number of milliseconds to pause before each file is processed, to reduce load
     * on the remote server. The value is multiplied by 1000 and passed to usleep().
     *
     * @var int
     */
    private static $sleep_multiplier = 10;

    /**
     * Generic function called by \ExternalContentImporter
     *
     * Imports a single remote file: resolves the import schema, applies the duplication
     * strategy, builds the File's properties from the downloaded temporary file, then
     * writes and publishes it. Every early exit is logged.
     *
     * @inheritdoc
     * @throws \Exception If the matched import schema has an empty DataType.
     */
    public function transform($item, $parentObject, $strategy)
    {
        $this->utils->log("START file-transform for: ", $item->AbsoluteURL, $item->ProcessedMIME);

        if (!$item->checkIsType('file')) {
            $this->utils->log(" - Item not of type 'file'. for: ", $item->AbsoluteURL, $item->ProcessedMIME);
            $this->utils->log("END file-transform for: ", $item->AbsoluteURL, $item->ProcessedMIME);

            return false;
        }

        $source = $item->getSource();

        // Sleep for Xms to reduce load on the remote server
        usleep((int) self::$sleep_multiplier * 1000);

        // Extract remote location of File
        $contentFields = $this->getContentFieldsAndSelectors($item, 'File');

        // Default value for Filename
        if (empty($contentFields['Filename'])) {
            $contentFields['Filename'] = ['content' => $item->externalId];
        }

        $schema = $source->getSchemaForURL($item->AbsoluteURL, $item->ProcessedMIME);

        if (!$schema) {
            $this->utils->log(" - Couldn't find an import schema for: ", $item->AbsoluteURL, $item->ProcessedMIME);
            $this->utils->log("END file-transform for: ", $item->AbsoluteURL, $item->ProcessedMIME);

            return false;
        }

        $dataType = $schema->DataType;

        if (!$dataType) {
            $this->utils->log(" - DataType for migration schema is empty for: ", $item->AbsoluteURL, $item->ProcessedMIME);
            $this->utils->log("END file-transform for: ", $item->AbsoluteURL, $item->ProcessedMIME);

            throw new \Exception('DataType for migration schema is empty!');
        }

        // Process incoming according to user-selected duplication strategy
        if (!$file = $this->duplicationStrategy($dataType, $item, $source->BaseUrl, $strategy, $parentObject)) {
            $this->utils->log("END file-transform for: ", $item->AbsoluteURL, $item->ProcessedMIME);

            return false;
        }

        // Without a downloaded temporary file there is nothing to build the asset from
        $tmpPath = $contentFields['tmp_path'] ?? null;

        if (empty($tmpPath)) {
            $this->utils->log(" - No temporary file available for: ", $item->AbsoluteURL, $item->ProcessedMIME);
            $this->utils->log("END file-transform for: ", $item->AbsoluteURL, $item->ProcessedMIME);

            return false;
        }

        // Prepare $file with all the correct properties, ready for writing
        if (!$file = $this->buildFileProperties($file, $item, $source, $tmpPath)) {
            // The download was rejected, so don't leave it lying around
            $this->discardTmpFile($tmpPath);
            $this->utils->log("END file-transform for: ", $item->AbsoluteURL, $item->ProcessedMIME);

            return false;
        }

        /*
         * File::onAfterWrite() calls File::updateFileSystem() which throws
         * an exception if the same image is attempted to be written.
         * N.b this was probably happening because we weren't versioning files through {@link Upload::load()}
         * and the same filename was being used. This should be fixed now (@see: self::versionFile()).
         */
        try {
            if (!$file->write()) {
                $this->utils->log(" - Not imported (no write): ", $item->AbsoluteURL, $item->ProcessedMIME);
            }

            $file->publishSingle();

            // Generate thumbnails
            // NOTE(review): run() takes no file argument here, so it presumably processes more than
            // just this file - confirm whether a per-file call would be sufficient.
            ImageThumbnailHelper::singleton()->run();
        } catch (\Exception $e) {
            $this->utils->log($e->getMessage(), $item->AbsoluteURL, $item->ProcessedMIME);
        } finally {
            // Remove garbage tmp files if/when left lying around, including when the write failed
            $this->discardTmpFile($tmpPath);
        }

        $this->utils->log("END file-transform for: ", $item->AbsoluteURL, $item->ProcessedMIME);

        return StaticSiteTransformResult::create($file, $item->stageChildren());
    }

    /**
     * Build the properties required for a safely saved SilverStripe asset.
     * Attempts to detect and fix bad file-extensions based on the available Mime-Type.
     *
     * The filename and extension are taken from the path component of the item's URL only,
     * so that query-strings and fragments never end up in the stored filename.
     *
     * @param File $file
     * @param Object $item Object properties are used to fixup bad-file extensions or filenames with no
     *                     extension but which _do_ have a Mime-Type.
     * @param Object $source The content source; its ID is recorded on the file.
     * @param string $tmpPath Local path of the downloaded copy of the remote file.
     * @return mixed (boolean | File) The prepared File, or false when no usable extension could be determined.
     */
    public function buildFileProperties(File $file, $item, $source, $tmpPath)
    {
        $url = $item->AbsoluteURL;
        $mime = $item->ProcessedMIME;
        $assetsPath = $this->getDirHierarchy($url);

        // Only the path component names the file; fall back to the raw URL if it cannot be parsed
        $urlPath = parse_url((string) $url, PHP_URL_PATH);

        if (!is_string($urlPath) || $urlPath === '') {
            $urlPath = (string) $url;
        }

        /*
         * Run checks on original filename and name it as per default if nothing can be done with it.
         * '.zzz' not in framework/_config/mimetypes.yml and unlikely ever to be found in File, so fails gracefully.
         */
        $dummy = 'unknown.zzz';
        $origFilename = pathinfo($urlPath, PATHINFO_FILENAME);
        $origFilename = (mb_strlen($origFilename) > 0 ? $origFilename : $dummy);

        /*
         * Some assets come through with no file-extension, which confuses SS's File logic
         * and throws errors causing the import to stop dead.
         * Check for this and guess an appropriate file-extension, if possible.
         */
        $oldExt = pathinfo($urlPath, PATHINFO_EXTENSION);
        $extIsValid = in_array($oldExt, $this->getSSExtensions(), true);

        if ($extIsValid) {
            // The extension is fine, but the mime-type may still rule the file out
            if ($this->mimeProcessor->isBadMimeType($mime)) {
                $this->utils->log(" - WARNING: Bad mime-type: \"$mime\". Unable to assign new file-extension (#3) - DISCARDING.", $url, $mime);

                return false;
            }

            $useExtension = $oldExt;
        } else {
            // Only attempt to define and append a new extension ($newExt) if $oldExt is invalid
            $newExt = $this->mimeProcessor->ext_to_mime_compare($oldExt, $mime, true);

            if (!$newExt) {
                $this->utils->log(" - WARNING: Bad file-extension: \"$oldExt\". Unable to assign new file-extension (#1) - DISCARDING.", $url, $mime);

                return false;
            }

            $useExtension = $newExt;
            $logMessagePt1 = "NOTICE: Bad file-extension: \"$oldExt\". Assigned new file-extension: \"$newExt\" based on MimeType.";
            $logMessagePt2 = PHP_EOL."\t - FROM: \"$url\"".PHP_EOL."\t - TO: \"$origFilename.$newExt\"";

            $this->utils->log(' - ' . $logMessagePt1 . $logMessagePt2, '', $mime);
        }

        $folder = Folder::find_or_make($assetsPath);
        $fileName = sprintf('%s.%s', $origFilename, $useExtension);

        $file->setFromLocalFile($tmpPath, $fileName);
        $file->setFilename($fileName);
        $file->ParentID = $folder->ID;
        $file->StaticSiteContentSourceID = $source->ID;
        $file->StaticSiteURL = $url;
        $file->StaticSiteImportID = $this->getCurrentImportID();

        $this->utils->log(" - NOTICE: File-properties built successfully for: ", $url, $mime);

        return $file;
    }

    /**
     * Determine the correct parent directory hierarchy from the imported file's remote-path,
     * such that it is mapped to the appropriate area under the main SilverStripe 'assets' directory.
     *
     * The hierarchy is made of the directory segments of the URL's path, i.e. every non-empty
     * segment except the last one (the file itself). '.' and '..' segments are ignored because the
     * URL comes from a remote site and the result is used to build a local path.
     *
     * @param string $absoluteUrl The absolute URL of this file on the remote server.
     * @param boolean $full Return absolute path from server's filesystem root
     * @return string The path to append to 'assets' and use as local cache dir.
     */
    public function getDirHierarchy(string $absoluteUrl, bool $full = false): string
    {
        /*
         * Determine the top-level directory under 'assets' under-which this item's
         * dir-hierarchy will be created.
         */
        $parentDir = '';
        // There may be no current controller at all, in which case there are no POST vars either
        $postVars = Controller::has_curr() ? Controller::curr()->getRequest()->postVars() : [];
        $targetId = $postVars['FileMigrationTarget'] ?? null;

        if (is_numeric($targetId) && (int) $targetId > 0) {
            $parentDirData = DataObject::get_by_id(File::class, (int) $targetId);

            // The selected target may not resolve to a record
            if ($parentDirData) {
                $parentDir = (string) $parentDirData->Title;
            }
        }

        // Work on the URL's path only; scheme, host, query-string and fragment play no part
        $remotePath = parse_url($absoluteUrl, PHP_URL_PATH);

        if (!is_string($remotePath)) {
            // Unparseable (or path-less) URL: strip scheme and host by hand instead
            $remotePath = (string) preg_replace("#https?://(www\.)?[^/]+#", '', $absoluteUrl);
        }

        $path = [];

        foreach (explode('/', $remotePath) as $fragment) {
            if ($fragment === '' || $fragment === '.' || $fragment === '..') {
                continue;
            }

            $path[] = $fragment;
        }

        // The final segment is the file itself, not a directory
        array_pop($path);

        $joinedPath = Controller::join_links($parentDir, implode('/', $path));
        $fullPath = ASSETS_PATH . ($joinedPath ? DIRECTORY_SEPARATOR . $joinedPath : '');

        return $full ? $fullPath : $joinedPath;
    }

    /**
     * Borrows logic from Upload::load() to ensure duplicated files get renamed
     * correctly. This therefore allows multiple versions of the same physical image
     * on the filesystem.
     *
     * Only the file-name part of the path is ever renamed; directory names are left untouched.
     * If the name cannot be versioned, the last path tried is returned as-is, even though it
     * already exists.
     *
     * @param string $relativeFilePath The path to the file relative to the 'assets' dir.
     * @return string $relativeFilePath
     */
    public function versionFile(string $relativeFilePath): string
    {
        $base = ASSETS_PATH;

        // Split once, so that dots and underscores in directory names can never be rewritten
        $slashPos = strrpos($relativeFilePath, '/');
        $dirPrefix = ($slashPos === false) ? '' : substr($relativeFilePath, 0, $slashPos + 1);
        $fileName = ($slashPos === false) ? $relativeFilePath : substr($relativeFilePath, $slashPos + 1);
        $i = 1;

        // A while loop provides the ability to continually add further duplicates with the right name
        while (file_exists("$base/$relativeFilePath")) {
            $i++;

            // make sure archives retain valid extensions
            $isTarGz = substr($fileName, strlen($fileName) - strlen('.tar.gz')) == '.tar.gz';
            $isTarBz2 = substr($fileName, strlen($fileName) - strlen('.tar.bz2')) == '.tar.bz2';

            if ($isTarGz || $isTarBz2) {
                $renamed = preg_replace('#[0-9]*(\.tar\.[^.]+$)#', $i . "$1", $fileName);
            } elseif (strpos($fileName, '.') !== false) {
                $renamed = preg_replace('#[0-9]*(\.[^.]+$)#', $i . "$1", $fileName);
            } elseif (strpos($fileName, '_') !== false) {
                $renamed = preg_replace('#_([^_]+$)#', '_' . $i, $fileName);
            } else {
                $renamed = $fileName . '_' . $i;
            }

            /*
             * We've tried and failed, so we just return the original, that way we get _something_.
             * An unchanged name on the very first attempt only means the name already ended in "2",
             * so the next counter is tried. After that, an unchanged name means the pattern will
             * never match and looping any further would never terminate.
             */
            if (!is_string($renamed) || ($renamed === $fileName && $i > 2)) {
                $this->utils->log(" - Couldn't fix $relativeFilePath with $i attempts in " . __FUNCTION__);

                break;
            }

            $fileName = $renamed;
            $relativeFilePath = $dirPrefix . $fileName;
        }

        return $relativeFilePath;
    }

    /**
     * Delete a temporary download if it is still present on disk.
     *
     * @param mixed $tmpPath Path of the temporary file; anything other than a non-empty string is ignored.
     * @return void
     */
    private function discardTmpFile($tmpPath)
    {
        if (is_string($tmpPath) && $tmpPath !== '' && file_exists($tmpPath)) {
            unlink($tmpPath);
        }
    }
}
282
|
|
|
|
The issue could also be caused by a filter entry in the build configuration. If the path has been excluded in your configuration, e.g.
excluded_paths: ["lib/*"]
, you can move it to the dependency path list as follows: dependency_paths: ["lib/*"]. For further information see https://scrutinizer-ci.com/docs/tools/php/php-scrutinizer/#list-dependency-paths