|
1
|
|
|
<?php |
|
2
|
|
|
|
|
3
|
|
|
namespace PhpTek\Exodus\Transform; |
|
4
|
|
|
|
|
5
|
|
|
use SilverStripe\Assets\File; |
|
6
|
|
|
use SilverStripe\Assets\Folder; |
|
7
|
|
|
use SilverStripe\Control\Controller; |
|
8
|
|
|
use SilverStripe\ORM\DataObject; |
|
9
|
|
|
use SilverStripe\AssetAdmin\Helper\ImageThumbnailHelper; |
|
|
|
|
|
|
10
|
|
|
|
|
11
|
|
|
/** |
|
12
|
|
|
* URL transformer specific to SilverStripe's `File` class for use with the module's |
|
13
|
|
|
* import content feature. It will re-create all available data of the scraped file into SilverStripe's |
|
14
|
|
|
* database and re-create a copy of the file itself on the filesystem. |
|
15
|
|
|
* If enabled in the CMS UI, links to imported images and documents in imported page-content will also be automatically |
|
16
|
|
|
* re-written. |
|
17
|
|
|
* |
|
18
|
|
|
* @todo write unit-test for unwritable assets dir. |
|
19
|
|
|
* |
|
20
|
|
|
* @package phptek/silverstripe-exodus |
|
21
|
|
|
* @author Sam Minee <[email protected]> |
|
22
|
|
|
* @author Russell Michell <[email protected]> |
|
23
|
|
|
* @see {@link StaticSiteDataTypeTransformer} |
|
24
|
|
|
*/ |
|
25
|
|
|
class StaticSiteFileTransformer extends StaticSiteDataTypeTransformer
{
    /**
     * Multiplier used to build the delay passed to usleep() before each file is processed,
     * to reduce load on the remote server. The value is multiplied by 1000, so it is
     * effectively expressed in milliseconds.
     *
     * @var int
     */
    private static $sleep_multiplier = 10;

    /**
     * Generic function called by \ExternalContentImporter
     *
     * Turns a crawled item of type 'file' into a written and published SilverStripe `File`.
     * Returns false whenever the item cannot (or should not) be imported; every exit path
     * logs a matching "END file-transform" line.
     *
     * @inheritdoc
     * @throws \Exception When the matching import schema has an empty DataType.
     */
    public function transform($item, $parentObject, $strategy)
    {
        $this->utils->log("START file-transform for: ", $item->AbsoluteURL, $item->ProcessedMIME);

        if (!$item->checkIsType('file')) {
            $this->utils->log(" - Item not of type 'file'. for: ", $item->AbsoluteURL, $item->ProcessedMIME);
            $this->utils->log("END file-transform for: ", $item->AbsoluteURL, $item->ProcessedMIME);

            return false;
        }

        $source = $item->getSource();

        // Sleep for Xms to reduce load on the remote server
        usleep((int) self::$sleep_multiplier * 1000);

        // Extract remote location of File
        $contentFields = $this->getContentFieldsAndSelectors($item, 'File');

        // Default value for Filename
        if (empty($contentFields['Filename'])) {
            $contentFields['Filename'] = ['content' => $item->externalId];
        }

        $schema = $source->getSchemaForURL($item->AbsoluteURL, $item->ProcessedMIME);

        if (!$schema) {
            $this->utils->log(" - Couldn't find an import schema for: ", $item->AbsoluteURL, $item->ProcessedMIME);
            $this->utils->log("END file-transform for: ", $item->AbsoluteURL, $item->ProcessedMIME);

            return false;
        }

        $dataType = $schema->DataType;

        if (!$dataType) {
            $this->utils->log(" - DataType for migration schema is empty for: ", $item->AbsoluteURL, $item->ProcessedMIME);
            $this->utils->log("END file-transform for: ", $item->AbsoluteURL, $item->ProcessedMIME);

            throw new \Exception('DataType for migration schema is empty!');
        }

        // Process incoming according to user-selected duplication strategy
        $file = $this->duplicationStrategy($dataType, $item, $source->BaseUrl, $strategy, $parentObject);

        if (!$file) {
            $this->utils->log("END file-transform for: ", $item->AbsoluteURL, $item->ProcessedMIME);

            return false;
        }

        // Without a local temporary copy there is nothing to store: bail out cleanly rather than
        // letting File::setFromLocalFile() fail on a missing path outside of the try/catch below.
        if (empty($contentFields['tmp_path'])) {
            $this->utils->log(" - Not imported (no temporary file): ", $item->AbsoluteURL, $item->ProcessedMIME);
            $this->utils->log("END file-transform for: ", $item->AbsoluteURL, $item->ProcessedMIME);

            return false;
        }

        // Prepare $file with all the correct properties, ready for writing
        $tmpPath = $contentFields['tmp_path'];
        $file = $this->buildFileProperties($file, $item, $source, $tmpPath);

        if (!$file) {
            $this->utils->log("END file-transform for: ", $item->AbsoluteURL, $item->ProcessedMIME);

            return false;
        }

        /*
         * File::onAfterWrite() calls File::updateFileSystem() which throws
         * an exception if the same image is attempted to be written.
         * N.b this was probably happening because we weren't versioning files through {@link Upload::load()}
         * and the same filename was being used. This should be fixed now (@see: self::versionFile()).
         */
        try {
            if (!$file->write()) {
                $this->utils->log(" - Not imported (no write): ", $item->AbsoluteURL, $item->ProcessedMIME);
            }

            $file->publishSingle();

            // Generate thumbnails
            ImageThumbnailHelper::singleton()->run();
        } catch (\Exception $e) {
            $this->utils->log($e->getMessage(), $item->AbsoluteURL, $item->ProcessedMIME);
        } finally {
            // Remove garbage tmp files, including when the write or publish step threw
            if (file_exists($tmpPath)) {
                unlink($tmpPath);
            }
        }

        $this->utils->log("END file-transform for: ", $item->AbsoluteURL, $item->ProcessedMIME);

        return StaticSiteTransformResult::create($file, $item->stageChildren());
    }

    /**
     * Build the properties required for a safely saved SilverStripe asset.
     * Attempts to detect and fix bad file-extensions based on the available Mime-Type.
     *
     * The file is discarded (false is returned) when its extension is not known to SilverStripe
     * and no replacement can be derived from the Mime-Type, or when the extension is fine but the
     * Mime-Type itself is flagged as bad by the mime-processor.
     *
     * @param File   $file    The file record to populate.
     * @param Object $item    Object properties are used to fixup bad-file extensions or filenames with no
     *                        extension but which _do_ have a Mime-Type.
     * @param Object $source  The content source; its ID is stored on the file.
     * @param string $tmpPath Path of the local temporary copy of the remote file.
     * @return mixed (boolean | File) The populated file, or false if the file was discarded.
     */
    public function buildFileProperties(File $file, $item, $source, $tmpPath)
    {
        $url = $item->AbsoluteURL;
        $mime = $item->ProcessedMIME;
        $assetsPath = $this->getDirHierarchy($url);

        /*
         * Run checks on original filename and name it as per default if nothing can be done with it.
         * '.zzz' not in framework/_config/mimetypes.yml and unlikely ever to be found in File, so fails gracefully.
         */
        $dummy = 'unknown.zzz';
        $origFilename = pathinfo($url, PATHINFO_FILENAME);
        $origFilename = (mb_strlen($origFilename) > 0 ? $origFilename : $dummy);

        /*
         * Some assets come through with no file-extension, which confuses SS's File logic
         * and throws errors causing the import to stop dead.
         * Check for this and guess an appropriate file-extension, if possible.
         */
        $oldExt = pathinfo($url, PATHINFO_EXTENSION);
        $extIsValid = in_array($oldExt, $this->getSSExtensions(), true);

        if ($extIsValid) {
            // The extension is usable as-is, but the Mime-Type must still be acceptable.
            if ($this->mimeProcessor->isBadMimeType($mime)) {
                $this->utils->log(" - WARNING: Bad mime-type: \"$mime\". Unable to assign new file-extension (#3) - DISCARDING.", $url, $mime);

                return false;
            }

            $useExtension = $oldExt;
        } else {
            // Only attempt to derive a new extension from the Mime-Type if the old one is invalid
            $newExt = $this->mimeProcessor->ext_to_mime_compare($oldExt, $mime, true);

            if (!$newExt) {
                $this->utils->log(" - WARNING: Bad file-extension: \"$oldExt\". Unable to assign new file-extension (#1) - DISCARDING.", $url, $mime);

                return false;
            }

            $useExtension = $newExt;
            $logMessagePt1 = "NOTICE: Bad file-extension: \"$oldExt\". Assigned new file-extension: \"$newExt\" based on MimeType.";
            $logMessagePt2 = PHP_EOL."\t - FROM: \"$url\"".PHP_EOL."\t - TO: \"$origFilename.$newExt\"";

            $this->utils->log(' - ' . $logMessagePt1 . $logMessagePt2, '', $mime);
        }

        $folder = Folder::find_or_make($assetsPath);
        $fileName = sprintf('%s.%s', $origFilename, $useExtension);

        $file->setFromLocalFile($tmpPath, $fileName);
        $file->setFilename($fileName);
        $file->ParentID = $folder->ID;
        $file->StaticSiteContentSourceID = $source->ID;
        $file->StaticSiteURL = $url;
        $file->StaticSiteImportID = $this->getCurrentImportID();

        $this->utils->log(" - NOTICE: \"File-properties built successfully for: ", $url, $mime);

        return $file;
    }

    /**
     * Determine the correct parent directory hierarchy from the imported file's remote-path,
     * such that it is mapped to the appropriate area under the main SilverStripe 'assets' directory.
     *
     * If the current request posts a `FileMigrationTarget` that resolves to an existing File record,
     * that record's Title is used as the top-level directory.
     *
     * @param string  $absoluteUrl The absolute URL of this file on the remote server.
     * @param boolean $full        Return the path prefixed with ASSETS_PATH instead of the relative path.
     * @return string The path to append to 'assets' and use as local cache dir.
     */
    public function getDirHierarchy(string $absoluteUrl, bool $full = false): string
    {
        /*
         * Determine the top-level directory under 'assets' under-which this item's
         * dir-hierarchy will be created.
         */
        $parentDir = '';
        $postVars = Controller::curr()->getRequest()->postVars();

        if (!empty($postVars['FileMigrationTarget'])) {
            $parentDirData = DataObject::get_by_id(File::class, $postVars['FileMigrationTarget']);

            // The posted ID may not resolve to a record; fall back to no parent dir in that case
            if ($parentDirData) {
                $parentDir = $parentDirData->Title;
            }
        }

        $replaceUnused = preg_replace("#https?://(www.)?[^/]+#", '', $absoluteUrl);
        $fragments = explode('/', $replaceUnused);
        $filename = pathinfo($absoluteUrl, PATHINFO_FILENAME);

        /*
         * Fragments containing "http", "www." or the file's own name are not part of the hierarchy.
         * The filename is escaped so regex metacharacters in it are matched literally, and it is left
         * out when empty, as an empty alternative would match (and so drop) every fragment.
         * NOTE(review): this is a substring match, so a directory whose name merely contains one of
         * these tokens is dropped as well - confirm whether that is intended.
         */
        $unwanted = ['http', 'www\.'];

        if ($filename !== '') {
            $unwanted[] = preg_quote($filename, '#');
        }

        $unwantedPattern = '#(' . implode('|', $unwanted) . ')+#';
        $path = [];

        foreach ($fragments as $fragment) {
            if (!strlen($fragment) || preg_match($unwantedPattern, $fragment)) {
                continue;
            }

            $path[] = $fragment;
        }

        $joinedPath = Controller::join_links($parentDir, implode('/', $path));
        $fullPath = ASSETS_PATH . ($joinedPath ? DIRECTORY_SEPARATOR . $joinedPath : '');

        return $full ? $fullPath : $joinedPath;
    }

    /**
     * Borrows logic from Upload::load() to ensure duplicated files get renamed
     * correctly. This therefore allows multiple versions of the same physical image
     * on the filesystem.
     *
     * A numeric suffix (starting at 2) is inserted before the extension, or appended after an
     * underscore when there is no extension, until the resulting path does not exist under
     * ASSETS_PATH. If the path cannot be altered, the failure is logged and the last attempted
     * path is returned as-is.
     *
     * @param string $relativeFilePath The path to the file relative to the 'assets' dir.
     * @return string $relativeFilePath
     */
    public function versionFile(string $relativeFilePath): string
    {
        // A while loop provides the ability to continually add further duplicates with the right name
        $base = ASSETS_PATH;
        $i = 1;

        while (file_exists("$base/$relativeFilePath")) {
            $i++;
            $oldFilePath = $relativeFilePath;

            // make sure archives retain valid extensions
            $isTarGz = substr($relativeFilePath, -strlen('.tar.gz')) === '.tar.gz';
            $isTarBz2 = substr($relativeFilePath, -strlen('.tar.bz2')) === '.tar.bz2';

            if ($isTarGz || $isTarBz2) {
                $relativeFilePath = preg_replace('#[0-9]*(\.tar\.[^.]+$)#', $i . "$1", $relativeFilePath);
            } elseif (strpos($relativeFilePath, '.') !== false) {
                $relativeFilePath = preg_replace('#[0-9]*(\.[^.]+$)#', $i . "$1", $relativeFilePath);
            } elseif (strpos($relativeFilePath, '_') !== false) {
                $relativeFilePath = preg_replace('#_([^_]+$)#', '_' . $i, $relativeFilePath);
            } else {
                $relativeFilePath .= '_' . $i;
            }

            // We've tried and failed, so we'll just end-up returning the original, that way we get _something_.
            // Without leaving the loop here an unchangeable path would be retried forever.
            if ($oldFilePath === $relativeFilePath && $i > 2) {
                $this->utils->log(" - Couldn't fix $relativeFilePath with $i attempts in " . __FUNCTION__);

                break;
            }
        }

        return $relativeFilePath;
    }
}
|
282
|
|
|
|
The issue could also be caused by a filter entry in the build configuration. If the path has been excluded in your configuration, e.g.
excluded_paths: ["lib/*"], you can move it to the dependency path list instead.
For further information see https://scrutinizer-ci.com/docs/tools/php/php-scrutinizer/#list-dependency-paths