1
|
|
|
<?php |
2
|
|
|
|
3
|
|
|
namespace PhpTek\Exodus\Transform; |
4
|
|
|
|
5
|
|
|
use SilverStripe\Assets\File; |
6
|
|
|
use SilverStripe\Assets\Filesystem; |
7
|
|
|
use SilverStripe\Assets\Folder; |
8
|
|
|
use SilverStripe\Assets\Upload; |
9
|
|
|
use SilverStripe\Control\Controller; |
10
|
|
|
use SilverStripe\ORM\DataObject; |
11
|
|
|
|
12
|
|
|
/**
 * URL transformer specific to SilverStripe's `File` class, for use with the module's
 * import content feature. It re-creates all available data of the scraped file in SilverStripe's
 * database and re-creates a copy of the file itself on the filesystem.
 * If enabled in the CMS UI, links to imported images and documents in imported page-content
 * are also re-written automatically.
 *
 * @todo write unit-test for unwritable assets dir.
 *
 * @package phptek/silverstripe-exodus
 * @author Sam Minee <[email protected]>
 * @author Russell Michell <[email protected]>
 * @see {@link StaticSiteDataTypeTransformer}
 */
26
|
|
|
class StaticSiteFileTransformer extends StaticSiteDataTypeTransformer |
27
|
|
|
{ |
28
|
|
|
/**
 * Multiplier for the pause taken before each remote file is processed, to reduce
 * load on the remote server. transform() casts this value to int, multiplies it by
 * 1000 and hands the result to usleep(); as usleep() counts microseconds, the
 * default of 10 yields a 10ms pause.
 *
 * @var int
 */
private static $sleep_multiplier = 10;
34
|
|
|
|
35
|
|
|
/**
 * Generic function called by \ExternalContentImporter.
 *
 * Turns one scraped item into a written and published File record:
 *  1. bails out (returning false) when the item is not of type 'file', when no import
 *     schema matches its URL/MIME, when the duplication strategy yields nothing, when no
 *     temporary file is available, or when buildFileProperties() discards the file;
 *  2. writes and publishes the File, logging (not re-throwing) any exception raised;
 *  3. always removes the temporary download once buildFileProperties() has been attempted.
 *
 * @inheritdoc
 * @param  object $item         Scraped item; this method reads AbsoluteURL, ProcessedMIME and
 *                              externalId and calls checkIsType(), getSource() and stageChildren().
 * @param  mixed  $parentObject Forwarded untouched to duplicationStrategy().
 * @param  mixed  $strategy     User-selected duplication strategy, forwarded to duplicationStrategy().
 * @return mixed (boolean | StaticSiteTransformResult) False when the item was not imported.
 * @throws \Exception When the matched import schema has an empty DataType.
 */
public function transform($item, $parentObject, $strategy)
{
    $url = $item->AbsoluteURL;
    $mime = $item->ProcessedMIME;

    $this->utils->log("START file-transform for: ", $url, $mime);

    if (!$item->checkIsType('file')) {
        $this->utils->log(" - Item not of type 'file'. for: ", $url, $mime);
        $this->utils->log("END file-transform for: ", $url, $mime);

        return false;
    }

    $source = $item->getSource();

    // Sleep for Xms to reduce load on the remote server
    usleep((int) self::$sleep_multiplier * 1000);

    // Extract remote location of File
    $contentFields = $this->getContentFieldsAndSelectors($item, 'File');

    // Default value for Filename
    // NOTE(review): this entry is not read again inside this method - confirm whether it is still needed.
    if (empty($contentFields['Filename'])) {
        $contentFields['Filename'] = ['content' => $item->externalId];
    }

    $schema = $source->getSchemaForURL($url, $mime);

    if (!$schema) {
        $this->utils->log(" - Couldn't find an import schema for: ", $url, $mime);
        $this->utils->log("END file-transform for: ", $url, $mime);

        return false;
    }

    $dataType = $schema->DataType;

    if (!$dataType) {
        $this->utils->log(" - DataType for migration schema is empty for: ", $url, $mime);
        $this->utils->log("END file-transform for: ", $url, $mime);

        throw new \Exception('DataType for migration schema is empty!');
    }

    // Process incoming according to user-selected duplication strategy
    $file = $this->duplicationStrategy($dataType, $item, $source->BaseUrl, $strategy, $parentObject);

    if (!$file) {
        $this->utils->log("END file-transform for: ", $url, $mime);

        return false;
    }

    // Without a temporary download there is nothing to build the asset from
    $tmpPath = $contentFields['tmp_path'] ?? null;

    if (!is_string($tmpPath) || $tmpPath === '') {
        $this->utils->log(" - Not imported (no temporary file available): ", $url, $mime);
        $this->utils->log("END file-transform for: ", $url, $mime);

        return false;
    }

    try {
        // Prepare $file with all the correct properties, ready for writing
        $file = $this->buildFileProperties($file, $item, $source, $tmpPath);

        if (!$file) {
            $this->utils->log("END file-transform for: ", $url, $mime);

            return false;
        }

        /*
         * File::onAfterWrite() calls File::updateFileSystem() which throws
         * an exception if the same image is attempted to be written.
         * N.b this was probably happening because we weren't versioning files through {@link Upload::load()}
         * and the same filename was being used. This should be fixed now (@see: self::versionFile()).
         */
        try {
            if (!$file->write()) {
                $this->utils->log(" - Not imported (no write): ", $url, $mime);
            }

            $file->publishSingle();
        } catch (\Exception $e) {
            $this->utils->log($e->getMessage(), $url, $mime);
        }
    } finally {
        // Remove garbage tmp files on every exit path, including discarded files and failed writes
        if (file_exists($tmpPath)) {
            unlink($tmpPath);
        }
    }

    $this->utils->log("END file-transform for: ", $url, $mime);

    return StaticSiteTransformResult::create($file, $item->stageChildren());
}
119
|
|
|
|
120
|
|
|
/**
 * Build the properties required for a safely saved SilverStripe asset.
 * Attempts to detect and fix bad file-extensions based on the available Mime-Type.
 *
 * The filename and extension are taken from the path component of the item's URL only, so a
 * query string or fragment never ends up in the stored name. When the extension is not one
 * returned by getSSExtensions(), a replacement is requested from the mime processor; if none
 * can be found, or if a valid extension is paired with a bad mime-type, the file is discarded.
 *
 * @param  File   $file    The File to populate.
 * @param  Object $item    Object properties (AbsoluteURL, ProcessedMIME) are used to fixup bad
 *                         file-extensions or filenames with no extension but which _do_ have a Mime-Type.
 * @param  Object $source  Content source; its ID is stored on the file as StaticSiteContentSourceID.
 * @param  string $tmpPath Local path of the downloaded copy, handed to File::setFromLocalFile().
 * @return mixed (boolean | File) The populated File, or false when the file was discarded.
 */
public function buildFileProperties(File $file, $item, $source, $tmpPath)
{
    $url = $item->AbsoluteURL;
    $mime = $item->ProcessedMIME;
    $assetsPath = $this->getDirHierarchy($url);

    // Only the URL's path names the file; fall back to the raw URL when no path can be parsed
    $urlPath = parse_url((string) $url, PHP_URL_PATH);

    if (!is_string($urlPath) || $urlPath === '') {
        $urlPath = (string) $url;
    }

    /*
     * Run checks on original filename and name it as per default if nothing can be done with it.
     * '.zzz' not in framework/_config/mimetypes.yml and unlikely ever to be found in File, so fails gracefully.
     */
    $dummy = 'unknown.zzz';
    $origFilename = pathinfo($urlPath, PATHINFO_FILENAME);
    $origFilename = (mb_strlen($origFilename) > 0 ? $origFilename : $dummy);

    /*
     * Some assets come through with no file-extension, which confuses SS's File logic
     * and throws errors causing the import to stop dead.
     * Check for this and guess an appropriate file-extension, if possible.
     */
    $oldExt = pathinfo($urlPath, PATHINFO_EXTENSION);
    $extIsValid = in_array($oldExt, $this->getSSExtensions());

    if (!$extIsValid) {
        // Only attempt to define a new extension ($newExt) if $oldExt is invalid
        $newExt = $this->mimeProcessor->ext_to_mime_compare($oldExt, $mime, true);

        if (!$newExt) {
            $this->utils->log(" - WARNING: Bad file-extension: \"$oldExt\". Unable to assign new file-extension (#1) - DISCARDING.", $url, $mime);

            return false;
        }

        $useExtension = $newExt;
        $logMessagePt1 = "NOTICE: Bad file-extension: \"$oldExt\". Assigned new file-extension: \"$newExt\" based on MimeType.";
        $logMessagePt2 = PHP_EOL."\t - FROM: \"$url\"".PHP_EOL."\t - TO: \"$origFilename.$newExt\"";

        $this->utils->log(' - ' . $logMessagePt1 . $logMessagePt2, '', $mime);
    } else {
        // The extension is fine, but a bad mime-type still disqualifies the file
        if ($this->mimeProcessor->isBadMimeType($mime)) {
            $this->utils->log(" - WARNING: Bad mime-type: \"$mime\". Unable to assign new file-extension (#3) - DISCARDING.", $url, $mime);

            return false;
        }

        $useExtension = $oldExt;
    }

    $newFilename = $origFilename . '.' . $useExtension;
    $folder = Folder::find_or_make($assetsPath);

    $file->setFromLocalFile($tmpPath, $newFilename);
    $file->setFilename($newFilename);
    // Folder::find_or_make() yields null for an empty path (file lives directly under assets)
    $file->ParentID = $folder ? $folder->ID : 0;
    $file->StaticSiteContentSourceID = $source->ID;
    $file->StaticSiteURL = $url;
    $file->StaticSiteImportID = $this->getCurrentImportID();

    $this->utils->log(" - NOTICE: File-properties built successfully for: ", $url, $mime);

    return $file;
}
194
|
|
|
|
195
|
|
|
/**
 * Determine the correct parent directory hierarchy from the imported file's remote-path,
 * such that it is mapped to the appropriate area under the main SilverStripe 'assets' directory.
 *
 * The scheme, host, query string and fragment are removed from the URL and the remainder is
 * split on '/'. Empty segments are skipped, as is any segment containing "http", "www." or the
 * file's own name (without extension). When the current request carries a 'FileMigrationTarget'
 * POST variable that resolves to a File record, that record's Title is used as the top-level
 * directory.
 *
 * @param  string  $absoluteUrl The absolute URL of this file on the remote server.
 * @param  boolean $full        Return the path prefixed with ASSETS_PATH instead of the relative one.
 * @return string               The path to append to 'assets' and use as local cache dir.
 */
public function getDirHierarchy(string $absoluteUrl, bool $full = false): string
{
    /*
     * Determine the top-level directory under 'assets' under-which this item's
     * dir-hierarchy will be created.
     */
    $parentDir = '';
    // There may be no current controller at all, in which case no POST data is consulted
    $postVars = Controller::has_curr() ? Controller::curr()->getRequest()->postVars() : [];

    if (!empty($postVars['FileMigrationTarget'])) {
        $parentDirData = DataObject::get_by_id(File::class, $postVars['FileMigrationTarget']);

        // The submitted ID may not resolve to a record
        if ($parentDirData) {
            $parentDir = (string) $parentDirData->Title;
        }
    }

    // Query string and fragment never describe a directory, even when they contain slashes
    $urlWithoutQuery = (string) preg_replace('~[?#].*$~s', '', $absoluteUrl);
    $replaceUnused = (string) preg_replace("#https?://[^/]+#", '', $urlWithoutQuery);
    $filename = pathinfo($urlWithoutQuery, PATHINFO_FILENAME);

    // Quote the filename so regex metacharacters in it are matched literally, and leave it
    // out entirely when empty (an empty alternative would match every segment)
    $skipAlternatives = ['http', 'www\.'];

    if ($filename !== '') {
        $skipAlternatives[] = preg_quote($filename, '#');
    }

    $skipPattern = '#(' . implode('|', $skipAlternatives) . ')+#';
    $path = [];

    foreach (explode('/', $replaceUnused) as $fragment) {
        if ($fragment === '' || preg_match($skipPattern, $fragment)) {
            continue;
        }

        $path[] = $fragment;
    }

    $joinedPath = Controller::join_links($parentDir, implode('/', $path));
    $fullPath = ASSETS_PATH . ($joinedPath ? DIRECTORY_SEPARATOR . $joinedPath : '');

    return $full ? $fullPath : $joinedPath;
}
237
|
|
|
|
238
|
|
|
/**
 * Borrows logic from Upload::load() to ensure duplicated files get renamed
 * correctly. This therefore allows multiple versions of the same physical image
 * on the filesystem.
 *
 * While a file already exists at the given path under ASSETS_PATH, a counter (starting at 2)
 * is placed before the extension ('.tar.gz' / '.tar.bz2' are kept whole), or after the last
 * underscore / at the end of the name when the path contains no dot. If a rename attempt
 * after the first leaves the path unchanged, the problem is logged and the current path is
 * returned as-is, even though a file already exists there.
 *
 * @param  string $relativeFilePath The path to the file relative to the 'assets' dir.
 * @return string $relativeFilePath A path that does not exist yet, or the last attempted path
 *                                  when no unused name could be produced.
 */
public function versionFile(string $relativeFilePath): string
{
    // A while loop provides the ability to continually add further duplicates with the right name
    $base = ASSETS_PATH;
    $i = 1;

    while (file_exists("$base/$relativeFilePath")) {
        $i++;
        $oldFilePath = $relativeFilePath;

        // make sure archives retain valid extensions
        $isTarGz = substr($relativeFilePath, -7) === '.tar.gz';
        $isTarBz2 = substr($relativeFilePath, -8) === '.tar.bz2';

        if ($isTarGz || $isTarBz2) {
            $renamed = preg_replace('#[0-9]*(\.tar\.[^.]+$)#', $i . "$1", $relativeFilePath);
        } elseif (strpos($relativeFilePath, '.') !== false) {
            $renamed = preg_replace('#[0-9]*(\.[^.]+$)#', $i . "$1", $relativeFilePath);
        } elseif (strpos($relativeFilePath, '_') !== false) {
            $renamed = preg_replace('#_([^_]+$)#', '_' . $i, $relativeFilePath);
        } else {
            $renamed = $relativeFilePath . '_' . $i;
        }

        // preg_replace() yields null on a PCRE error; treat that as "nothing changed"
        $relativeFilePath = $renamed ?? $relativeFilePath;

        // We've tried and failed, so we'll just end-up returning the original, that way we get _something_.
        // Leaving the loop here is essential: the path can no longer change, so looping on would never end.
        if ($oldFilePath === $relativeFilePath && $i > 2) {
            $this->utils->log(" - Couldn't fix $relativeFilePath with $i attempts in " . __FUNCTION__);

            break;
        }
    }

    return $relativeFilePath;
}
278
|
|
|
} |
279
|
|
|
|
In the issue above, the returned value is violating the contract defined by the mentioned interface.
Let's take a look at an example: