|
1
|
|
|
<?php |
|
2
|
|
|
|
|
3
|
|
|
/** |
|
4
|
|
|
* TechDivision\Import\Plugins\SubjectPlugin |
|
5
|
|
|
* |
|
6
|
|
|
* NOTICE OF LICENSE |
|
7
|
|
|
* |
|
8
|
|
|
* This source file is subject to the Open Software License (OSL 3.0) |
|
9
|
|
|
* that is available through the world-wide-web at this URL: |
|
10
|
|
|
* http://opensource.org/licenses/osl-3.0.php |
|
11
|
|
|
* |
|
12
|
|
|
* PHP version 5 |
|
13
|
|
|
* |
|
14
|
|
|
* @author Tim Wagner <[email protected]> |
|
15
|
|
|
* @copyright 2016 TechDivision GmbH <[email protected]> |
|
16
|
|
|
* @license http://opensource.org/licenses/osl-3.0.php Open Software License (OSL 3.0) |
|
17
|
|
|
* @link https://github.com/techdivision/import |
|
18
|
|
|
* @link http://www.techdivision.com |
|
19
|
|
|
*/ |
|
20
|
|
|
|
|
21
|
|
|
namespace TechDivision\Import\Plugins; |
|
22
|
|
|
|
|
23
|
|
|
use TechDivision\Import\Utils\BunchKeys; |
|
24
|
|
|
use TechDivision\Import\Utils\RegistryKeys; |
|
25
|
|
|
use TechDivision\Import\ApplicationInterface; |
|
26
|
|
|
use TechDivision\Import\Exceptions\LineNotFoundException; |
|
27
|
|
|
use TechDivision\Import\Exceptions\MissingOkFileException; |
|
28
|
|
|
use TechDivision\Import\Configuration\SubjectConfigurationInterface; |
|
29
|
|
|
|
|
30
|
|
|
/** |
|
31
|
|
|
* Plugin that processes the subjects. |
|
32
|
|
|
* |
|
33
|
|
|
* @author Tim Wagner <[email protected]> |
|
34
|
|
|
* @copyright 2016 TechDivision GmbH <[email protected]> |
|
35
|
|
|
* @license http://opensource.org/licenses/osl-3.0.php Open Software License (OSL 3.0) |
|
36
|
|
|
* @link https://github.com/techdivision/import |
|
37
|
|
|
* @link http://www.techdivision.com |
|
38
|
|
|
*/ |
|
39
|
|
|
class SubjectPlugin extends AbstractPlugin
{

    /**
     * The regex matches (named groups) of the last CSV filename that has been
     * accepted as part of the actual bunch. Empty until the first file matched.
     *
     * @var array
     */
    protected $matches = array();

    /**
     * The number of bunches imported by this plugin instance, over all subjects.
     *
     * @var integer
     */
    protected $bunches = 0;

    /**
     * The subject executor instance.
     *
     * @var \TechDivision\Import\Plugins\SubjectExecutorInterface
     */
    protected $subjectExecutor;

    /**
     * Initializes the plugin with the application and the subject executor instance.
     *
     * @param \TechDivision\Import\ApplicationInterface             $application     The application instance
     * @param \TechDivision\Import\Plugins\SubjectExecutorInterface $subjectExecutor The subject executor instance
     */
    public function __construct(
        ApplicationInterface $application,
        SubjectExecutorInterface $subjectExecutor
    ) {

        // call the parent constructor
        parent::__construct($application);

        // keep the executor that is used to run a subject for every matching file
        $this->subjectExecutor = $subjectExecutor;
    }

    /**
     * Process the plugin functionality.
     *
     * Locks the import, registers an (empty) status entry for every configured subject,
     * processes all subjects, stores the number of imported bunches in the registry and
     * finally unlocks the import again. The application is stopped when not a single
     * bunch has been imported.
     *
     * @return void
     * @throws \Exception Is thrown, if the plugin can not be processed
     */
    public function process()
    {
        try {
            // immediately add the PID to lock this import process
            $this->lock();

            // load the plugin's subjects
            $subjects = $this->getPluginConfiguration()->getSubjects();

            // initialize the status information, one empty entry per subject prefix
            /** @var \TechDivision\Import\Configuration\SubjectConfigurationInterface $subject */
            $status = array();
            foreach ($subjects as $subject) {
                $status[$subject->getPrefix()] = array();
            }

            // and update it in the registry
            $this->getRegistryProcessor()->mergeAttributesRecursive($this->getSerial(), $status);

            // process all the subjects found in the system configuration
            /** @var \TechDivision\Import\Configuration\SubjectConfigurationInterface $subject */
            foreach ($subjects as $subject) {
                $this->processSubject($subject);
            }

            // update the number of imported bunches
            $this->getRegistryProcessor()->mergeAttributesRecursive(
                $this->getSerial(),
                array(RegistryKeys::BUNCHES => $this->bunches)
            );

            // stop the application if we don't process ANY bunch
            if ($this->bunches === 0) {
                $this->getApplication()->stop(
                    sprintf(
                        'Operation %s has been stopped by %s, because no import files can be found in directory %s',
                        $this->getConfiguration()->getOperationName(),
                        get_class($this),
                        $this->getConfiguration()->getSourceDir()
                    )
                );
            }

            // finally remove the PID from the PID file to unlock the importer
            $this->unlock();
        } catch (\Exception $e) {
            // make sure the importer is unlocked in case of an error also
            $this->unlock();

            // re-throw the exception
            throw $e;
        }
    }

    /**
     * Loads the files matching the subject's suffix from the source directory
     * and returns them sorted by their pathname.
     *
     * The source directory is read from the status that is stored in the registry
     * under the actual serial (key RegistryKeys::SOURCE_DIRECTORY).
     *
     * @param \TechDivision\Import\Configuration\SubjectConfigurationInterface $subject The subject configuration
     *
     * @return array The array with the files matching the subjects suffix
     * @throws \Exception Is thrown, when the source directory is NOT available
     */
    protected function loadFiles(SubjectConfigurationInterface $subject)
    {

        // clear the filecache
        clearstatcache();

        // load the actual status
        $status = $this->getRegistryProcessor()->getAttribute($this->getSerial());

        // query whether or not the configured source directory is available
        $sourceDir = $status[RegistryKeys::SOURCE_DIRECTORY];
        if (!is_dir($sourceDir)) {
            throw new \Exception(sprintf('Source directory %s for subject %s is not available!', $sourceDir, $subject->getId()));
        }

        // load the files matching the suffix found in the source directory
        $files = glob(sprintf('%s/*.%s', $sourceDir, $subject->getSuffix()));

        // glob() returns FALSE on error, treat that like "no files found"
        // instead of passing a boolean to usort() and the caller's foreach
        if ($files === false) {
            $files = array();
        }

        // sort the files (binary safe string comparison) for the appropriate order
        usort($files, 'strcmp');

        // return the sorted files
        return $files;
    }

    /**
     * Process the subject with the passed configuration.
     *
     * We create a new, fresh and separate subject for EVERY file here, because this would be
     * the starting point to parallelize the import process in a multithreaded/multiprocessed
     * environment.
     *
     * @param \TechDivision\Import\Configuration\SubjectConfigurationInterface $subject The subject configuration
     *
     * @return void
     */
    protected function processSubject(SubjectConfigurationInterface $subject)
    {

        // initialize the bunch number and the serial
        $bunches = 0;
        $serial = $this->getSerial();

        // load the files
        $files = $this->loadFiles($subject);

        // iterate through all CSV files and process the subjects
        foreach ($files as $pathname) {
            // skip the file, if it is NOT part of the actual bunch
            if (!$this->isPartOfBunch($subject->getPrefix(), $subject->getSuffix(), $pathname)) {
                continue;
            }

            // if the subject needs an OK file, remove the filename from it first
            if ($subject->isOkFileNeeded()) {
                $this->removeFromOkFile($pathname, $subject->getSuffix());
            }

            // initialize the subject and import the bunch
            $this->subjectExecutor->execute($subject, $this->matches, $serial, $pathname);

            // raise the number of the imported bunches
            $bunches++;
        }

        // raise the bunch number by the imported bunches
        $this->bunches = $this->bunches + $bunches;

        // reset the matches, so the next subject starts to look for a new bunch
        $this->matches = array();

        // and log a message that the subject has been processed
        $this->getSystemLogger()->debug(
            sprintf('Successfully processed subject %s with %d bunch(es)!', $subject->getId(), $bunches)
        );
    }

    /**
     * Queries whether or not, the passed filename is part of a bunch or not.
     *
     * As long as no file has matched, any filename following the pattern
     * `<prefix>_<filename>_<counter>.<suffix>` is accepted and its parts are remembered.
     * Afterwards only files with the SAME prefix and filename part are accepted.
     *
     * NOTE: $prefix and $suffix are inserted into the regular expression as they are,
     * the values remembered from a previous match are quoted, because they are literal
     * parts of a real filename and may contain regex meta characters.
     *
     * @param string $prefix   The prefix to query for
     * @param string $suffix   The suffix to query for
     * @param string $filename The filename to query for
     *
     * @return boolean TRUE if the filename is part, else FALSE
     */
    protected function isPartOfBunch($prefix, $suffix, $filename)
    {

        // query whether or not, this is the first file to be processed
        if (sizeof($this->matches) === 0) {
            // the FIRST file may have any filename part
            $prefixPattern = $prefix;
            $filenamePattern = '.*';
        } else {
            // the NEXT file has to have exactly the prefix/filename of the first one
            $prefixPattern = preg_quote($this->matches[BunchKeys::PREFIX], '/');
            $filenamePattern = preg_quote($this->matches[BunchKeys::FILENAME], '/');
        }

        // initialize the pattern with the named groups for prefix, filename and counter
        $pattern = sprintf(
            '/^.*\/(?<%s>%s)_(?<%s>%s)_(?<%s>\d+)\\.%s$/',
            BunchKeys::PREFIX,
            $prefixPattern,
            BunchKeys::FILENAME,
            $filenamePattern,
            BunchKeys::COUNTER,
            $suffix
        );

        // initialize the array for the matches
        $matches = array();

        // the filename is NOT part of the bunch, if the pattern doesn't match (or is invalid)
        if (preg_match($pattern, $filename, $matches) !== 1) {
            return false;
        }

        // remember the matches for the following files
        $this->matches = $matches;
        return true;
    }

    /**
     * Returns an array with the names of the existing OK files for the actual bunch.
     *
     * The candidates are built from the matches of the last accepted CSV file, by joining
     * the first one, the first two, ... values of the bunch keys with an underscore
     * (presumably prefix, prefix + filename, prefix + filename + counter, depending on
     * the order BunchKeys::getAllKeys() returns).
     *
     * @return array The array with the existing OK filenames
     */
    protected function getOkFilenames()
    {

        // initialize the array for the available OK filenames
        $okFilenames = array();

        // initialize the array for the parts of the names, it grows by one part per iteration
        $parts = array();

        // prepare the OK filenames based on the found CSV file information
        foreach (BunchKeys::getAllKeys() as $bunchKey) {
            // append the next part from the matches
            $parts[] = $this->matches[$bunchKey];

            // query whether or not, the OK file exists, if yes append it
            $okFilename = sprintf('%s/%s.ok', $this->getSourceDir(), implode('_', $parts));
            if (file_exists($okFilename)) {
                $okFilenames[] = $okFilename;
            }
        }

        // return the names of the existing OK files
        return $okFilenames;
    }

    /**
     * Removes the passed CSV filename from the first OK file that contains it.
     *
     * If an OK file has the same name as the CSV file and is empty, or if it is empty
     * after the CSV filename has been removed, the OK file itself is deleted.
     *
     * @param string $filename The CSV filename to remove
     * @param string $suffix   The CSV filename suffix, csv by default
     *
     * @return void
     * @throws \TechDivision\Import\Exceptions\MissingOkFileException Is thrown, if no OK file is available
     * @throws \Exception Is thrown, if the passed filename is NOT in one of the OK files or it can NOT be removed from it
     */
    protected function removeFromOkFile($filename, $suffix)
    {

        // try to load the expected OK filenames
        $okFilenames = $this->getOkFilenames();
        if (sizeof($okFilenames) === 0) {
            throw new MissingOkFileException(sprintf('Can\'t find a OK filename for file %s', $filename));
        }

        // prepare the names we've to compare/look for
        $csvName = basename($filename);
        $csvNameWithoutSuffix = basename($filename, sprintf('.%s', $suffix));

        // the last exception, thrown because the CSV filename was not found in an OK file
        $notFound = null;

        // iterate over the found OK filenames (should usually be only one, but could be more)
        foreach ($okFilenames as $okFilename) {
            // if the OK filename matches the CSV filename AND the OK file is empty
            if ($csvNameWithoutSuffix === basename($okFilename, '.ok') && filesize($okFilename) === 0) {
                unlink($okFilename);
                return;
            }

            // else, open the OK file to remove the CSV filename from it
            $fh = fopen($okFilename, 'r+');
            if ($fh === false) {
                throw new \Exception(sprintf('Can\'t open OK file %s to remove filename %s', $okFilename, $filename));
            }

            try {
                $this->removeLineFromFile($csvName, $fh);
            } catch (LineNotFoundException $lne) {
                // not in this OK file, close it and try the next one
                fclose($fh);
                $notFound = $lne;
                continue;
            } catch (\Exception $e) {
                // don't leak the file handle, if something else went wrong
                fclose($fh);
                throw $e;
            }

            fclose($fh);

            // filesize() is cached, so clear the cache to see the size AFTER the removal
            clearstatcache();

            // if the OK file is empty, delete the file
            if (filesize($okFilename) === 0) {
                unlink($okFilename);
            }

            // return immediately
            return;
        }

        // throw an exception if the CSV file is not in one of the OK files
        throw new \Exception(
            sprintf(
                'Can\'t found filename %s in one of the expected OK files: %s',
                $filename,
                implode(', ', $okFilenames)
            ),
            0,
            $notFound
        );
    }
}
|
383
|
|
|
|