Completed
Push — master ( 313f20...c23fe9 )
by Tim
9s
created

SubjectPlugin::processSubject()   B

Complexity

Conditions 4
Paths 4

Size

Total Lines 38
Code Lines 14

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 15
CRAP Score 4

Importance

Changes 0
Metric Value
dl 0
loc 38
ccs 15
cts 15
cp 1
rs 8.5806
c 0
b 0
f 0
cc 4
eloc 14
nc 4
nop 1
crap 4
1
<?php
2
3
/**
4
 * TechDivision\Import\Plugins\SubjectPlugin
5
 *
6
 * NOTICE OF LICENSE
7
 *
8
 * This source file is subject to the Open Software License (OSL 3.0)
9
 * that is available through the world-wide-web at this URL:
10
 * http://opensource.org/licenses/osl-3.0.php
11
 *
12
 * PHP version 5
13
 *
14
 * @author    Tim Wagner <[email protected]>
15
 * @copyright 2016 TechDivision GmbH <[email protected]>
16
 * @license   http://opensource.org/licenses/osl-3.0.php Open Software License (OSL 3.0)
17
 * @link      https://github.com/techdivision/import
18
 * @link      http://www.techdivision.com
19
 */
20
21
namespace TechDivision\Import\Plugins;
22
23
use TechDivision\Import\Utils\BunchKeys;
24
use TechDivision\Import\Utils\RegistryKeys;
25
use TechDivision\Import\ApplicationInterface;
26
use TechDivision\Import\Exceptions\LineNotFoundException;
27
use TechDivision\Import\Exceptions\MissingOkFileException;
28
use TechDivision\Import\Configuration\SubjectConfigurationInterface;
29
30
/**
31
 * Plugin that processes the subjects.
32
 *
33
 * @author    Tim Wagner <[email protected]>
34
 * @copyright 2016 TechDivision GmbH <[email protected]>
35
 * @license   http://opensource.org/licenses/osl-3.0.php Open Software License (OSL 3.0)
36
 * @link      https://github.com/techdivision/import
37
 * @link      http://www.techdivision.com
38
 */
39
class SubjectPlugin extends AbstractPlugin
40
{
41
42
    /**
43
     * The matches for the last processed CSV filename.
44
     *
45
     * @var array
46
     */
47
    protected $matches = array();
48
49
    /**
50
     * The number of imported bunches.
51
     *
52
     * @var integer
53
     */
54
    protected $bunches = 0;
55
56
    /**
57
     * The subject executor instance.
58
     *
59
     * @var \TechDivision\Import\Plugins\SubjectExecutorInterface
60
     */
61
    protected $subjectExecutor;
62
63
    /**
     * Creates the plugin and wires it with its collaborators.
     *
     * @param \TechDivision\Import\ApplicationInterface             $application     The application instance, handed over to the parent plugin
     * @param \TechDivision\Import\Plugins\SubjectExecutorInterface $subjectExecutor The executor that imports the files of a subject
     */
    public function __construct(ApplicationInterface $application, SubjectExecutorInterface $subjectExecutor)
    {

        // let the abstract plugin take care of the application instance
        parent::__construct($application);

        // keep the executor, it is invoked for every file that is part of a bunch
        $this->subjectExecutor = $subjectExecutor;
    }
80
81
82
    /**
     * Process the plugin functionality.
     *
     * Locks the import process, registers an (empty) status entry for the prefix of every
     * configured subject, processes the subjects one after another and finally stores the
     * number of imported bunches in the registry. If not a single bunch has been imported,
     * the application is asked to stop.
     *
     * The lock is released in ANY case, no matter whether processing succeeded, an exception
     * has been thrown or (on PHP 7+) an error has been raised.
     *
     * @return void
     * @throws \Exception Is thrown, if the plugin can not be processed
     */
    public function process()
    {
        try {
            // immediately add the PID to lock this import process
            $this->lock();

            // load the plugin's subjects
            $subjects = $this->getPluginConfiguration()->getSubjects();

            // initialize the status information, one empty entry for each subject prefix
            $status = array();
            /** @var \TechDivision\Import\Configuration\SubjectConfigurationInterface $subject */
            foreach ($subjects as $subject) {
                $status[$subject->getPrefix()] = array();
            }

            // and update it in the registry, BEFORE the first subject is processed
            $this->getRegistryProcessor()->mergeAttributesRecursive($this->getSerial(), $status);

            // process all the subjects found in the system configuration
            /** @var \TechDivision\Import\Configuration\SubjectConfigurationInterface $subject */
            foreach ($subjects as $subject) {
                $this->processSubject($subject);
            }

            // update the number of imported bunches
            $this->getRegistryProcessor()->mergeAttributesRecursive(
                $this->getSerial(),
                array(RegistryKeys::BUNCHES => $this->bunches)
            );

            // stop the application if we don't process ANY bunch
            if ($this->bunches === 0) {
                $this->getApplication()->stop(
                    sprintf(
                        'Operation %s has been stopped by %s, because no import files can be found in directory %s',
                        $this->getConfiguration()->getOperationName(),
                        get_class($this),
                        $this->getConfiguration()->getSourceDir()
                    )
                );
            }
        } finally {
            // remove the PID from the PID file to unlock the importer exactly once,
            // the exception/error (if any) keeps propagating to the caller untouched
            $this->unlock();
        }
    }
146
147
    /**
     * Loads the files with the subject's suffix from the source directory and return's them sorted.
     *
     * The source directory is read from the status that is stored in the registry under
     * the serial of the actual import.
     *
     * @param \TechDivision\Import\Configuration\SubjectConfigurationInterface $subject The subject configuration providing the file suffix
     *
     * @return array The sorted array with the files matching the subjects suffix, empty if none has been found
     * @throws \Exception Is thrown, when the source directory is NOT available
     */
    protected function loadFiles(SubjectConfigurationInterface $subject)
    {

        // clear the filecache
        clearstatcache();

        // load the actual status
        $status = $this->getRegistryProcessor()->getAttribute($this->getSerial());

        // query whether or not the configured source directory is available
        if (!is_dir($sourceDir = $status[RegistryKeys::SOURCE_DIRECTORY])) {
            throw new \Exception(sprintf('Source directory %s for subject %s is not available!', $sourceDir, $subject->getId()));
        }

        // load the files matching the suffix found in the source directory
        $files = glob(sprintf('%s/*.%s', $sourceDir, $subject->getSuffix()));

        // glob() return's FALSE on error, handle that like a directory without matching files
        if ($files === false) {
            $files = array();
        }

        // sort the files by a plain string comparison to get the appropriate order
        sort($files, SORT_STRING);

        // return the sorted files
        return $files;
    }
180
181
    /**
     * Imports all files of the bunch that belongs to the passed subject configuration.
     *
     * The subject executor is invoked separately for EVERY file, which makes this method
     * the natural starting point to parallelize the import process in a multithreaded or
     * multiprocessed environment.
     *
     * @param \TechDivision\Import\Configuration\SubjectConfigurationInterface $subject The subject configuration
     *
     * @return void
     */
    protected function processSubject(SubjectConfigurationInterface $subject)
    {

        // resolve the serial once and start counting the bunches of this subject
        $serial = $this->getSerial();
        $imported = 0;

        // walk through the sorted files found for the subject
        foreach ($this->loadFiles($subject) as $file) {
            // skip files that do not belong to the actual bunch
            if (!$this->isPartOfBunch($subject->getPrefix(), $subject->getSuffix(), $file)) {
                continue;
            }

            // if the subject requires an OK file, remove the filename from it first
            if ($subject->isOkFileNeeded()) {
                $this->removeFromOkFile($file, $subject->getSuffix());
            }

            // hand the file over to the executor and count it as imported
            $this->subjectExecutor->execute($subject, $this->matches, $serial, $file);
            $imported++;
        }

        // add the bunches imported for this subject to the overall counter
        $this->bunches += $imported;

        // forget the matches, so the next subject starts with a fresh bunch detection
        $this->matches = array();

        // log a message that the subject has been processed
        $this->getSystemLogger()->debug(
            sprintf('Successfully processed subject %s with %d bunch(es)!', $subject->getId(), $imported)
        );
    }
230
231
    /**
     * Queries whether or not, the passed filename is part of a bunch or not.
     *
     * As long as no file has been matched, ANY file with the passed prefix and suffix that
     * follows the naming scheme <prefix>_<filename>_<counter>.<suffix> is accepted. As soon
     * as a file has been matched, only files with exactly the same prefix and filename part
     * are accepted. On success, the matches of the last processed file are remembered.
     *
     * @param string $prefix   The prefix to query for (used as part of the regular expression as it is)
     * @param string $suffix   The suffix to query for (used as part of the regular expression as it is)
     * @param string $filename The filename to query for
     *
     * @return boolean TRUE if the filename is part, else FALSE
     */
    protected function isPartOfBunch($prefix, $suffix, $filename)
    {

        // query whether or not, this is the first file to be processed
        if (sizeof($this->matches) === 0) {
            // the FIRST file may have any filename part, the prefix comes from the configuration
            $prefixPattern = $prefix;
            $filenamePattern = '.*';
        } else {
            // the NEXT file has to match the values of the first one literally, so they have to
            // be quoted to avoid that characters like . or + are interpreted by the regex engine
            $prefixPattern = preg_quote($this->matches[BunchKeys::PREFIX], '/');
            $filenamePattern = preg_quote($this->matches[BunchKeys::FILENAME], '/');
        }

        // initialize the pattern with the named subpatterns for prefix, filename and counter
        $pattern = sprintf(
            '/^.*\/(?<%s>%s)_(?<%s>%s)_(?<%s>\d+)\\.%s$/',
            BunchKeys::PREFIX,
            $prefixPattern,
            BunchKeys::FILENAME,
            $filenamePattern,
            BunchKeys::COUNTER,
            $suffix
        );

        // initialize the array for the matches
        $matches = array();

        // preg_match() return's 1 on a match, 0 on no match and FALSE on an error
        $matched = preg_match($pattern, $filename, $matches) === 1;

        // update the matches, if the pattern matches
        if ($matched) {
            $this->matches = $matches;
        }

        // the file is only part of the bunch, if the filename matches
        return $matched;
    }
282
283
    /**
     * Return's an array with the names of the existing OK files for the actual subject.
     *
     * The candidate names are built from the matches of the last processed CSV file by
     * joining the values of the first one, the first two, ... and finally all bunch keys
     * with an underscore. Only candidates that exist in the source directory are returned,
     * in the order the bunch keys are provided.
     *
     * @return array The array with the expected OK filenames that exist
     */
    protected function getOkFilenames()
    {

        // the source directory is the same for all candidates, so load it only once
        $sourceDir = $this->getSourceDir();

        // initialize the array for the available okFilenames
        $okFilenames = array();

        // the parts of the name, extended by one more bunch key value with every iteration
        $parts = array();

        // prepare the OK filenames based on the found CSV file information
        foreach (BunchKeys::getAllKeys() as $bunchKey) {
            // append the value matched for the next bunch key
            $parts[] = $this->matches[$bunchKey];

            // query whether or not, the OK file exists, if yes append it
            $okFilename = sprintf('%s/%s.ok', $sourceDir, implode('_', $parts));
            if (file_exists($okFilename)) {
                $okFilenames[] = $okFilename;
            }
        }

        // return the names of the OK files that have been found
        return $okFilenames;
    }
315
316
    /**
     * Removes the passed CSV filename from the OK file it is listed in.
     *
     * The expected OK files are tried one after another. An EMPTY OK file that has the
     * same name as the CSV file (apart from the extension) is simply deleted. Otherwise
     * the line with the CSV filename is removed from the OK file and the OK file itself
     * is deleted as soon as it is empty. The method return's after the first OK file that
     * has been handled successfully.
     *
     * If the CSV filename can NOT be found in any of the expected OK files, an exception
     * is thrown and the CSV file should NOT be imported/moved.
     *
     * @param string $filename The CSV filename to query for
     * @param string $suffix   The CSV filename suffix, csv by default
     *
     * @return void
     * @throws \TechDivision\Import\Exceptions\MissingOkFileException Is thrown, if none of the expected OK files exists
     * @throws \Exception Is thrown, if the passed filename is NOT in the OK file or it can NOT be removed from it
     */
    protected function removeFromOkFile($filename, $suffix)
    {

        // try to load the expected OK filenames
        if (sizeof($okFilenames = $this->getOkFilenames()) === 0) {
            throw new MissingOkFileException(sprintf('Can\'t find a OK filename for file %s', $filename));
        }

        // the line to remove from an OK file and the name a dedicated OK file would have
        $line = basename($filename);
        $csvBasename = basename($filename, sprintf('.%s', $suffix));

        // remember the last reason why the filename can not be removed
        $previous = null;

        // iterate over the found OK filenames (should usually be only one, but could be more)
        foreach ($okFilenames as $okFilename) {
            // clear the filecache to make sure we don't work with an outdated filesize
            clearstatcache();

            // if the OK filename matches the CSV filename AND the OK file is empty
            if ($csvBasename === basename($okFilename, '.ok') && filesize($okFilename) === 0) {
                unlink($okFilename);
                return;
            }

            // open the OK file for reading and writing
            if (($fh = fopen($okFilename, 'r+')) === false) {
                throw new \Exception(sprintf('Can\'t open OK file %s to remove filename %s', $okFilename, $filename));
            }

            try {
                // else, remove the CSV filename from the OK file
                $this->removeLineFromFile($line, $fh);
            } catch (LineNotFoundException $lne) {
                // the CSV filename is not part of this OK file, try the next one
                $previous = $lne;
                continue;
            } finally {
                // close the file handle in ANY case to avoid leaking it
                fclose($fh);
            }

            // the content of the OK file has been changed, so clear the filecache again
            clearstatcache();

            // if the OK file is empty, delete the file
            if (filesize($okFilename) === 0) {
                unlink($okFilename);
            }

            // return immediately, the filename has been removed
            return;
        }

        // throw an exception, because the CSV file is not in one of the OK files
        throw new \Exception(
            sprintf(
                'Can\'t remove filename %s from OK file: %s',
                $filename,
                implode(', ', $okFilenames)
            ),
            0,
            $previous
        );
    }
382
}
383