Completed
Push — reindexer_issue_on_fork ( 8aa6ff )
by André
29:38
created

ReindexCommand   C

Complexity

Total Complexity 53

Size/Duplication

Total Lines 445
Duplicated Lines 0 %

Coupling/Cohesion

Components 2
Dependencies 13

Importance

Changes 0
Metric Value
dl 0
loc 445
rs 6.96
c 0
b 0
f 0
wmc 53
lcom 2
cbo 13

12 Methods

Rating   Name   Duplication   Size   Complexity  
A initialize() 0 17 2
B configure() 0 65 1
A execute() 0 35 4
C indexIncrementally() 0 66 11
B runParallelProcess() 0 37 9
A getStatementContentSince() 0 12 2
A getStatementSubtree() 0 18 2
A getStatementContentAll() 0 10 2
B getNumberOfCPUCores() 0 26 6
A fetchIteration() 0 17 5
B getPhpProcess() 0 30 6
A getPhpPath() 0 16 3

How to fix   Complexity   

Complex Class

Complex classes like ReindexCommand often do a lot of different things. To break such a class down, we need to identify a cohesive component within that class. A common approach to find such a component is to look for fields/methods that share the same prefixes, or suffixes. You can also have a look at the cohesion graph to spot any un-connected, or weakly-connected components.

Once you have determined the fields that belong together, you can apply the Extract Class refactoring. If the component makes sense as a sub-class, Extract Subclass is also a candidate, and is often faster.

While breaking up the class, it is a good idea to analyze how other classes use ReindexCommand, and based on these observations, apply Extract Interface, too.

1
<?php
2
3
/**
4
 * This file is part of the eZ Publish Kernel package.
5
 *
6
 * @copyright Copyright (C) eZ Systems AS. All rights reserved.
7
 * @license For full copyright and license information view LICENSE file distributed with this source code.
8
 */
9
namespace eZ\Bundle\EzPublishCoreBundle\Command;
10
11
use eZ\Publish\SPI\Persistence\Content\ContentInfo;
12
use eZ\Publish\Core\Search\Common\Indexer;
13
use eZ\Publish\Core\Search\Common\IncrementalIndexer;
14
use Doctrine\DBAL\Driver\Statement;
15
use Symfony\Bundle\FrameworkBundle\Command\ContainerAwareCommand;
16
use Symfony\Component\Console\Helper\ProgressBar;
17
use Symfony\Component\Console\Input\InputInterface;
18
use Symfony\Component\Console\Input\InputOption;
19
use Symfony\Component\Console\Output\OutputInterface;
20
use Symfony\Component\Process\PhpExecutableFinder;
21
use Symfony\Component\Process\ProcessBuilder;
22
use RuntimeException;
23
use DateTime;
24
use PDO;
25
26
class ReindexCommand extends ContainerAwareCommand
27
{
28
    /**
     * Search engine indexer, fetched from the "ezpublish.spi.search.indexer" service in {@see initialize()}.
     *
     * Incremental features (since / content-ids / subtree / processes / no-purge) are only used
     * when the instance is an IncrementalIndexer.
     *
     * @var \eZ\Publish\Core\Search\Common\Indexer|\eZ\Publish\Core\Search\Common\IncrementalIndexer
     */
    private $searchIndexer;

    /**
     * Database connection used to select the ids of the content items to index,
     * fetched from the "ezpublish.api.storage_engine.legacy.connection" service.
     *
     * @var \Doctrine\DBAL\Connection
     */
    private $connection;

    /**
     * Path to the PHP executable used for child processes, resolved and cached by {@see getPhpPath()}.
     *
     * @var string
     */
    private $phpPath;

    /**
     * Logger used to report child indexer processes that did not finish successfully.
     *
     * @var \Psr\Log\LoggerInterface
     */
    private $logger;

    /**
     * Value of the "siteaccess" input option, forwarded to child processes when set.
     *
     * @var string
     */
    private $siteaccess;

    /**
     * Value of the "kernel.environment" parameter, forwarded to child processes as "--env".
     *
     * @var string
     */
    private $env;

    /**
     * Value of the "kernel.debug" parameter; when false child processes are started with "--no-debug".
     *
     * @var bool
     */
    private $isDebug;
62
63
    /**
     * Initialize objects required by {@see execute()}.
     *
     * Fetches the search indexer, database connection, logger and the kernel environment/debug
     * parameters from the container, then verifies that the configured indexer can be used.
     *
     * @param InputInterface $input
     * @param OutputInterface $output
     *
     * @throws \RuntimeException If the configured search indexer is not an instance of the common Indexer.
     */
    public function initialize(InputInterface $input, OutputInterface $output)
    {
        parent::initialize($input, $output);

        $container = $this->getContainer();
        $this->searchIndexer = $container->get('ezpublish.spi.search.indexer');
        $this->connection = $container->get('ezpublish.api.storage_engine.legacy.connection');
        $this->logger = $container->get('logger');
        $this->env = $container->getParameter('kernel.environment');
        $this->isDebug = $container->getParameter('kernel.debug');

        if (!$this->searchIndexer instanceof Indexer) {
            // Report the type of the offending service itself: get_parent_class() would name its
            // parent instead, and yields false (an empty string in the message) when there is none.
            $foundType = \is_object($this->searchIndexer)
                ? \get_class($this->searchIndexer)
                : \gettype($this->searchIndexer);

            throw new RuntimeException(
                sprintf(
                    'Expected to find Search Engine Indexer but found "%s" instead',
                    $foundType
                )
            );
        }
    }
86
87
    /**
     * {@inheritdoc}
     *
     * Declares the command name, description, all supported options and the help text.
     */
    protected function configure()
    {
        $this->setName('ezplatform:reindex');
        $this->setDescription('Recreate or Refresh search engine index');

        // Batch size and commit/purge behaviour.
        $this->addOption(
            'iteration-count',
            'c',
            InputOption::VALUE_OPTIONAL,
            'Number of objects to be indexed in a single iteration, for avoiding using too much memory',
            50
        );
        $this->addOption(
            'no-commit',
            null,
            InputOption::VALUE_NONE,
            'Do not commit after each iteration'
        );
        $this->addOption(
            'no-purge',
            null,
            InputOption::VALUE_NONE,
            'Do not purge before indexing'
        );

        // Selection of what to (re)index.
        $this->addOption(
            'since',
            null,
            InputOption::VALUE_OPTIONAL,
            'Refresh changes since a given time, any format understood by DateTime. Implies "no-purge", can not be combined with "content-ids" or "subtree"'
        );
        $this->addOption(
            'content-ids',
            null,
            InputOption::VALUE_OPTIONAL,
            'Comma separated list of content id\'s to refresh (deleted/updated/added). Implies "no-purge", can not be combined with "since" or "subtree"'
        );
        $this->addOption(
            'subtree',
            null,
            InputOption::VALUE_OPTIONAL,
            'Location Id to index subtree of (incl self). Implies "no-purge", can not be combined with "since" or "content-ids"'
        );

        // Parallelism.
        $this->addOption(
            'processes',
            null,
            InputOption::VALUE_OPTIONAL,
            'Number of child processes to run in parallel for iterations, if set to "auto" it will set to number of CPU cores -1, set to "1" or "0" to disable',
            1
        );

        $help = <<<EOT
The command <info>%command.name%</info> indexes current configured database in configured search engine index.


Example usage:
- Refresh (add/update) index changes since yesterday:
  <comment>ezplatform:reindex --since=yesterday</comment>
  See: http://php.net/manual/en/datetime.formats.php

- Refresh (add/update/remove) index on a set of content id's:
  <comment>ezplatform:reindex --content-ids=2,34,68</comment>

- Refresh (add/update) index of a subtree:
  <comment>ezplatform:reindex --subtree=45</comment>

- Refresh (add/update) the whole index using 3 processes (if machine has 4 CPU cores),
  & let search engine handle commits itself using auto commit:
  <comment>ezplatform:reindex --no-purge --no-commit --processes=auto</comment>

EOT;

        $this->setHelp($help);
    }
155
156
    /**
     * {@inheritdoc}
     *
     * Validates "--iteration-count", then either runs the deprecated full re-index (for indexers
     * that are not an IncrementalIndexer) or delegates to {@see indexIncrementally()}.
     *
     * @throws \RuntimeException If "--iteration-count" is not a number greater than 0.
     */
    protected function execute(InputInterface $input, OutputInterface $output)
    {
        $commit = !$input->getOption('no-commit');
        $iterationCount = $input->getOption('iteration-count');

        // The option value is not guaranteed to be a string (it can e.g. be null when not given),
        // so only keep it when it is one; this keeps the property in line with its declared type.
        $siteaccess = $input->getOption('siteaccess');
        $this->siteaccess = \is_string($siteaccess) ? $siteaccess : null;

        if (!is_numeric($iterationCount) || (int) $iterationCount < 1) {
            throw new RuntimeException("'--iteration-count' option should be > 0, got '{$iterationCount}'");
        }

        // Options arrive as strings; normalize once so all later arithmetic works on an integer.
        $iterationCount = (int) $iterationCount;

        if (!$this->searchIndexer instanceof IncrementalIndexer) {
            $output->writeln(<<<EOT
DEPRECATED:
Running indexing against an Indexer that has not been updated to use IncrementalIndexer abstract.

Options that won't be taken into account:
- since
- content-ids
- subtree
- processes
- no-purge
EOT
            );
            $this->searchIndexer->createSearchIndex($output, $iterationCount, !$commit);

            // Explicit success exit code (previously an implicit null).
            return 0;
        }

        $output->writeln('Re-indexing started for search engine: ' . $this->searchIndexer->getName());
        $output->writeln('');

        $return = $this->indexIncrementally($input, $output, $iterationCount, $commit);

        $output->writeln('');
        $output->writeln('Finished re-indexing');

        return $return;
    }
194
195
    /**
     * Refresh or re-create the search index using the incremental indexer.
     *
     * Selection priority is "content-ids", then "since", then "subtree", otherwise all published
     * content. Only the last case purges the index first (unless "no-purge" is given).
     *
     * @param InputInterface $input
     * @param OutputInterface $output
     * @param int|string $iterationCount Number of content items per iteration, expected to be > 0.
     * @param bool $commit Whether the indexer should commit after each iteration.
     *
     * @return mixed 1 when nothing was found to index, null after a normal batch run, or whatever
     *               the indexer's updateSearchIndex() returns when "content-ids" is used.
     */
    protected function indexIncrementally(InputInterface $input, OutputInterface $output, $iterationCount, $commit)
    {
        if ($contentIds = $input->getOption('content-ids')) {
            $contentIds = explode(',', $contentIds);
            $output->writeln(sprintf(
                'Indexing list of content id\'s (%s)' . ($commit ? ', with commit' : ''),
                \count($contentIds)
            ));

            return $this->searchIndexer->updateSearchIndex($contentIds, $commit);
        }

        if ($since = $input->getOption('since')) {
            // Resolve the date once: relative formats (e.g. "-1 hour") evaluated twice could give
            // the id query and the count query different timestamps.
            $sinceDate = new DateTime($since);
            $stmt = $this->getStatementContentSince($sinceDate);
            $count = (int) $this->getStatementContentSince($sinceDate, true)->fetchColumn();
            $purge = false;
        } elseif ($locationId = (int) $input->getOption('subtree')) {
            $stmt = $this->getStatementSubtree($locationId);
            $count = (int) $this->getStatementSubtree($locationId, true)->fetchColumn();
            $purge = false;
        } else {
            $stmt = $this->getStatementContentAll();
            $count = (int) $this->getStatementContentAll(true)->fetchColumn();
            $purge = !$input->getOption('no-purge');
        }

        if (!$count) {
            $output->writeln('<error>Could not find any items to index, aborting.</error>');

            return 1;
        }

        // Work with integers from here on; the option value may have been passed on as a string
        // and ceil() returns a float.
        $iterationCount = (int) $iterationCount;
        $iterations = (int) ceil($count / $iterationCount);
        $processes = $input->getOption('processes');
        // "auto" leaves one core free for the parent process.
        $processCount = $processes === 'auto' ? $this->getNumberOfCPUCores() - 1 : (int) $processes;
        // Never start more child processes than there are iterations to hand out.
        $processCount = min($iterations, $processCount);
        $processMessage = $processCount > 1 ? "using $processCount parallel child processes" : 'using single (current) process';

        if ($purge) {
            $output->writeln('Purging index...');
            $this->searchIndexer->purge();

            $output->writeln(
                "<info>Re-Creating index for {$count} items across $iterations iteration(s), $processMessage:</info>"
            );
        } else {
            $output->writeln(
                "<info>Refreshing index for {$count} items across $iterations iteration(s), $processMessage:</info>"
            );
        }

        $progress = new ProgressBar($output);
        $progress->start($iterations);

        if ($processCount > 1) {
            $this->runParallelProcess($progress, $stmt, $processCount, $iterationCount, $commit);
        } else {
            // if we only have one process, or less iterations to warrant running several, we index it all inline
            foreach ($this->fetchIteration($stmt, $iterationCount) as $contentIds) {
                $this->searchIndexer->updateSearchIndex($contentIds, $commit);
                $progress->advance(1);
            }
        }

        $progress->finish();
    }
261
262
    /**
     * Index all iterations of $stmt using a fixed number of child process slots.
     *
     * Each slot runs one child process at a time. Once per second every slot is checked: a finished
     * process advances the progress bar (and is logged if it failed), after which the slot is either
     * handed the next batch of content ids or removed when no batches are left. The method returns
     * when all slots have been removed.
     *
     * @param ProgressBar $progress
     * @param \Doctrine\DBAL\Driver\Statement $stmt
     * @param int $processCount Number of child processes to keep running in parallel.
     * @param int $iterationCount Number of content ids handed to each child process.
     * @param bool $commit
     */
    private function runParallelProcess(ProgressBar $progress, Statement $stmt, $processCount, $iterationCount, $commit)
    {
        /**
         * @var \Symfony\Component\Process\Process[]|null[]
         */
        $slots = array_fill(0, $processCount, null);
        $batches = $this->fetchIteration($stmt, $iterationCount);

        do {
            foreach ($slots as $slot => $worker) {
                if ($worker !== null) {
                    if ($worker->isRunning()) {
                        // Still busy, look at this slot again on the next pass
                        continue;
                    }

                    // The process in this slot just finished, so progress moves one step
                    $progress->advance(1);

                    if (!$worker->isSuccessful()) {
                        $this->logger->error('Child indexer process returned: ' . $worker->getExitCodeText());
                    }
                }

                if ($batches->valid()) {
                    $slots[$slot] = $this->getPhpProcess($batches->current(), $commit);
                    $slots[$slot]->start();
                    $batches->next();
                } else {
                    // Nothing left to hand out, retire the slot
                    unset($slots[$slot]);
                }
            }

            $pending = !empty($slots);
            if ($pending) {
                sleep(1);
            }
        } while ($pending);
    }
299
300
    /**
     * Build and execute the query selecting published content modified at or after $since.
     *
     * @param DateTime $since Lower bound (inclusive), compared as a unix timestamp against c.modified.
     * @param bool $count If true, select the number of matching rows instead of their ids.
     *
     * @return \Doctrine\DBAL\Driver\Statement
     */
    private function getStatementContentSince(DateTime $since, $count = false)
    {
        $q = $this->connection->createQueryBuilder()
            ->select($count ? 'count(c.id)' : 'c.id')
            ->from('ezcontentobject', 'c')
            ->where('c.status = :status')
            ->andWhere('c.modified >= :since')
            ->setParameter('status', ContentInfo::STATUS_PUBLISHED, PDO::PARAM_INT)
            ->setParameter('since', $since->getTimestamp(), PDO::PARAM_INT);

        if (!$count) {
            // Only order the id listing: ordering an aggregate result is meaningless, and ORDER BY
            // on a column that is neither grouped nor aggregated is rejected by strict SQL engines
            // (e.g. PostgreSQL).
            $q->orderBy('c.modified');
        }

        return $q->execute();
    }
318
319
    /**
     * Build and execute the query selecting published content located in a subtree.
     *
     * The subtree is matched on the path string of the given location, so the location itself
     * is included.
     *
     * @param mixed $locationId Id of the location at the root of the subtree.
     * @param bool $count If true, select the number of distinct matching content items instead of their ids.
     *
     * @return \Doctrine\DBAL\Driver\Statement
     */
    private function getStatementSubtree($locationId, $count = false)
    {
        /**
         * @var \eZ\Publish\SPI\Persistence\Content\Location\Handler
         */
        $handler = $this->getContainer()->get('ezpublish.spi.persistence.location_handler');
        $rootLocation = $handler->load($locationId);

        $selection = 'DISTINCT c.id';
        if ($count) {
            $selection = 'count(DISTINCT c.id)';
        }

        $query = $this->connection->createQueryBuilder();
        $query->select($selection);
        $query->from('ezcontentobject', 'c');
        $query->innerJoin('c', 'ezcontentobject_tree', 't', 't.contentobject_id = c.id');
        $query->where('c.status = :status');
        $query->andWhere('t.path_string LIKE :path');
        $query->setParameter('status', ContentInfo::STATUS_PUBLISHED, PDO::PARAM_INT);
        $query->setParameter('path', $rootLocation->pathString . '%', PDO::PARAM_STR);

        return $query->execute();
    }
343
344
    /**
     * Build and execute the query selecting all published content.
     *
     * @param bool $count If true, select the number of matching rows instead of their ids.
     *
     * @return \Doctrine\DBAL\Driver\Statement
     */
    private function getStatementContentAll($count = false)
    {
        $selection = 'c.id';
        if ($count) {
            $selection = 'count(c.id)';
        }

        $query = $this->connection->createQueryBuilder();
        $query->select($selection);
        $query->from('ezcontentobject', 'c');
        $query->where('c.status = :status');
        $query->setParameter('status', ContentInfo::STATUS_PUBLISHED, PDO::PARAM_INT);

        return $query->execute();
    }
359
360
    /**
     * Read content ids from the statement in batches.
     *
     * @param \Doctrine\DBAL\Driver\Statement $stmt
     * @param int $iterationCount Maximum number of content ids per batch.
     *
     * @return \Generator Yields arrays of content ids, each holding up to $iterationCount ids.
     */
    private function fetchIteration(Statement $stmt, $iterationCount)
    {
        do {
            $batch = [];
            for ($fetched = 0; $fetched < $iterationCount; ++$fetched) {
                $id = $stmt->fetch(PDO::FETCH_COLUMN);
                if ($id) {
                    $batch[] = $id;
                    continue;
                }

                // Result set is exhausted: stop right away if there is nothing to hand out,
                // otherwise fall through to yield the partially filled last batch.
                if (!$batch) {
                    return;
                }

                break;
            }

            yield $batch;
        } while (!empty($id));
    }
383
384
    /**
     * Create (without starting) the child process that re-indexes the given content ids.
     *
     * The child runs this same command through the console script with "--content-ids", the
     * current environment and, when applicable, the siteaccess, "--no-debug" and "--no-commit".
     *
     * NOTE: Symfony\Component\Process\ProcessBuilder is deprecated since Symfony 3.4 and is to be
     * removed in 4.0, where the Process class should be used instead.
     *
     * @param array $contentIds Content ids to index in the child process, must not be empty.
     * @param bool $commit
     *
     * @throws \RuntimeException If $contentIds is empty.
     *
     * @return \Symfony\Component\Process\Process
     */
    private function getPhpProcess(array $contentIds, $commit)
    {
        if (!$contentIds) {
            throw new RuntimeException("'--content-ids=' can not be empty on parallel sub process");
        }

        $arguments = [];
        $arguments[] = file_exists('bin/console') ? 'bin/console' : 'app/console';
        $arguments[] = 'ezplatform:reindex';
        $arguments[] = '--content-ids=' . implode(',', $contentIds);
        $arguments[] = '--env=' . $this->env;

        if ($this->siteaccess) {
            $arguments[] = '--siteaccess=' . $this->siteaccess;
        }

        if (!$this->isDebug) {
            $arguments[] = '--no-debug';
        }

        if (!$commit) {
            $arguments[] = '--no-commit';
        }

        $builder = new ProcessBuilder($arguments);
        // No time limit: how long a batch takes to index is not known up front
        $builder->setTimeout(null);
        $builder->setPrefix($this->getPhpPath());

        return $builder->getProcess();
    }
420
421
    /**
     * Get the path to the PHP executable, looking it up on first use and caching it afterwards.
     *
     * @throws \RuntimeException If the PHP executable could not be found.
     *
     * @return string
     */
    private function getPhpPath()
    {
        if ($this->phpPath) {
            return $this->phpPath;
        }

        $phpFinder = new PhpExecutableFinder();
        // find() returns false when no executable is found; keep that out of the string-typed
        // property by checking a local variable first and only caching a real path.
        $phpPath = $phpFinder->find();
        if (!$phpPath) {
            throw new \RuntimeException(
                'The php executable could not be found, it\'s needed for executing parallel sub processes, so add it to your PATH environment variable and try again'
            );
        }

        $this->phpPath = $phpPath;

        return $this->phpPath;
    }
440
441
    /**
     * Detect the number of CPU cores of the current machine.
     *
     * Tries, in order: counting "processor" entries in /proc/cpuinfo, "wmic" on Windows, and
     * "sysctl -a" elsewhere. Falls back to 1 when detection is not possible or gives no usable value.
     *
     * @return int Number of cores, always at least 1.
     */
    private function getNumberOfCPUCores()
    {
        $cores = 1;
        if (is_file('/proc/cpuinfo')) {
            // Linux (and potentially Windows with linux sub systems)
            $cpuinfo = file_get_contents('/proc/cpuinfo');
            if ($cpuinfo !== false && preg_match_all('/^processor/m', $cpuinfo, $matches)) {
                $cores = \count($matches[0]);
            }
        } elseif (\DIRECTORY_SEPARATOR === '\\') {
            // Windows
            if (($process = @popen('wmic cpu get NumberOfCores', 'rb')) !== false) {
                // First line is the column header, the value is on the second line
                fgets($process);
                $cores = (int) fgets($process);
                pclose($process);
            }
        } elseif (($process = @popen('sysctl -a', 'rb')) !== false) {
            // *nix (Linux, BSD and Mac)
            $output = stream_get_contents($process);
            // preg_match() stores the first capture group as a plain string in $matches[1];
            // indexing it further would only pick the first digit (e.g. "12" => 1).
            if (preg_match('/hw\.ncpu: (\d+)/', (string) $output, $matches)) {
                $cores = (int) $matches[1];
            }
            pclose($process);
        }

        // Guard against empty/unparsable output resulting in 0
        return max(1, $cores);
    }
470
}
471