Completed
Push — 6.7 ( 730fb7...a124d5 )
by André
39:46 queued 26:19
created

ReindexCommand::getNumberOfCPUCores()   B

Complexity

Conditions 6
Paths 6

Size

Total Lines 26
Code Lines 17

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 6
eloc 17
nc 6
nop 0
dl 0
loc 26
rs 8.439
c 0
b 0
f 0
1
<?php
2
3
/**
4
 * This file is part of the eZ Publish Kernel package.
5
 *
6
 * @copyright Copyright (C) eZ Systems AS. All rights reserved.
7
 * @license For full copyright and license information view LICENSE file distributed with this source code.
8
 */
9
namespace eZ\Bundle\EzPublishCoreBundle\Command;
10
11
use eZ\Publish\SPI\Persistence\Content\ContentInfo;
12
use eZ\Publish\Core\Search\Common\Indexer;
13
use eZ\Publish\Core\Search\Common\IncrementalIndexer;
14
use Doctrine\DBAL\Driver\Statement;
15
use Symfony\Bundle\FrameworkBundle\Command\ContainerAwareCommand;
16
use Symfony\Component\Console\Helper\ProgressBar;
17
use Symfony\Component\Console\Input\InputInterface;
18
use Symfony\Component\Console\Input\InputOption;
19
use Symfony\Component\Console\Output\OutputInterface;
20
use Symfony\Component\Process\PhpExecutableFinder;
21
use Symfony\Component\Process\ProcessBuilder;
22
use RuntimeException;
23
use DateTime;
24
use PDO;
25
26
class ReindexCommand extends ContainerAwareCommand
27
{
28
    /**
     * Search engine indexer service; assigned from the container in initialize().
     *
     * @var \eZ\Publish\Core\Search\Common\Indexer|\eZ\Publish\Core\Search\Common\IncrementalIndexer
     */
31
    private $searchIndexer;
32
33
    /**
     * Legacy storage engine database connection; assigned from the container in initialize().
     *
     * @var \Doctrine\DBAL\Connection
     */
36
    private $connection;
37
38
    /**
     * Path to the PHP executable used for sub processes; unset until getPhpPath() resolves it.
     *
     * @var string|null
     */
41
    private $phpPath;
42
43
    /**
     * Resolves the services required by {@see execute()} before the command runs.
     *
     * Fetches the search indexer and the legacy storage connection from the container and
     * rejects an indexer service that does not extend the common Indexer.
     *
     * @param InputInterface $input
     * @param OutputInterface $output
     *
     * @throws RuntimeException if the configured search indexer service is not an Indexer
     */
    public function initialize(InputInterface $input, OutputInterface $output)
    {
        parent::initialize($input, $output);
        $this->searchIndexer = $this->getContainer()->get('ezpublish.spi.search.indexer');
        $this->connection = $this->getContainer()->get('ezpublish.api.storage_engine.legacy.connection');
        if (!$this->searchIndexer instanceof Indexer) {
            // Report what was actually found: get_parent_class() would name the parent (or yield
            // false, printed as an empty string, for a class without parent) instead of the culprit.
            $found = is_object($this->searchIndexer)
                ? get_class($this->searchIndexer)
                : gettype($this->searchIndexer);

            throw new RuntimeException(
                sprintf(
                    'Expected to find Search Engine Indexer but found "%s" instead',
                    $found
                )
            );
        }
    }
63
64
    /**
     * Declares the command name, description, options and help text.
     *
     * The default for "processes" is the detected number of CPU cores minus one (never below 1),
     * which is what the option description promises.
     */
    protected function configure()
    {
        // Leave one core free for the parent process / database, as documented on the option below.
        // Note: this runs CPU detection every time the command is configured.
        $defaultProcesses = max(1, $this->getNumberOfCPUCores() - 1);

        $this
            ->setName('ezplatform:reindex')
            ->setDescription('Recreate or Refresh search engine index')
            ->addOption(
                'iteration-count',
                'c',
                InputOption::VALUE_OPTIONAL,
                'Number of objects to be indexed in a single iteration, for avoiding using too much memory',
                50
            )->addOption(
                'no-commit',
                null,
                InputOption::VALUE_NONE,
                'Do not commit after each iteration'
            )->addOption(
                'no-purge',
                null,
                InputOption::VALUE_NONE,
                'Do not purge before indexing'
            )->addOption(
                'since',
                null,
                InputOption::VALUE_OPTIONAL,
                'Refresh changes since a given time, any format understood by DateTime. Implies "no-purge", can not be combined with "content-ids" or "subtree"'
            )->addOption(
                'content-ids',
                null,
                InputOption::VALUE_OPTIONAL,
                'Comma separated list of content id\'s to refresh (deleted/updated/added). Implies "no-purge", can not be combined with "since" or "subtree"'
            )->addOption(
                'subtree',
                null,
                InputOption::VALUE_OPTIONAL,
                'Location Id to index subtree of (incl self). Implies "no-purge", can not be combined with "since" or "content-ids"'
            )->addOption(
                'processes',
                null,
                InputOption::VALUE_OPTIONAL,
                'Number of sub processes to spawn in parallel handling iterations, default number is number of CPU cores -1, set to 1 or 0 to disable',
                $defaultProcesses
            )->setHelp(
                <<<EOT
The command <info>%command.name%</info> indexes current configured database in configured search engine index.


Example usage:
- Refresh (add/update) index changes since yesterday:
  <comment>ezplatform:reindex --since=yesterday</comment>
  See: http://php.net/manual/en/datetime.formats.php

- Refresh (add/update/remove) index on a set of content id's:
  <comment>ezplatform:reindex --content-ids=2,34,68</comment>

- Refresh (add/update) index of a subtree:
  <comment>ezplatform:reindex --subtree=45</comment>

 - Refresh (add/update) the whole index using 3 processes, & let search engine handle commits itself using auto commit:
   <comment>ezplatform:reindex --no-purge --no-commit --processes=3</comment>

EOT
            );
    }
131
132
    /**
     * Validates the iteration count and runs the indexing strategy the configured indexer supports.
     *
     * Indexers implementing IncrementalIndexer are handled by indexIncrementally(), whose result
     * is returned; any other indexer gets a deprecation notice and a plain createSearchIndex() call.
     *
     * @param InputInterface $input
     * @param OutputInterface $output
     *
     * @return mixed Result of indexIncrementally(), or null on the deprecated path
     *
     * @throws RuntimeException if "iteration-count" is not a number greater than zero
     */
    protected function execute(InputInterface $input, OutputInterface $output)
    {
        $commit = !$input->getOption('no-commit');
        $iterationCount = $input->getOption('iteration-count');

        $hasUsableIterationCount = is_numeric($iterationCount) && (int) $iterationCount >= 1;
        if (!$hasUsableIterationCount) {
            throw new RuntimeException("'--iteration-count' option should be > 0, got '{$iterationCount}'");
        }

        if ($this->searchIndexer instanceof IncrementalIndexer) {
            $output->writeln('Re-indexing started for search engine: ' . $this->searchIndexer->getName());
            $output->writeln('');

            $exitCode = $this->indexIncrementally($input, $output, $iterationCount, $commit);

            $output->writeln('');
            $output->writeln('Finished re-indexing');

            return $exitCode;
        }

        // Legacy indexers only know how to rebuild everything; tell the user which options are ignored.
        $output->writeln(<<<EOT
DEPRECATED:
Running indexing against an Indexer that has not been updated to use IncrementalIndexer abstract.

Options that won't be taken into account:
- since
- content-ids
- processes
- no-purge
EOT
        );
        $this->searchIndexer->createSearchIndex($output, (int) $iterationCount, !$commit);
    }
168
169
    /**
     * Indexes content using the IncrementalIndexer API.
     *
     * With "content-ids" the given ids are refreshed directly. Otherwise the set of content is
     * selected by "since", "subtree" or (default) all published content, optionally purging the
     * index first, and is processed in batches of $iterationCount either inline or across
     * parallel sub processes.
     *
     * @param InputInterface $input
     * @param OutputInterface $output
     * @param int|string $iterationCount Number of content items per iteration, expected to be >= 1
     * @param bool $commit Whether the indexer should commit after each iteration
     *
     * @return mixed Result of updateSearchIndex() for "content-ids", otherwise 1 if nothing was
     *               found to index and 0 once indexing has completed
     */
    protected function indexIncrementally(InputInterface $input, OutputInterface $output, $iterationCount, $commit)
    {
        if ($contentIds = $input->getOption('content-ids')) {
            $output->writeln('Indexing list of content id\'s');

            return $this->searchIndexer->updateSearchIndex(explode(',', $contentIds), $commit);
        }

        if ($since = $input->getOption('since')) {
            // Resolve the cutoff once so the id query and the count query agree, even for
            // relative formats such as "now" or "-1 hour".
            $sinceDate = new DateTime($since);
            $stmt = $this->getStatementContentSince($sinceDate);
            $count = (int) $this->getStatementContentSince($sinceDate, true)->fetchColumn();
            $purge = false;
        } elseif ($locationId = (int) $input->getOption('subtree')) {
            $stmt = $this->getStatementSubtree($locationId);
            $count = (int) $this->getStatementSubtree($locationId, true)->fetchColumn();
            $purge = false;
        } else {
            $stmt = $this->getStatementContentAll();
            $count = (int) $this->getStatementContentAll(true)->fetchColumn();
            $purge = !$input->getOption('no-purge');
        }

        if (!$count) {
            $output->writeln('<error>Could not find any items to index, aborting.</error>');

            return 1;
        }

        // ceil() returns a float; keep iteration and process counts as integers.
        $iterations = (int) ceil($count / $iterationCount);
        // Never spawn more processes than there are iterations to hand out.
        $processCount = min((int) $input->getOption('processes'), $iterations);
        $processMessage = $processCount > 1 ? "using $processCount parallel processes" : 'using single process';

        if ($purge) {
            $output->writeln('Purging index...');
            $this->searchIndexer->purge();

            $output->writeln(
                "<info>Re-Creating index for {$count} items across $iterations iteration(s), $processMessage:</info>"
            );
        } else {
            $output->writeln(
                "<info>Refreshing index for {$count} items across $iterations iteration(s), $processMessage:</info>"
            );
        }

        $progress = new ProgressBar($output);
        $progress->start($iterations);

        if ($processCount > 1) {
            $this->runParallelProcess($progress, $stmt, $processCount, (int) $iterationCount, $commit);
        } else {
            // if we only have one process, or less iterations to warrant running several, we index it all inline
            foreach ($this->fetchIteration($stmt, $iterationCount) as $contentIds) {
                $this->searchIndexer->updateSearchIndex($contentIds, $commit);
                $progress->advance(1);
            }
        }

        $progress->finish();

        // Explicit success exit code, mirroring the explicit 1 returned above.
        return 0;
    }
230
231
    /**
     * Works through all iterations using a fixed number of sub process slots.
     *
     * Each slot holds at most one process. Whenever a slot is free or its process has stopped
     * running, the next batch of content ids is handed to a new process in that slot; once no
     * batches remain the slot is dropped. The loop polls once per second until no slots are left.
     *
     * @param ProgressBar $progress Advanced by one for every process found to have stopped
     * @param \Doctrine\DBAL\Driver\Statement $stmt Statement yielding the content ids to index
     * @param int $processCount Number of slots, i.e. maximum number of concurrent sub processes
     * @param int $iterationCount Number of content ids per batch
     * @param bool $commit Passed on to every sub process
     */
    private function runParallelProcess(ProgressBar $progress, Statement $stmt, $processCount, $iterationCount, $commit)
    {
        /**
         * @var \Symfony\Component\Process\Process[]|null[]
         */
        $slots = array_fill(0, $processCount, null);
        $batches = $this->fetchIteration($stmt, $iterationCount);

        while (!empty($slots)) {
            foreach ($slots as $slot => $worker) {
                $isOccupied = $worker !== null;
                if ($isOccupied && $worker->isRunning()) {
                    // Still busy, look at the next slot
                    continue;
                }

                if ($isOccupied) {
                    // The process in this slot has stopped, so one more iteration is done
                    $progress->advance(1);
                }

                if ($batches->valid()) {
                    $slots[$slot] = $this->getPhpProcess($batches->current(), $commit);
                    $slots[$slot]->start();
                    $batches->next();
                } else {
                    // Nothing left to hand out, retire the slot
                    unset($slots[$slot]);
                }
            }

            if (!empty($slots)) {
                sleep(1);
            }
        }
    }
264
265
    /**
     * Executes a query for published content modified at or after the given time.
     *
     * @param DateTime $since Cutoff; its unix timestamp is compared against "modified"
     * @param bool $count If true a single count(c.id) value is selected instead of the ids
     *
     * @return \Doctrine\DBAL\Driver\Statement
     */
    private function getStatementContentSince(DateTime $since, $count = false)
    {
        $q = $this->connection->createQueryBuilder()
            ->select($count ? 'count(c.id)' : 'c.id')
            ->from('ezcontentobject', 'c')
            ->where('c.status = :status')->andWhere('c.modified >= :since')
            ->setParameter('status', ContentInfo::STATUS_PUBLISHED, PDO::PARAM_INT)
            ->setParameter('since', $since->getTimestamp(), PDO::PARAM_INT);

        if (!$count) {
            // Ordering only makes sense for the id list; on the aggregate query it is useless and
            // databases such as PostgreSQL reject ORDER BY on a column that is neither grouped
            // nor aggregated.
            $q->orderBy('c.modified');
        }

        return $q->execute();
    }
283
284
    /**
     * Executes a query for published content having a location at or below the given location.
     *
     * The location is loaded through the persistence location handler and its path string is
     * used as a LIKE prefix against the content tree.
     *
     * @param mixed $locationId Id of the subtree root location
     * @param bool $count If true a single count(DISTINCT c.id) value is selected instead of the ids
     *
     * @return \Doctrine\DBAL\Driver\Statement
     */
    private function getStatementSubtree($locationId, $count = false)
    {
        /**
         * @var \eZ\Publish\SPI\Persistence\Content\Location\Handler
         */
        $locationHandler = $this->getContainer()->get('ezpublish.spi.persistence.location_handler');
        $location = $locationHandler->load($locationId);
        $q = $this->connection->createQueryBuilder()
            ->select($count ? 'count(DISTINCT c.id)' : 'DISTINCT c.id')
            ->from('ezcontentobject', 'c')
            ->innerJoin('c', 'ezcontentobject_tree', 't', 't.contentobject_id = c.id')
            ->where('c.status = :status')
            ->andWhere('t.path_string LIKE :path')
            ->setParameter('status', ContentInfo::STATUS_PUBLISHED, PDO::PARAM_INT)
            ->setParameter('path', $location->pathString . '%', PDO::PARAM_STR);

        if (!$count) {
            // Ordering is pointless for the aggregate query, and databases such as PostgreSQL
            // reject ORDER BY on a column that is neither grouped nor aggregated.
            // NOTE(review): ordering a DISTINCT select by a column outside the select list is
            // also refused by some databases - confirm whether this ordering is needed at all.
            $q->orderBy('t.path_string');
        }

        return $q->execute();
    }
309
310
    /**
     * Executes a query covering every published content object.
     *
     * @param bool $count If true a single count(c.id) value is selected instead of the ids
     *
     * @return \Doctrine\DBAL\Driver\Statement
     */
    private function getStatementContentAll($count = false)
    {
        if ($count) {
            $selection = 'count(c.id)';
        } else {
            $selection = 'c.id';
        }

        $builder = $this->connection->createQueryBuilder();
        $builder
            ->select($selection)
            ->from('ezcontentobject', 'c')
            ->where('c.status = :status')
            ->setParameter('status', ContentInfo::STATUS_PUBLISHED, PDO::PARAM_INT);

        return $builder->execute();
    }
325
326
    /**
     * Reads content ids from the statement and yields them in batches.
     *
     * Every yielded batch is a non-empty array holding at most $iterationCount ids. No trailing
     * empty batch is produced when the number of rows is an exact multiple of $iterationCount:
     * an empty batch would make a sub process run without "--content-ids" (i.e. index
     * everything) and would advance the progress bar past its maximum.
     *
     * @param \Doctrine\DBAL\Driver\Statement $stmt
     * @param int $iterationCount Maximum number of content ids per batch
     *
     * @return \Generator Yields arrays of content ids, each with up to $iterationCount entries.
     */
    private function fetchIteration(Statement $stmt, $iterationCount)
    {
        if ($iterationCount < 1) {
            // Nothing sensible can be batched; also guards the loop below against never ending.
            return;
        }

        do {
            $contentIds = [];
            $exhausted = false;
            for ($i = 0; $i < $iterationCount; ++$i) {
                $contentId = $stmt->fetch(PDO::FETCH_COLUMN);
                if (!$contentId) {
                    // fetch() gave no further row: the statement is drained
                    $exhausted = true;
                    break;
                }

                $contentIds[] = $contentId;
            }

            if (!empty($contentIds)) {
                yield $contentIds;
            }
        } while (!$exhausted);
    }
347
348
    /**
     * Builds (without starting) a sub process re-running this command for a set of content ids.
     *
     * The console script is "bin/console" when that file exists relative to the current working
     * directory, otherwise "app/console"; it is prefixed with the PHP executable path and has
     * no timeout.
     *
     * NOTE(review): the parent's "--env" is not forwarded here - confirm sub processes end up
     * in the intended environment.
     *
     * @param array $contentIds Content ids passed as comma separated "--content-ids" value
     * @param bool $commit When false, "--no-commit" is appended
     *
     * @return \Symfony\Component\Process\Process
     */
    private function getPhpProcess(array $contentIds, $commit)
    {
        if (file_exists('bin/console')) {
            $consoleScript = 'bin/console';
        } else {
            $consoleScript = 'app/console';
        }

        $builder = new ProcessBuilder([
            $consoleScript,
            'ezplatform:reindex',
            '--content-ids=' . implode(',', $contentIds),
        ]);
        $builder->setTimeout(null);
        $builder->setPrefix($this->getPhpPath());

        if (!$commit) {
            $builder->add('--no-commit');
        }

        return $builder->getProcess();
    }
370
371
    /**
     * Returns the path to the PHP executable, resolving it on first use.
     *
     * The path is looked up with PhpExecutableFinder and cached on the instance. It is returned
     * on every call, including the first one that performs the lookup.
     *
     * @return string
     *
     * @throws RuntimeException if no PHP executable could be found
     */
    private function getPhpPath()
    {
        if ($this->phpPath) {
            return $this->phpPath;
        }

        $phpFinder = new PhpExecutableFinder();
        $phpPath = $phpFinder->find();
        // find() yields false when nothing was found; check explicitly and never store a
        // non-string in the string typed property.
        if ($phpPath === false || $phpPath === '') {
            throw new RuntimeException(
                'The php executable could not be found, it\'s needed for executing parallel sub processes, so add it to your PATH environment variable and try again'
            );
        }

        $this->phpPath = $phpPath;

        return $this->phpPath;
    }
388
389
    /**
     * Detects the number of CPU cores of the machine, best effort.
     *
     * Tries, in order: counting "processor" lines in /proc/cpuinfo, "wmic cpu get NumberOfCores"
     * when the directory separator is a backslash, and the "hw.ncpu" entry of "sysctl -a".
     * Falls back to 1 whenever detection is not possible or yields nothing usable.
     *
     * @return int Number of cores, always >= 1
     */
    private function getNumberOfCPUCores()
    {
        $cores = 1;
        if (is_file('/proc/cpuinfo')) {
            // Linux (and potentially Windows with linux sub systems)
            $cpuinfo = file_get_contents('/proc/cpuinfo');
            if ($cpuinfo !== false) {
                // preg_match_all() returns the number of full matches (or false on error)
                $cores = (int) preg_match_all('/^processor/m', $cpuinfo, $matches);
            }
        } elseif (DIRECTORY_SEPARATOR === '\\') {
            // Windows
            if (($process = @popen('wmic cpu get NumberOfCores', 'rb')) !== false) {
                // First line is the column header, the value follows on the second line
                fgets($process);
                $cores = (int) fgets($process);
                pclose($process);
            }
        } elseif (($process = @popen('sysctl -a', 'rb')) !== false) {
            // *nix (Linux, BSD and Mac)
            $output = (string) stream_get_contents($process);
            // $matches[1] is the whole captured number; indexing it with [0] would only take
            // its first digit (e.g. 1 for 16 cores).
            if (preg_match('/hw\.ncpu: (\d+)/', $output, $matches)) {
                $cores = (int) $matches[1];
            }
            pclose($process);
        }

        // Detection can come back empty (no matching lines, unparsable output); never report < 1.
        return max(1, $cores);
    }
418
}
419