Passed
Push — typo3v9 ( 2404ee...b9b5fa )
by Tomas Norre
05:51
created

SubProcessExecutionStrategy::executeShellCommand()   A

Complexity

Conditions 1
Paths 1

Size

Total Lines 3
Code Lines 1

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 0
CRAP Score 2

Importance

Changes 1
Bugs 0 Features 0
Metric Value
cc 1
eloc 1
c 1
b 0
f 0
nc 1
nop 1
dl 0
loc 3
ccs 0
cts 3
cp 0
crap 2
rs 10
1
<?php
2
3
declare(strict_types=1);
4
5
namespace AOE\Crawler\CrawlStrategy;
6
7
/*
8
 * (c) 2020 AOE GmbH <dev@aoe.com>
9
 *
10
 * This file is part of the TYPO3 Crawler Extension.
11
 *
12
 * It is free software; you can redistribute it and/or modify it under
13
 * the terms of the GNU General Public License, either version 2
14
 * of the License, or any later version.
15
 *
16
 * For the full copyright and license information, please read the
17
 * LICENSE.txt file that was distributed with this source code.
18
 *
19
 * The TYPO3 project - inspiring people to share!
20
 */
21
22
use AOE\Crawler\Configuration\ExtensionConfigurationProvider;
23
use AOE\Crawler\Utility\PhpBinaryUtility;
24
use Psr\Http\Message\UriInterface;
25
use Psr\Log\LoggerAwareInterface;
26
use Psr\Log\LoggerAwareTrait;
27
use TYPO3\CMS\Core\Core\Environment;
28
use TYPO3\CMS\Core\Utility\CommandUtility;
29
use TYPO3\CMS\Core\Utility\ExtensionManagementUtility;
30
use TYPO3\CMS\Core\Utility\GeneralUtility;
31
32
/**
33
 * Executes another process via shell_exec() to include cli/bootstrap.php which in turn
34
 * includes the index.php for frontend.
35
 */
36
class SubProcessExecutionStrategy implements LoggerAwareInterface
{
    use LoggerAwareTrait;

    /**
     * Crawler extension configuration as returned by the configuration provider.
     *
     * @var array
     */
    protected $extensionSettings;

    /**
     * @param ExtensionConfigurationProvider|null $configurationProvider Provider to read the extension
     *        configuration from; when omitted, one is created via GeneralUtility::makeInstance().
     */
    public function __construct(?ExtensionConfigurationProvider $configurationProvider = null)
    {
        $configurationProvider = $configurationProvider ?? GeneralUtility::makeInstance(ExtensionConfigurationProvider::class);
        $settings = $configurationProvider->getExtensionConfiguration();
        // Defensive fallback: keep the property an array even if the provider returns something else.
        $this->extensionSettings = is_array($settings) ? $settings : [];
    }

    /**
     * Fetches a URL by calling a shell script.
     *
     * Builds a command line consisting of the PHP binary, the crawler's cli/bootstrap.php,
     * the frontend base path, the URL and the base64-encoded serialized request headers,
     * executes it and unserializes whatever the sub process printed.
     *
     * @param UriInterface $url URL to fetch
     * @param string $crawlerId Identifier sent along in the "X-T3crawler" request header
     * @return array|bool|mixed The unserialized output of the sub process; false if the URL cannot
     *         be parsed, its scheme is neither empty, "http" nor "https", or the sub process
     *         produced no usable output
     */
    public function fetchUrlContents(UriInterface $url, string $crawlerId)
    {
        $url = (string)$url;
        $parsedUrl = parse_url($url);

        if ($parsedUrl === false) {
            $this->logger->debug(
                sprintf('Could not parse_url() for string "%s"', $url),
                ['crawlerId' => $crawlerId]
            );
            return false;
        }

        // parse_url() omits components that are not present, so a schemeless URL has no 'scheme' key.
        $scheme = $parsedUrl['scheme'] ?? '';
        if (!in_array($scheme, ['', 'http', 'https'], true)) {
            $this->logger->debug(
                sprintf('Scheme does not match for url "%s"', $url),
                ['crawlerId' => $crawlerId]
            );
            return false;
        }

        $requestHeaders = $this->buildRequestHeaders($parsedUrl, $crawlerId);

        $commandParts = [
            ExtensionManagementUtility::extPath('crawler') . 'cli/bootstrap.php',
            $this->getFrontendBasePath(),
            $url,
            base64_encode(serialize($requestHeaders)),
        ];
        $commandParts = CommandUtility::escapeShellArguments($commandParts);
        $cmd = escapeshellcmd(PhpBinaryUtility::getPhpBinary());
        $cmd .= ' ' . implode(' ', $commandParts);

        $startTime = microtime(true);
        $content = $this->executeShellCommand($cmd);
        $this->logger->info($url . ' ' . (microtime(true) - $startTime));

        // shell_exec() yields null/false when the command fails or prints nothing. Passing that
        // to unserialize() would raise a TypeError because this file declares strict_types=1.
        if (!is_string($content)) {
            $this->logger->error(
                sprintf('Sub process returned no output for url "%s"', $url),
                ['crawlerId' => $crawlerId]
            );
            return false;
        }

        // NOTE(review): unserialize() runs on raw sub process output without an "allowed_classes"
        // restriction. Confirm what cli/bootstrap.php emits before tightening this.
        return unserialize($content);
    }

    /**
     * Builds the list of raw HTTP request header lines handed over to the sub process.
     *
     * @param array $url URL components as returned by parse_url(); absent components are treated as empty
     * @param string $crawlerId Value of the "X-T3crawler" header
     * @return array List of header lines, starting with the "GET ... HTTP/1.0" request line
     */
    protected function buildRequestHeaders(array $url, string $crawlerId)
    {
        $path = $url['path'] ?? '';
        $query = $url['query'] ?? '';
        $user = $url['user'] ?? '';

        $reqHeaders = [];
        // Compare against '' explicitly so that a query string of "0" is not dropped.
        $reqHeaders[] = 'GET ' . $path . ($query !== '' ? '?' . $query : '') . ' HTTP/1.0';
        $reqHeaders[] = 'Host: ' . ($url['host'] ?? '');
        $reqHeaders[] = 'Connection: close';
        if ($user !== '') {
            $reqHeaders[] = 'Authorization: Basic ' . base64_encode($user . ':' . ($url['pass'] ?? ''));
        }
        $reqHeaders[] = 'X-T3crawler: ' . $crawlerId;
        $reqHeaders[] = 'User-Agent: TYPO3 crawler';
        return $reqHeaders;
    }

    /**
     * Executes a shell command and returns the outputted result.
     *
     * @param string $command Shell command to be executed
     * @return string|false|null Outputted result of the command execution as returned by shell_exec();
     *         null or false when the command failed or produced no output
     */
    protected function executeShellCommand($command)
    {
        return shell_exec($command);
    }

    /**
     * Gets the base path of the website frontend.
     * (e.g. if you call http://mydomain.com/cms/index.php in
     * the browser the base path is "/cms/")
     *
     * Lookup order: the "frontendBasePath" extension setting, then $GLOBALS['TSFE']->absRefPrefix,
     * then (outside of CLI mode only) the TYPO3_SITE_PATH environment value. Defaults to "/".
     *
     * @return string Base path of the website frontend
     */
    protected function getFrontendBasePath()
    {
        $frontendBasePath = '/';

        if (!empty($this->extensionSettings['frontendBasePath'])) {
            // Get the path from the extension settings:
            $frontendBasePath = $this->extensionSettings['frontendBasePath'];
        } elseif (!empty($GLOBALS['TSFE']->absRefPrefix)) {
            // If empty, try to use config.absRefPrefix:
            $frontendBasePath = $GLOBALS['TSFE']->absRefPrefix;
        } elseif (!Environment::isCli()) {
            // If not in CLI mode the base path can be determined from $_SERVER environment:
            $frontendBasePath = GeneralUtility::getIndpEnv('TYPO3_SITE_PATH');
        }

        // Base path must be '/<pathSegments>/':
        if ($frontendBasePath !== '/') {
            // Cast first: configuration values are not guaranteed to be strings and
            // ltrim()/rtrim() reject non-strings under strict_types.
            $frontendBasePath = '/' . ltrim((string)$frontendBasePath, '/');
            $frontendBasePath = rtrim($frontendBasePath, '/') . '/';
        }

        return $frontendBasePath;
    }
}
159