Completed
Push — typo3v9 ( efc308...b63611 )
by Tomas Norre
06:24
created

SubProcessExecutionStrategy::executeShellCommand()   A

Complexity

Conditions 1
Paths 1

Size

Total Lines 4

Duplication

Lines 0
Ratio 0 %

Code Coverage

Tests 0
CRAP Score 2

Importance

Changes 0
Metric Value
cc 1
nc 1
nop 1
dl 0
loc 4
ccs 0
cts 4
cp 0
crap 2
rs 10
c 0
b 0
f 0
1
<?php
2
declare(strict_types=1);
3
namespace AOE\Crawler\CrawlStrategy;
4
5
/*
6
 * This file is part of the TYPO3 Crawler Extension.
7
 *
8
 * It is free software; you can redistribute it and/or modify it under
9
 * the terms of the GNU General Public License, either version 2
10
 * of the License, or any later version.
11
 *
12
 * For the full copyright and license information, please read the
13
 * LICENSE.txt file that was distributed with this source code.
14
 *
15
 * The TYPO3 project - inspiring people to share!
16
 */
17
18
use AOE\Crawler\Configuration\ExtensionConfigurationProvider;
19
use Psr\Http\Message\UriInterface;
20
use Psr\Log\LoggerAwareInterface;
21
use Psr\Log\LoggerAwareTrait;
22
use TYPO3\CMS\Core\Core\Environment;
23
use TYPO3\CMS\Core\Utility\ExtensionManagementUtility;
24
use TYPO3\CMS\Core\Utility\GeneralUtility;
25
26
/**
 * Executes another process via shell_exec() to include cli/bootstrap.php which in turn
 * includes the index.php for frontend.
 *
 * The command line consists of the configured PHP binary (extension setting
 * "phpPath"), the bootstrap script, the frontend base path, the URL to fetch
 * and the base64-encoded, serialized request headers.
 */
class SubProcessExecutionStrategy implements LoggerAwareInterface
{
    use LoggerAwareTrait;

    /**
     * Extension configuration as delivered by the configuration provider;
     * an empty array when the provider did not return an array.
     *
     * @var array
     */
    protected $extensionSettings;

    /**
     * @param ExtensionConfigurationProvider|null $configurationProvider Provider to read the
     *        extension configuration from; when omitted one is created via GeneralUtility::makeInstance().
     */
    public function __construct(?ExtensionConfigurationProvider $configurationProvider = null)
    {
        $configurationProvider = $configurationProvider
            ?? GeneralUtility::makeInstance(ExtensionConfigurationProvider::class);
        $settings = $configurationProvider->getExtensionConfiguration();
        $this->extensionSettings = is_array($settings) ? $settings : [];
    }

    /**
     * Fetches a URL by calling a shell script.
     *
     * Returns false (after writing a debug log entry) when the URL cannot be
     * parsed, when its scheme is neither empty, "http" nor "https", or when
     * the sub process produced no output. Otherwise the unserialize()d output
     * of the sub process is returned.
     *
     * @param UriInterface $url
     * @param string $crawlerId
     * @return array|bool|mixed
     */
    public function fetchUrlContents(UriInterface $url, string $crawlerId)
    {
        $url = (string)$url;
        $parsedUrl = parse_url($url);

        if ($parsedUrl === false) {
            $this->logger->debug(
                sprintf('Could not parse_url() for string "%s"', $url),
                ['crawlerId' => $crawlerId]
            );
            return false;
        }

        // parse_url() leaves out components that are absent from the URL, so a
        // scheme-less URL has no 'scheme' key at all. URI schemes are
        // case-insensitive, hence the normalisation before the strict lookup.
        $scheme = strtolower($parsedUrl['scheme'] ?? '');
        if (!in_array($scheme, ['', 'http', 'https'], true)) {
            $this->logger->debug(
                sprintf('Scheme does not match for url "%s"', $url),
                ['crawlerId' => $crawlerId]
            );
            return false;
        }

        $requestHeaders = $this->buildRequestHeaders($parsedUrl, $crawlerId);

        // Every part is escaped individually before being joined with spaces.
        // A missing "phpPath" setting is treated as an empty string so that
        // escapeshellcmd() is never handed null under strict_types.
        $cmd = implode(' ', [
            escapeshellcmd((string)($this->extensionSettings['phpPath'] ?? '')),
            escapeshellarg(ExtensionManagementUtility::extPath('crawler') . 'cli/bootstrap.php'),
            escapeshellarg($this->getFrontendBasePath()),
            escapeshellarg($url),
            escapeshellarg(base64_encode(serialize($requestHeaders))),
        ]);

        $startTime = microtime(true);
        $content = $this->executeShellCommand($cmd);
        $this->logger->info($url . ' ' . (microtime(true) - $startTime));

        // shell_exec() yields null/false when the command failed or printed
        // nothing; unserialize() would throw a TypeError for those values.
        if (!is_string($content) || $content === '') {
            $this->logger->debug(
                sprintf('Sub process returned no output for url "%s"', $url),
                ['crawlerId' => $crawlerId]
            );
            return false;
        }

        // NOTE(review): unserialize() is applied to the raw output of the sub
        // process. This is only safe as long as that output is fully under the
        // extension's control - consider restricting 'allowed_classes'.
        return unserialize($content);
    }

    /**
     * Builds the list of raw HTTP request header lines handed to the sub process.
     *
     * @param array $url URL components as returned by parse_url(); every component is optional
     * @param string $crawlerId Value sent in the "X-T3crawler" header
     * @return array List of header lines, starting with the "GET ... HTTP/1.0" request line
     */
    protected function buildRequestHeaders(array $url, string $crawlerId)
    {
        // parse_url() only sets the keys that are present in the URL, so each
        // component gets an explicit default instead of being read blindly.
        $path = $url['path'] ?? '/';
        $query = $url['query'] ?? '';
        $host = $url['host'] ?? '';
        $user = $url['user'] ?? '';
        $password = $url['pass'] ?? '';

        $reqHeaders = [];
        // Strict comparison keeps a query string of "0", which a truthiness check would drop.
        $reqHeaders[] = 'GET ' . $path . ($query !== '' ? '?' . $query : '') . ' HTTP/1.0';
        $reqHeaders[] = 'Host: ' . $host;
        if (stripos($query, 'ADMCMD_previewWS') !== false) {
            $reqHeaders[] = 'Cookie: $Version="1"; be_typo_user="1"; $Path=/';
        }
        $reqHeaders[] = 'Connection: close';
        if ($user !== '') {
            $reqHeaders[] = 'Authorization: Basic ' . base64_encode($user . ':' . $password);
        }
        $reqHeaders[] = 'X-T3crawler: ' . $crawlerId;
        $reqHeaders[] = 'User-Agent: TYPO3 crawler';

        return $reqHeaders;
    }

    /**
     * Executes a shell command and returns the outputted result.
     *
     * Thin wrapper around shell_exec(); the result is passed through unchanged.
     *
     * @param string $command Shell command to be executed
     * @return string|false|null Outputted result of the command execution; shell_exec()
     *         returns null or false instead of a string when the command fails or prints nothing
     */
    protected function executeShellCommand($command)
    {
        return shell_exec($command);
    }

    /**
     * Gets the base path of the website frontend.
     * (e.g. if you call http://mydomain.com/cms/index.php in
     * the browser the base path is "/cms/")
     *
     * Sources are consulted in this order: the "frontendBasePath" extension
     * setting, $GLOBALS['TSFE']->absRefPrefix, and - outside of CLI mode -
     * the TYPO3_SITE_PATH environment value. Without any of them "/" is used.
     *
     * @return string Base path of the website frontend
     */
    protected function getFrontendBasePath()
    {
        $frontendBasePath = '/';

        if (!empty($this->extensionSettings['frontendBasePath'])) {
            // Get the path from the extension settings:
            $frontendBasePath = $this->extensionSettings['frontendBasePath'];
        } elseif (!empty($GLOBALS['TSFE']->absRefPrefix)) {
            // If empty, try to use config.absRefPrefix:
            $frontendBasePath = $GLOBALS['TSFE']->absRefPrefix;
        } elseif (!Environment::isCli()) {
            // If not in CLI mode the base path can be determined from $_SERVER environment:
            $frontendBasePath = GeneralUtility::getIndpEnv('TYPO3_SITE_PATH');
        }

        // Base path must be '/<pathSegments>/':
        if ($frontendBasePath !== '/') {
            // Cast guards the string functions against non-string configuration values under strict_types.
            $frontendBasePath = '/' . trim((string)$frontendBasePath, '/');
            $frontendBasePath = rtrim($frontendBasePath, '/') . '/';
        }

        return $frontendBasePath;
    }
}
158