Issues (48)

src/Tasks/ReviewInternalLinks.php (4 issues)

1
<?php
2
3
namespace Sunnysideup\MigrateData\Tasks;
4
5
use SilverStripe\CMS\Model\SiteTree;
0 ignored issues
show
The type SilverStripe\CMS\Model\SiteTree was not found. Maybe you did not declare it correctly or list all dependencies?

The issue could also be caused by a filter entry in the build configuration. If the path has been excluded in your configuration, e.g. excluded_paths: ["lib/*"], you can move it to the dependency path list as follows:

filter:
    dependency_paths: ["lib/*"]

For further information see https://scrutinizer-ci.com/docs/tools/php/php-scrutinizer/#list-dependency-paths

Loading history...
6
use SilverStripe\Core\Environment;
7
use SilverStripe\ORM\DataList;
8
use SilverStripe\ORM\DB;
9
10
class ReviewInternalLinks extends MigrateDataTaskBase
11
{
12
    /**
     * Title of this task.
     *
     * @var string
     */
    protected $title = 'Review Internal Links';

    /**
     * Short description of this task.
     *
     * @var string
     */
    protected $description = 'Goes through all the imported content and reviews internal links';

    /**
     * Tag type to review ('a' or 'img'); any falsy value reviews both.
     * Overwritten by run() from the "type" request variable.
     *
     * @var int|string
     */
    protected $type = 0;

    /**
     * Running number of the report row, incremented by printFields().
     *
     * @var int
     */
    protected $count = 0;

    /**
     * Number of pages fetched per batch in run().
     *
     * @var int
     */
    protected $step = 10;

    /**
     * NOTE(review): not read by any method of this class - confirm whether
     * the parent class or a subclass relies on it.
     *
     * @var int
     */
    protected $filter = 10;

    /**
     * Every URL found so far, keyed by URL; each entry holds 'count' and 'type'.
     *
     * @var array
     */
    protected $allLinks = [];

    /**
     * Log of the URL replacements made; each entry holds 'from', 'to' and 'result'.
     *
     * @var array
     */
    protected $replacements = [];

    /**
     * Names of the fields on each page whose HTML is searched for links.
     *
     * @var array
     */
    protected $fieldsToTest = [
        'Content',
    ];

    /**
     * Filter applied to the page list in run(); it is passed as-is to
     * DataList::filter(), empty means no filtering.
     *
     * Declared private because the SilverStripe config system, through which
     * run() reads this value, only picks up private statics.
     *
     * @config
     *
     * @var array
     */
    private static $filtered_class_names = [];
33
34
    /**
     * Prints an HTML table reviewing the links found in page content,
     * followed by a summary of all links and of all replacements made.
     *
     * Request variables read:
     *  - ids:  comma separated list of page IDs; only these pages are reviewed.
     *  - type: limits the review to one tag type (printFields() understands
     *          'a' and 'img').
     *  - page: 'all' walks up to 5000 pages in ID order; a number N reviews
     *          the N-th batch of 500 pages in ID order.
     * Without ids and without page, one random batch of pages is reviewed.
     *
     * @param mixed $request request object; only getVar() is called on it
     */
    public function run($request)
    {
        Environment::increaseTimeLimitTo();
        Environment::increaseMemoryLimitTo();

        $tableHTML = '
            <table>
                <thead>
                    <tr>
                        <th>Count</th>
                        <th>Links</th>
                        <th>Title</th>
                        <th>Content</th>
                    </tr>
                </thead>
                <tbody>
        ';
        $ids = [];
        if ($request->getVar('ids')) {
            $ids = explode(',', (string) $request->getVar('ids'));
        }
        if ($request->getVar('type')) {
            $this->type = $request->getVar('type');
        }
        if ([] !== $ids) {
            echo $tableHTML;
            $objects = SiteTree::get()->sort(['ID' => 'ASC'])->filter(['ID' => $ids]);
            foreach ($objects as $object) {
                $this->printFields($object);
            }
        } else {
            // NOTE(review): the text says 200, but the default mode below loads
            // a single batch of 51 pages - confirm the intended number.
            echo '
                By default 200 random pages are loaded.

            ';

            echo $tableHTML;

            $pageVar = $request->getVar('page');
            $isRandom = false;
            $start = 0;
            if ('all' === $pageVar) {
                $limit = 5000;
                $this->step = 10;
            } elseif ($pageVar) {
                // Page numbers start at 1; anything lower would give a negative offset.
                $pageNumber = max(1, (int) $pageVar);
                $limit = 500;
                $start = $limit * ($pageNumber - 1);
                echo '<h1>Page: ' . $pageNumber . '</h1>';
            } else {
                $isRandom = true;
                $limit = 50;
                // A step larger than the limit makes the loop below run exactly once.
                $this->step = 51;
                echo '<h1>Random Selection</h1>';
            }

            $filter = $this->Config()->get('filtered_class_names');
            for ($i = 0; $i < $limit; $i += $this->step) {
                // Every mode needs a list: previously only the numbered-page
                // mode built one and the other two modes worked on null.
                if ($isRandom) {
                    $objects = SiteTree::get()->sort(DB::get_conn()->random())->limit($this->step);
                } else {
                    $objects = SiteTree::get()->sort(['ID' => 'ASC'])->limit($this->step, $i + $start);
                }
                if (! empty($filter)) {
                    $objects = $objects->filter($filter);
                }
                foreach ($objects as $object) {
                    $this->printFields($object);
                }
            }
        }

        $escape = static function ($value) {
            return htmlspecialchars((string) $value, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8');
        };

        $linksAll = [];
        ksort($this->allLinks);
        foreach ($this->allLinks as $url => $details) {
            $linksAll[] = $escape($url) . ' | ' . $details['count'];
        }
        $this->printSummaryRow('Full List of Links', $linksAll);

        $replacementsAll = [];
        foreach ($this->replacements as $details) {
            // 'result' holds the field content after the replacement; it is
            // printed as-is, exactly as before.
            $replacementsAll[] = 'FR: ' . $escape($details['from'])
                . '<br />TO: ' . $escape($details['to'])
                . '<br />RS: ' . $details['result'] . '<br /><br />';
        }
        $this->printSummaryRow('Full List of Replacements', $replacementsAll);

        echo '</tbody></table>';
    }

    /**
     * Prints one summary row whose last cell shows $items as an unordered list.
     *
     * @param string $heading text of the heading cell
     * @param array  $items   ready-to-print HTML fragments, one per list item
     */
    private function printSummaryRow($heading, array $items)
    {
        echo  '
                    <tr>
                        <th>---</th>
                        <th>---</th>
                        <th>' . $heading . '</th>
                        <th>
                        <ul>
                            <li>
                                ' . implode('</li><li>', $items) . '
                            </li>
                        </ul>
                        </th>
                    </tr>
        ';
    }
146
147
    /**
     * Prints one report row for $object listing the link URLs found in its
     * reviewed fields, and adds those URLs to the overall tally.
     *
     * For every field in $this->fieldsToTest that is not empty, the HTML is
     * parsed and the href of each <a> and/or the src of each <img> is
     * collected, depending on $this->type (falsy reviews both). Each URL is
     * also handed to cleanupLittleMistake().
     *
     * @param mixed $object record to review; the reviewed fields, Title,
     *                      CMSEditLink() and Link() are read from it
     */
    public function printFields($object)
    {
        ++$this->count;
        $links = [];

        // Tag name => attribute that carries the URL.
        $reviewAll = ! $this->type;
        $requestedTag = strtolower((string) $this->type);
        $attributeByTag = [];
        if ($reviewAll || 'a' === $requestedTag) {
            $attributeByTag['a'] = 'href';
        }
        if ($reviewAll || 'img' === $requestedTag) {
            $attributeByTag['img'] = 'src';
        }

        foreach ($this->fieldsToTest as $field) {
            if (empty($object->{$field})) {
                continue;
            }

            // Non-ASCII characters become numeric entities so that the parser
            // does not misread the UTF-8 input.
            $html = mb_encode_numericentity(
                (string) $object->{$field},
                [0x80, 0x10FFFF, 0, 0x1FFFFF],
                'UTF-8'
            );

            // Collect parser warnings internally instead of suppressing them
            // with @, and check whether loading worked at all.
            $dom = new \DOMDocument();
            $previousSetting = libxml_use_internal_errors(true);
            $loaded = $dom->loadHTML($html, LIBXML_HTML_NOIMPLIED | LIBXML_HTML_NODEFDTD);
            libxml_clear_errors();
            libxml_use_internal_errors($previousSetting);
            if (false === $loaded) {
                $links[] = 'Error 1 in ' . $field;

                continue;
            }

            // There is no error entry for a tag type that was not requested:
            // reviewing only <a> tags is not an error.
            foreach ($attributeByTag as $tagName => $attribute) {
                $label = strtoupper($tagName);
                foreach ($dom->getElementsByTagName($tagName) as $node) {
                    $url = $node->getAttribute($attribute);
                    $this->cleanupLittleMistake($object, $field, $url);

                    $links[$url] = $url . ' | ' . $label . ' | ' . $field;

                    if (! isset($this->allLinks[$url])) {
                        $this->allLinks[$url] = [
                            'count' => 0,
                            'type' => $label,
                        ];
                    }
                    ++$this->allLinks[$url]['count'];
                }
            }
        }

        $escape = static function ($value) {
            return htmlspecialchars((string) $value, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8');
        };

        echo
        '<tr>
            <td>' . $this->count . '</td>
            <td>
                <h6><a href="' . $escape($object->CMSEditLink()) . '">CMS</a></h6>
                <h6><a href="' . $escape($object->Link()) . '">Site</a></h6>
            </td>
            <td>
                ' . $escape($object->Title) . '
            </td>
            <td>
                <ul>
                    <li>
                        ' . implode('</li><li>', array_map($escape, $links)) . '
                    </li>
                </ul>
            </td>
        </tr>';
    }
223
224
    /**
     * Intentionally does nothing: this task does all of its work in run().
     *
     * NOTE(review): presumably only present because MigrateDataTaskBase
     * requires it - confirm against the parent class.
     */
    protected function performMigration()
    {
        // Nothing to migrate here.
    }
227
228
    /**
     * Rewrites a URL that still points at the old site's asset folder, directly
     * in the database, and logs the change in $this->replacements.
     *
     * The draft, live and versions tables of the page are all updated. Only
     * the first matching pattern is applied: the patterns overlap and give the
     * same result, so applying both would only log the replacement twice.
     *
     * @param mixed  $object record whose ID identifies the rows to update
     * @param string $field  name of the field holding the URL
     * @param string $url    URL as found in the field
     */
    private function cleanupLittleMistake($object, $field, $url)
    {
        // Field => base table. A field without a known table is skipped rather
        // than producing a query against an empty table name.
        $tablesByField = [
            'Content' => 'SiteTree',
        ];
        if (! isset($tablesByField[$field])) {
            return;
        }
        $table = $tablesByField[$field];

        $oldNeedles = [
            'assets/oldsite/assets/' => 'images/assets/',
            'assets/oldsite/' => 'images/',
        ];
        // Table suffix => column holding the page ID. In the versions table
        // "ID" identifies the version row; the page is found via "RecordID".
        $idColumnsBySuffix = [
            '' => 'ID',
            '_Live' => 'ID',
            '_Versions' => 'RecordID',
        ];

        foreach ($oldNeedles as $oldNeedle => $newNeedle) {
            if (false === strpos($url, $oldNeedle)) {
                continue;
            }
            $replacementURL = str_replace($oldNeedle, $newNeedle, $url);

            foreach ($idColumnsBySuffix as $suffix => $idColumn) {
                // Values are bound as parameters, so a quote in the URL can no
                // longer break the statement.
                DB::prepared_query(
                    'UPDATE "' . $table . $suffix . '"
                    SET "' . $field . '" = REPLACE("' . $field . '", ?, ?)
                    WHERE "' . $idColumn . '" = ?',
                    [$url, $replacementURL, (int) $object->ID]
                );
            }

            // Read through a fresh list so that the logged result shows the
            // updated row and not a previously cached copy of the record.
            $updated = SiteTree::get()->byID($object->ID);
            $this->replacements[] = [
                'from' => $url,
                'to' => $replacementURL,
                'result' => $updated ? $updated->{$field} : '',
            ];

            break;
        }
    }
264
}
265