ParseTopicsCommand   A
last analyzed

Complexity

Total Complexity 18

Size/Duplication

Total Lines 180
Duplicated Lines 0 %

Coupling/Cohesion

Components 0
Dependencies 10

Test Coverage

Coverage 0%

Importance

Changes 2
Bugs 1 Features 0
Metric Value
wmc 18
c 2
b 1
f 0
lcom 0
cbo 10
dl 0
loc 180
ccs 0
cts 135
cp 0
rs 10

2 Methods

Rating   Name   Duplication   Size   Complexity  
A configure() 0 4 1
D execute() 0 169 17
1
<?php
2
3
namespace Comrade42\PhpBBParser\Command;
4
5
use Comrade42\PhpBBParser\Helper\BBCodes\PCREParser;
6
use Goutte\Client;
7
use Symfony\Component\BrowserKit\Cookie;
8
use Symfony\Component\Console\Input\InputInterface;
9
use Symfony\Component\Console\Output\OutputInterface;
10
use Symfony\Component\DomCrawler\Crawler;
11
12
/**
 * Class ParseTopicsCommand
 *
 * Console command "parse:topics". Asks which forum to parse, walks that forum's
 * topic listing page by page over HTTP, then walks every topic's posts page by
 * page, and stores topics and posts through the configured entity bridge.
 *
 * @package Comrade42\PhpBBParser\Command
 */
class ParseTopicsCommand extends ContainerAwareCommand
{
    /** Step of the "p{offset}" part of forum listing URLs; a shorter page ends the listing. */
    const TOPICS_PER_PAGE   = 50;
    /** Step of the "p{offset}" part of topic URLs; a shorter page ends the topic. */
    const POSTS_PER_PAGE    = 15;

    /**
     * Registers the command name and description.
     */
    protected function configure()
    {
        $this->setName('parse:topics')->setDescription('Parse topics');
    }

    /**
     * Runs the interactive parse of a single forum.
     *
     * Container entries read: services "doctrine" and "bridge"; parameters
     * "fa_sid_cookie_name", "fa_sid_cookie_value" and "forum_url".
     *
     * NOTE(review): any non-200 response is retried after one second with no
     * upper bound, so a page that permanently answers e.g. 404 keeps this
     * command looping. Behaviour kept as-is; confirm whether a cap is wanted.
     *
     * @param InputInterface  $input  Console input (not read by this command).
     * @param OutputInterface $output Console output used for the forum prompt and the request log.
     */
    protected function execute(InputInterface $input, OutputInterface $output)
    {
        /** @var \Symfony\Component\Console\Helper\DialogHelper $dialog */
        $dialog = $this->getHelperSet()->get('dialog');
        /** @var \Doctrine\ORM\EntityManager $entityManager */
        $entityManager = $this->container->get('doctrine');
        /** @var \Comrade42\PhpBBParser\Bridge\BridgeInterface $entityBridge */
        $entityBridge = $this->container->get('bridge');

        // Build the "forum id => title" menu, ordered by id.
        $choices = array();
        foreach ($entityBridge->getForumList($entityManager) as $forum)
        {
            $choices[$forum->getId()] = $forum->getTitle();
        }

        ksort($choices);
        $forumId = $dialog->select($output, 'Please select forum for parsing:', $choices);

        // Every request carries the configured cookie (presumably the forum session id - confirm).
        $client = new Client();
        $client->getCookieJar()->set(new Cookie(
            $this->container->getParameter('fa_sid_cookie_name'),
            $this->container->getParameter('fa_sid_cookie_value')
        ));

        $baseUrl = rtrim($this->container->getParameter('forum_url'), '/');

        // The offset only advances after a page was fetched and processed,
        // so a failed request is simply repeated with the same offset.
        $offset = 0;
        while (true)
        {
            $url = "{$baseUrl}/f{$forumId}p{$offset}-forum";
            $output->write(str_pad("<info>⇒ HTTP GET: {$url}</info> ", 80));

            $crawler = $client->request('GET', $url);

            $status = $client->getInternalResponse()->getStatus();
            $output->writeln("<info>[{$status}]</info>");

            if ((int) $status !== 200)
            {
                sleep(1);
                continue;
            }

            $topicRows = $crawler->filter('#main-content ul.topics li.row');
            $topicRows->each(function (Crawler $node, $index) use ($output, $entityManager, $entityBridge, $client, $baseUrl, $forumId)
            {
                // Topic id is cut out of the title link href (first 2 and last 6 characters dropped).
                $link = $node->filter('dd.dterm a.topictitle');
                $topicId = intval(substr($link->attr('href'), 2, -6));
                $title = $link->text();

                // Author id is 0 unless exactly one author link is present.
                $link = $node->filter('dd.dterm span.span-tab a');
                if ($link->count() == 1) $authorId = intval(substr($link->attr('href'), 2));
                else $authorId = 0;

                // The icon file name (last path segment of the background image) encodes
                // the topic state. A missing icon node or an unexpected style attribute
                // leaves $icon empty, which selects the default branch below.
                $icon = '';
                $iconNode = $node->filter('dl.icon');
                if ($iconNode->count() > 0
                    && preg_match("/background-image:url\('(.+)'\);/iU", (string) $iconNode->attr('style'), $matches))
                {
                    $pathParts = explode('/', $matches[1]);
                    $icon = end($pathParts);
                }

                switch ($icon)
                {
                    case 'topic_unread_locked.gif':
                    case 'topic_read_locked.gif':
                        $isSticky = false;
                        $isLocked = true;
                        break;

                    case 'announce_read.gif':
                    case 'folder_announce.gif':
                    case 'sticky_read.gif':
                        $isSticky = true;
                        $isLocked = false;
                        break;

                    default:
                        $isSticky = false;
                        $isLocked = false;
                }

                // Two links: member link followed by last-message link.
                // Otherwise the first link (if any) is taken as the last-message link.
                $links = $node->filter('dd.lastpost a');
                $memberUpdatedId = 0;
                $lastMessageId = 0;
                if ($links->count() > 0)
                {
                    if ($links->count() == 2)
                    {
                        $memberUpdatedId = intval(substr($links->first()->attr('href'), 2));
                        $lastMessageHref = $links->last()->attr('href');
                    }
                    else
                    {
                        $lastMessageHref = $links->first()->attr('href');
                    }

                    // The message id is whatever follows the last '#' of the href.
                    $lastMessageId = intval(substr($lastMessageHref, strrpos($lastMessageHref, '#') + 1));
                }

                $topic = $entityBridge->getTopicEntity($entityManager, $topicId);
                $topic->setForumId($forumId)
                    ->setMemberStartedId($authorId)
                    ->setMemberUpdatedId($memberUpdatedId)
                    ->setLastMessageId($lastMessageId)
                    ->setTitle($title)
                    ->setIsSticky($isSticky)
                    ->setIsLocked($isLocked)
                    ->setRepliesNumber(intval($node->filter('dd.posts')->text()))
                    ->setViewsNumber(intval($node->filter('dd.views')->text()))
                    ->setOrder($index + 1);

                // Same paging scheme as the forum listing, with its own counter.
                // The class name is spelled out instead of static:: inside the closure, as in the original.
                $postOffset = 0;
                while (true)
                {
                    $url = "{$baseUrl}/t{$topicId}p{$postOffset}-topic";
                    $output->write(str_pad("<info>⇒ HTTP GET: {$url}</info> ", 80));

                    $page = $client->request('GET', $url);

                    $status = $client->getInternalResponse()->getStatus();
                    $output->writeln("<info>[{$status}]</info>");

                    if ((int) $status !== 200)
                    {
                        sleep(1);
                        continue;
                    }

                    // Keep only post blocks whose id is one character followed by a number.
                    /** @var \Symfony\Component\DomCrawler\Crawler $posts */
                    $posts = $page->filter('#main-content div.post')->reduce(function (Crawler $node)
                    {
                        return is_numeric(substr($node->attr('id'), 1));
                    });

                    if ($postOffset == 0)
                    {
                        // The topic is persisted once, on its first page. A first page without
                        // any post yields 0 instead of failing on an empty node list.
                        $firstMessageId = $posts->count() > 0 ? intval(substr($posts->first()->attr('id'), 1)) : 0;
                        $topic->setFirstMessageId($firstMessageId);
                        $entityManager->persist($topic);
                    }

                    $posts->each(function (Crawler $node) use ($output, $entityManager, $entityBridge, $forumId, $topicId)
                    {
                        $postId = substr($node->attr('id'), 1);

                        $contentRaw = $node->filter('div.postbody div.content div')->html();
                        $contentParsed = trim(PCREParser::parseAll($contentRaw));
                        $contentClean = strip_tags($contentParsed);

                        // Any tag left after parsing is reported as a parsing problem.
                        if ($contentClean != $contentParsed)
                        {
                            $output->writeln("\t- parsing problems with post #{$postId}");
                        }

                        // NOTE(review): unlike the topic list, a missing author link is not handled here
                        // and makes attr() fail - confirm whether such posts can occur.
                        $link = $node->filter('div.postbody p.author a');
                        $authorId = intval(substr($link->attr('href'), 2));
                        $authorName = trim($link->text());

                        // Trim ASCII whitespace, NUL and U+00A0 as whole characters. A byte-wise
                        // trim on "\xC2\xA0" could cut a multi-byte UTF-8 character at either end.
                        $text = preg_replace(
                            '/\A[ \t\n\r\0\x0B\x{A0}]+|[ \t\n\r\0\x0B\x{A0}]+\z/u',
                            '',
                            $node->filter('div.postbody p.author')->text()
                        );
                        // What remains after removing the "<author> в " prefix is parsed as the post date.
                        $text = str_replace($authorName . ' в ', '', $text);
                        $dateTime = new \DateTime($text);

                        $post = $entityBridge->getPostEntity($entityManager, $postId);
                        $post->setForumId($forumId)
                            ->setTopicId($topicId)
                            ->setAuthorId($authorId)
                            ->setAuthorName($authorName)
                            ->setCreateDate($dateTime)
                            ->setSubject(trim($node->filter('div.postbody h2.topic-title')->text()))
                            ->setContent($contentParsed);

                        $entityManager->persist($post);
                    });

                    // A short page is the last page of the topic.
                    if ($posts->count() < ParseTopicsCommand::POSTS_PER_PAGE) break;

                    $postOffset += ParseTopicsCommand::POSTS_PER_PAGE;
                }

                // Flush once per topic.
                $entityManager->flush();
            });

            // A short page is the last page of the forum listing.
            if ($topicRows->count() < static::TOPICS_PER_PAGE) break;

            $offset += static::TOPICS_PER_PAGE;
        }
    }
}
196