1
|
|
|
<?php |
2
|
|
|
|
3
|
|
|
namespace Cp\Parser; |
4
|
|
|
|
5
|
|
|
use Cp\Http\HeaderParser; |
6
|
|
|
use PHPHtmlParser\Dom; |
7
|
|
|
|
8
|
|
|
/**
 * Class PlanParser
 *
 * Fetches a training-plan page by URL and extracts the plan from its markup,
 * either as a JSON document or as an HTML table fragment.
 */
class PlanParser
{
    /**
     * @var Dom
     */
    private $parser;

    /**
     * @var HeaderParser
     */
    private $headerParser;

    /**
     * PlanParser constructor.
     *
     * @param Dom          $parser
     * @param HeaderParser $headerParser
     */
    public function __construct(Dom $parser, HeaderParser $headerParser)
    {
        $this->parser       = $parser;
        $this->headerParser = $headerParser;
    }

    /**
     * Fetches the page at $url and serialises the plan it contains to JSON.
     *
     * The resulting document has the keys "name", "type" and "weeks"; every
     * week has a "name" and a "trainings" list whose entries carry "type"
     * and "content".
     *
     * @param string $url
     *
     * @return string
     * @throws \Exception when the page cannot be fetched, the response code is
     *                    not 200, or no "#plans table" element is found
     */
    public function parseToJson($url)
    {
        $this->parser->load($this->fetchContent($url));

        $weekTables = $this->parser->find('#plans table');
        if (0 >= $weekTables->count()) {
            throw new \Exception(sprintf('Plan not found for this url: %s', $url));
        }

        $plan = $this->getPlan();
        foreach ($weekTables as $weekTable) {
            // "trainings" is always present so that consumers get a stable shape,
            // even for a week whose table body has no rows.
            $week = [
                'name'      => $weekTable->find('thead tr')->find('td')[1]->innerHtml,
                'trainings' => [],
            ];

            foreach ($weekTable->find('tbody tr') as $seance) {
                $cells = $seance->find('td');

                $week['trainings'][] = [
                    'type'    => strip_tags($cells[0]->innerHtml),
                    'content' => strip_tags($cells[1]->innerHtml),
                ];
            }

            $plan['weeks'][] = $week;
        }

        // The previous code passed `true` as the flags argument, which PHP coerces
        // to 1 (JSON_HEX_TAG). The flag is spelled out to keep the output identical.
        return json_encode($plan, JSON_HEX_TAG);
    }

    /**
     * Fetches the page at $url and returns the inner HTML of the element matched
     * by "#plans table", wrapped in a <table> element.
     *
     * NOTE(review): unlike parseToJson() there is no explicit guard for a page
     * without a plan table here — confirm how the Dom collection behaves when
     * the selector matches nothing.
     *
     * @param string $url
     *
     * @return string
     * @throws \Exception when the page cannot be fetched or the response code
     *                    is not 200
     */
    public function parseToHtml($url)
    {
        $this->parser->load($this->fetchContent($url));

        return sprintf('%s%s%s', '<table>', $this->parser->find('#plans table')->innerHtml, '</table>');
    }

    /**
     * Builds the plan skeleton (name, type, empty list of weeks) from the
     * document currently loaded in the parser.
     *
     * @return array
     */
    private function getPlan()
    {
        $nameOfPlan = strip_tags($this->parser->find('.article-content-main h1')->innerHtml);
        $typeOfPlan = strip_tags($this->parser->find('.article-content-main h3')->innerHtml);

        return [
            'name'  => $nameOfPlan,
            'type'  => $typeOfPlan,
            'weeks' => [],
        ];
    }

    /**
     * Throws when the given response headers report a status other than 200.
     *
     * PHP creates $http_response_header only in the scope that performed the
     * HTTP request, so it is never visible inside this method. The caller has
     * to hand the headers over explicitly. When no headers are supplied (the
     * legacy one-argument call, or a non-HTTP source) nothing is checked.
     *
     * @param string     $url
     * @param array|null $responseHeaders raw response header lines of the request
     *
     * @throws \Exception carrying the response code as exception code
     */
    public function generateBadRequestException($url, $responseHeaders = null)
    {
        if (empty($responseHeaders)) {
            return;
        }

        $responseCode = $this->headerParser->get('response_code', $responseHeaders);

        // Loose comparison is kept on purpose: the return type of HeaderParser::get()
        // is not visible here, and both 200 and '200' must be accepted.
        if ('200' != $responseCode) {
            throw new \Exception(
                sprintf('Url %s return http response code %s', $url, $responseCode),
                (int) $responseCode
            );
        }
    }

    /**
     * Downloads the content behind $url and validates the HTTP response.
     *
     * @param string $url
     *
     * @return string
     * @throws \Exception when the response code is not 200 or nothing could be read
     */
    private function fetchContent($url)
    {
        $htmlContent = file_get_contents($url);

        // $http_response_header is populated by the HTTP stream wrapper in *this*
        // scope, which is why it is forwarded instead of being read by the callee.
        $this->generateBadRequestException(
            $url,
            isset($http_response_header) ? $http_response_header : null
        );

        if (false === $htmlContent) {
            throw new \Exception(sprintf('Unable to read content from url: %s', $url));
        }

        return $htmlContent;
    }
}
|
|
|
|
118
|
|
|
|
This check looks for multiple assignments in successive lines of code. It will report an issue if the operators are not in a straight line.

To visualize, the following example

    $a = "a";
    $ab = "ab";
    $abc = "abc";

will produce issues in the first and second line, while this second example

    $a   = "a";
    $ab  = "ab";
    $abc = "abc";

will produce no issues.