1 | <?php |
||
2 | |||
3 | namespace Cion\TextToSpeech\Sources; |
||
4 | |||
5 | use Cion\TextToSpeech\Contracts\Source as SourceContract; |
||
6 | use DOMDocument; |
||
7 | use DOMNodeList; |
||
8 | use RecursiveIteratorIterator; |
||
9 | |||
class WebsiteSource implements SourceContract
{
    /**
     * Fetch a web page and return the text gathered from its first <article> element.
     *
     * @param  string  $data  URL (or any other location readable by file_get_contents()) of the page.
     * @return string  The collected paragraph text, or an empty string when the page
     *                 cannot be fetched or contains no <article> element.
     */
    public function handle(string $data): string
    {
        $articles = $this->getDOMDocumentArticle($data);

        if ($articles === null) {
            return '';
        }

        return $this->getTextFromArticle($articles);
    }

    /**
     * Get the child nodes of the first <article> element found at the given URL.
     *
     * @param  string  $url  Location of the HTML document to load.
     * @return DOMNodeList|null  Child nodes of the first <article> element, or null when
     *                           the document cannot be fetched or has no <article> element.
     */
    protected function getDOMDocumentArticle(string $url)
    {
        // The fetch stays best-effort (the warning is still silenced), but the failure
        // is now checked explicitly instead of handing `false` on to loadHTML().
        $html = @file_get_contents($url);

        // loadHTML() rejects an empty source (ValueError on PHP 8), so an unreachable
        // or empty page is reported as "no article" rather than crashing the caller.
        if ($html === false || $html === '') {
            return null;
        }

        $dom = new DOMDocument();

        // Real-world markup is rarely valid; collect libxml's parse errors internally
        // instead of suppressing every error on the line with `@`.
        $previous = libxml_use_internal_errors(true);

        try {
            $dom->loadHTML($html);
        } finally {
            libxml_clear_errors();
            libxml_use_internal_errors($previous);
        }

        $element = $dom->getElementsByTagName('article')->item(0);

        if ($element === null) {
            return null;
        }

        return $element->childNodes;
    }

    /**
     * Get text from the article's DOM node list.
     *
     * Every node in the list that exposes child nodes is traversed, and the text
     * content of each <p> node yielded by that traversal is appended to the result,
     * followed by a single space.
     *
     * @param  DOMNodeList  $articles  Child nodes of the <article> element.
     * @return string  Concatenated paragraph text; empty when no <p> node is found.
     */
    protected function getTextFromArticle(DOMNodeList $articles): string
    {
        $text = '';

        foreach ($articles as $article) {
            // Skip nodes that expose no child node list at all.
            if ($article->childNodes === null) {
                continue;
            }

            // SELF_FIRST yields each node before its descendants, so the text is
            // collected in traversal order.
            // NOTE(review): RecursiveDOMIterator is declared elsewhere in this namespace;
            // whether it yields the given node itself or only its descendants should be
            // confirmed there.
            $nodes = new RecursiveIteratorIterator(
                new RecursiveDOMIterator($article),
                RecursiveIteratorIterator::SELF_FIRST
            );

            foreach ($nodes as $node) {
                if ($node->nodeName === 'p') {
                    $text .= $node->textContent.' ';
                }
            }
        }

        return $text;
    }
}
||
75 |
If you suppress an error, we recommend checking for the error condition explicitly: