| 1 | <?php |
||
| 2 | |||
| 3 | namespace Cion\TextToSpeech\Sources; |
||
| 4 | |||
| 5 | use Cion\TextToSpeech\Contracts\Source as SourceContract; |
||
| 6 | use DOMDocument; |
||
| 7 | use DOMNodeList; |
||
| 8 | use RecursiveIteratorIterator; |
||
| 9 | |||
/**
 * Text source that reads a web page and extracts the text of the <p>
 * elements found inside the page's first <article> element.
 */
class WebsiteSource implements SourceContract
{
    /**
     * Handles in getting the text from source.
     *
     * @param  string  $data  Location of the page; it is passed as-is to file_get_contents().
     * @return string  The concatenated paragraph text, or an empty string when the
     *                 page cannot be fetched/parsed or contains no <article> element.
     */
    public function handle(string $data): string
    {
        $articles = $this->getDOMDocumentArticle($data);

        if ($articles === null) {
            return '';
        }

        return $this->getTextFromArticle($articles);
    }

    /**
     * Get the DOM Node List of article tag.
     *
     * Only the first <article> element of the document is considered; the
     * returned list holds that element's direct child nodes.
     *
     * @param  string  $url  Location of the page to load.
     * @return DOMNodeList|null  Null when the page cannot be fetched, cannot be
     *                           parsed, or has no <article> element.
     */
    protected function getDOMDocumentArticle(string $url)
    {
        // The fetch warning is suppressed on purpose, but the failure itself is
        // checked explicitly right below instead of being passed on to the parser.
        $html = @file_get_contents($url);

        // DOMDocument::loadHTML() rejects an empty source (ValueError on PHP 8),
        // which error suppression cannot silence, so bail out before calling it.
        if ($html === false || $html === '') {
            return null;
        }

        $dom = new DOMDocument();

        // Collect markup errors internally instead of emitting warnings, and
        // restore the previous libxml setting afterwards.
        $previous = libxml_use_internal_errors(true);

        try {
            $loaded = $dom->loadHTML($html);
        } finally {
            libxml_clear_errors();
            libxml_use_internal_errors($previous);
        }

        if ($loaded === false) {
            return null;
        }

        $element = $dom->getElementsByTagName('article')->item(0);

        if ($element === null) {
            return null;
        }

        return $element->childNodes;
    }

    /**
     * Get text from the articles DOM Node List.
     *
     * Every node in the list is walked together with its descendants, and the
     * text content of each node named "p" is appended, followed by one space.
     *
     * @param  DOMNodeList  $articles
     * @return string  Paragraph texts joined by (and ending with) a single space;
     *                 empty when no "p" node is found.
     */
    protected function getTextFromArticle(DOMNodeList $articles): string
    {
        $text = '';

        foreach ($articles as $article) {
            // Check element if there is a childNodes
            if ($article->childNodes === null) {
                continue;
            }

            // RecursiveDOMIterator is resolved from this namespace; presumably it
            // exposes the node's children recursively - verify against that class.
            // SELF_FIRST yields a parent node before its descendants.
            $nodes = new RecursiveIteratorIterator(
                new RecursiveDOMIterator($article),
                RecursiveIteratorIterator::SELF_FIRST
            );

            foreach ($nodes as $node) {
                if ($node->nodeName === 'p') {
                    $text .= $node->textContent.' ';
                }
            }
        }

        return $text;
    }
}
||
| 75 |
If you suppress an error, we recommend checking for the error condition explicitly: