1 | <?php |
||
8 | class Article extends Entity |
||
9 | { |
||
10 | use StandardEntity; |
||
11 | |||
12 | /** @var Discussion */ |
||
13 | protected $discussion = null; |
||
14 | |||
15 | 45 | public function __construct(array $data) |
|
27 | |||
28 | /** |
||
29 | * Should always return "article" |
||
30 | * @return string |
||
31 | */ |
||
32 | 5 | public function getType() |
|
36 | |||
37 | /** |
||
38 | * Returns plaintext version of article (no HTML) as parsed by Diffbot. |
||
39 | * Only the content is returned, the text in the surrounding (layout etc) elements is ignored. |
||
40 | * @return string |
||
41 | */ |
||
42 | 4 | public function getText() |
|
46 | |||
47 | /** |
||
48 | * Returns full HTML of the article's content - only the content, not the surrounding layout HTML. |
||
49 | * @return string |
||
50 | */ |
||
51 | 4 | public function getHtml() |
|
55 | |||
56 | /** |
||
57 | * Returns date as per http://www.w3.org/Protocols/rfc2616/rfc2616-sec3.html#sec3.3 |
||
58 | * Example date: "Wed, 18 Dec 2013 00:00:00 GMT" |
||
59 | * This will be a Carbon (https://github.com/briannesbitt/Carbon) instance if Carbon is installed. |
||
60 | * @return \Carbon\Carbon | string |
||
61 | */ |
||
62 | 5 | public function getDate() |
|
75 | |||
76 | /** |
||
77 | * Returns the full name of the author, as signed on the article's page |
||
78 | * @return string | null |
||
79 | */ |
||
80 | 4 | public function getAuthor() |
|
84 | |||
85 | /** |
||
86 | * Returns the URL of the author's profile, if available. Otherwise, null. |
||
87 | * @return string | null |
||
88 | */ |
||
89 | 4 | public function getAuthorUrl() |
|
93 | |||
94 | /** |
||
95 | * The array returned will contain all tags that Diffbot's AI concluded match the content |
||
96 | * |
||
97 | * Note that these are *not* the meta tags as defined by the author, but machine learned ones. |
||
98 | * The format of the array is: |
||
99 | * |
||
100 | * [ |
||
101 | * [ |
||
102 | * "id": 133907, |
||
103 | * "count": 3, |
||
104 | * "prevalence": 0.3103448275862069, |
||
105 | * "label": "Apache HTTP Server", |
||
106 | * "type": "Http://www.ontologydesignpatterns.org/ont/dul/DUL.owl#InformationEntity", |
||
107 | * "uri": "http://dbpedia.org/resource/Apache_HTTP_Server" |
||
108 | * ], |
||
109 | * [ |
||
110 | * "id": 208652, |
||
111 | * "count": 5, |
||
112 | * "prevalence": 0.5172413793103449, |
||
113 | * "label": "PHP", |
||
114 | * "type": "Http://www.ontologydesignpatterns.org/ont/dul/DUL.owl#InformationEntity", |
||
115 | * "uri": "http://dbpedia.org/resource/PHP" |
||
116 | * ] |
||
117 | * ] |
||
118 | * |
||
119 | * @return array |
||
120 | */ |
||
121 | 4 | public function getTags() |
|
125 | |||
126 | /** |
||
127 | * Number of pages automatically concatenated to form the text or html response. |
||
128 | * By default, Diffbot will automatically concatenate up to 20 pages of an article. |
||
129 | * @see http://support.diffbot.com/automatic-apis/handling-multiple-page-articles/ |
||
130 | * @return int |
||
131 | */ |
||
132 | 4 | public function getNumPages() |
|
136 | |||
137 | /** |
||
138 | * Array of all page URLs concatenated in a multipage article. Max 20 entries. |
||
139 | * Empty array if article was not concatenated before being returned. |
||
140 | * @see http://support.diffbot.com/automatic-apis/handling-multiple-page-articles/ |
||
141 | * @return array |
||
142 | */ |
||
143 | 4 | public function getNextPages() |
|
147 | |||
148 | /** |
||
149 | * Returns the sentiment score of the analyzed article text, a value ranging from |
||
150 | * -1.0 (very negative) to 1.0 (very positive). |
||
151 | * @return float|null |
||
152 | */ |
||
153 | 4 | public function getSentiment() |
|
157 | |||
158 | /** |
||
159 | * Returns an array of images found in the page's content. |
||
160 | * |
||
161 | * Note that this tries to ignore content-unrelated images like ads around the page, etc. |
||
162 | * The format of the array will be: |
||
163 | * |
||
164 | * [ |
||
165 | * { |
||
166 | * "height": 808, |
||
167 | * "diffbotUri": "image|3|-543943368", |
||
168 | * "naturalHeight": 808, |
||
169 | * "width": 717, |
||
170 | * "primary": true, |
||
171 | * "naturalWidth": 717, |
||
172 | * "url": "https://example.com/image1.png" |
||
173 | * }, |
||
174 | * { |
||
175 | * "height": 506, |
||
176 | * "diffbotUri": "image|3|-844014913", |
||
177 | * "naturalHeight": 506, |
||
178 | * "width": 715, |
||
179 | * "naturalWidth": 715, |
||
180 | * "url": "https://example.com/image1.jpeg" |
||
181 | * } |
||
182 | * ] |
||
183 | * |
||
184 | * @return array |
||
185 | */ |
||
186 | 4 | public function getImages() |
|
190 | |||
191 | /** |
||
192 | * Returns an array of videos found in the article's content. |
||
193 | * |
||
194 | * Works on and off - the better choice is the Video API |
||
195 | * @see https://www.diffbot.com/dev/docs/video |
||
196 | * The format of the array will be: |
||
197 | * |
||
198 | * [ |
||
199 | * { |
||
200 | * "diffbotUri": "video|3|-1138675744", |
||
201 | * "primary": true, |
||
202 | * "url": "http://player.vimeo.com/video/22439234" |
||
203 | * }, |
||
204 | * { |
||
205 | * "diffbotUri": "video|3|-1138675744", |
||
206 | * "primary": true, |
||
207 | * "url": "http://player.vimeo.com/video/22439234" |
||
208 | * } |
||
209 | * ] |
||
210 | * |
||
211 | * @return array |
||
212 | */ |
||
213 | 4 | public function getVideos() |
|
217 | |||
218 | /** |
||
219 | * Returns the Discussion entity - comments of the article |
||
220 | * @return Discussion |
||
221 | */ |
||
222 | 1 | public function getDiscussion() |
|
226 | |||
227 | /** |
||
228 | * The plain-text name of the site (e.g. The New York Times or Diffbot). |
||
229 | * |
||
230 | * If no site name is automatically determined, the root domain (diffbot.com) will be returned. |
||
231 | * |
||
232 | * @return string | null |
||
233 | */ |
||
234 | 1 | public function getSiteName() |
|
238 | |||
239 | /** |
||
240 | * If known, the country of the article publication. |
||
241 | * |
||
242 | * @return string | null |
||
243 | */ |
||
244 | 1 | public function getPublisherCountry() |
|
248 | |||
249 | /** |
||
250 | * If known, the region of the article publication. |
||
251 | * |
||
252 | * @return string | null |
||
253 | */ |
||
254 | 1 | public function getPublisherRegion() |
|
258 | |||
259 | /** |
||
260 | * If an article's date is ambiguous, Diffbot will attempt to estimate a |
||
261 | * more specific timestamp using various factors. This will not be |
||
262 | * generated for articles older than two days, or articles without an identified date. |
||
263 | * |
||
264 | * @see Article::getDate() - used when estimatedDate isn't defined |
||
265 | * |
||
266 | * This will be a Carbon (https://github.com/briannesbitt/Carbon) instance if Carbon is installed. |
||
267 | * |
||
268 | * @return \Carbon\Carbon | string |
||
269 | */ |
||
270 | 1 | public function getEstimatedDate() |
|
278 | |||
279 | /** |
||
280 | * Returns the canonical url from a page, if any. |
||
281 | * @return string |
||
282 | */ |
||
283 | public function getCanonicalUrl() |
||
287 | } |
||
288 |