1 | <?php |
||
7 | class Post extends Entity |
||
8 | { |
||
9 | |||
10 | 67 | public function __construct(array $data) |
|
19 | |||
20 | /** |
||
21 | * Should always return "post" |
||
22 | * @return string |
||
23 | */ |
||
24 | 1 | public function getType() |
|
28 | |||
29 | /** |
||
30 | * Alias for getLang() |
||
31 | * @see getLang() |
||
32 | * @return string |
||
33 | */ |
||
34 | 1 | public function getHumanLanguage() |
|
38 | |||
39 | /** |
||
40 | * Returns the human language of the page as determined by Diffbot when looking at content. |
||
41 | * The code returned is a two-character ISO 639-1 code: http://en.wikipedia.org/wiki/List_of_ISO_639-1_codes |
||
42 | * @return string |
||
43 | */ |
||
44 | 1 | public function getLang() |
|
48 | |||
49 | /** |
||
50 | * Returns plaintext version of article (no HTML) as parsed by Diffbot. |
||
51 | * Only the content is returned, the text in the surrounding (layout etc) elements is ignored. |
||
52 | * @return string |
||
53 | */ |
||
54 | 1 | public function getText() |
|
58 | |||
59 | /** |
||
60 | * Returns full HTML of the article's content - only the content, not the surrounding layout HTML. |
||
61 | * @return string |
||
62 | */ |
||
63 | 1 | public function getHtml() |
|
67 | |||
68 | /** |
||
69 | * Returns date as per http://www.w3.org/Protocols/rfc2616/rfc2616-sec3.html#sec3.3 |
||
70 | * Example date: "Wed, 18 Dec 2013 00:00:00 GMT" |
||
71 | * This will be a Carbon (https://github.com/briannesbitt/Carbon) instance if Carbon is installed. |
||
72 | * @return \Carbon\Carbon | string |
||
73 | */ |
||
74 | 1 | public function getDate() |
|
81 | |||
82 | /** |
||
83 | * Returns the full name of the author, as signed on the article's page |
||
84 | * @return string | null |
||
85 | */ |
||
86 | 1 | public function getAuthor() |
|
90 | |||
91 | /** |
||
93 | * Returns the URL of the author - their profile URL if available |
||
93 | * @return string | null |
||
94 | */ |
||
95 | 1 | public function getAuthorUrl() |
|
99 | |||
100 | /** |
||
101 | * The array returned will contain all tags that Diffbot's AI concluded match the content |
||
102 | * |
||
103 | * Note that these are *not* the meta tags as defined by the author, but machine learned ones. |
||
104 | * The format of the array is: |
||
105 | * |
||
106 | * [ |
||
107 | * [ |
||
108 | * "id": 133907, |
||
109 | * "count": 3, |
||
110 | * "prevalence": 0.3103448275862069, |
||
111 | * "label": "Apache HTTP Server", |
||
112 | * "type": "Http://www.ontologydesignpatterns.org/ont/dul/DUL.owl#InformationEntity", |
||
113 | * "uri": "http://dbpedia.org/resource/Apache_HTTP_Server" |
||
114 | * ], |
||
115 | * [ |
||
116 | * "id": 208652, |
||
117 | * "count": 5, |
||
118 | * "prevalence": 0.5172413793103449, |
||
119 | * "label": "PHP", |
||
120 | * "type": "Http://www.ontologydesignpatterns.org/ont/dul/DUL.owl#InformationEntity", |
||
121 | * "uri": "http://dbpedia.org/resource/PHP" |
||
122 | * ] |
||
123 | * ] |
||
124 | * |
||
125 | * @return array |
||
126 | */ |
||
127 | 1 | public function getTags() |
|
131 | |||
132 | /** |
||
133 | * Returns the sentiment score of the analyzed article text, a value ranging from |
||
134 | * -1.0 (very negative) to 1.0 (very positive). |
||
135 | * @return float|null |
||
136 | */ |
||
137 | 1 | public function getSentiment() |
|
141 | |||
142 | /** |
||
143 | * Returns the number of upvotes on a post or 0 if none or unavailable |
||
144 | * @return int |
||
145 | */ |
||
146 | 1 | public function getVotes() |
|
150 | |||
151 | /** |
||
152 | * Returns the ID of the post (usually the ordinal number of the post in |
||
153 | * the list of all posts, starting with 0 for the first one). |
||
154 | * @return int |
||
155 | */ |
||
156 | 1 | public function getId() |
|
160 | |||
161 | /** |
||
162 | * If the post is a reply, this is the ID of the post it replies to |
||
163 | * @return int |
||
164 | */ |
||
165 | 1 | public function getParentId() |
|
169 | |||
170 | /** |
||
171 | * Returns an array of images found in the page's content. |
||
172 | * |
||
173 | * Note that this tries to ignore content-unrelated images, like ads around the page, etc. |
||
174 | * The format of the array will be: |
||
175 | * |
||
176 | * [ |
||
177 | * { |
||
178 | * "height": 808, |
||
179 | * "diffbotUri": "image|3|-543943368", |
||
180 | * "naturalHeight": 808, |
||
181 | * "width": 717, |
||
182 | * "primary": true, |
||
183 | * "naturalWidth": 717, |
||
184 | * "url": "https://example.com/image1.png" |
||
185 | * }, |
||
186 | * { |
||
187 | * "height": 506, |
||
188 | * "diffbotUri": "image|3|-844014913", |
||
189 | * "naturalHeight": 506, |
||
190 | * "width": 715, |
||
191 | * "naturalWidth": 715, |
||
192 | * "url": "https://example.com/image1.jpeg" |
||
193 | * } |
||
194 | * ] |
||
195 | * |
||
196 | * @return array |
||
197 | */ |
||
198 | 1 | public function getImages() |
|
202 | |||
203 | /** |
||
204 | * Returns the URL which was crawled |
||
205 | * @return string |
||
206 | */ |
||
207 | 1 | public function getPageUrl() |
|
211 | |||
212 | /** |
||
213 | * An internal identifier for Diffbot, used for indexing in their databases |
||
214 | * @return string |
||
215 | */ |
||
216 | 1 | public function getDiffbotUri() |
|
220 | } |