@@ 1309-1335 (lines=27) @@ | ||
# Base URL of the site; it is also interpolated into the comic link pattern below.
url = 'http://www.qwantz.com'
# Archive elements are converted to comic URLs by delegating to get_href.
get_url_from_archive_element = get_href
# Matches '<url>/index.php?comic=<digits>' and captures the digits.
# re.escape() and the explicit '\.' keep every dot literal; unescaped, each '.'
# would match any character.
comic_link_re = re.compile(r'^%s/index\.php\?comic=([0-9]*)$' % re.escape(url))
|
1309 | ||
@classmethod
def get_archive_elements(cls):
    """Return the comic links of the archive page, in reverse page order.

    The page at '<cls.url>/archive.php' is fetched and every 'a' element whose
    href matches cls.comic_link_re is collected.

    Returns:
        A reversed iterator over the matching links, without the first one.
    """
    archive_soup = get_soup_at_url(urljoin_wrapper(cls.url, 'archive.php'))
    comic_links = archive_soup.find_all('a', href=cls.comic_link_re)
    # The first matching link points to a random comic, so it is dropped.
    return reversed(comic_links[1:])
|
1315 | ||
@classmethod
def get_comic_info(cls, soup, link):
    """Get information about a particular comic.

    Args:
        soup: parsed page of the comic; it is searched for the comic image
            (presumably a BeautifulSoup object - confirm against caller).
        link: archive element of the comic. Its string is the publication
            date ("%B %d, %Y" once the ordinal suffix is removed) and the
            string of its next sibling is used as the comic text.

    Returns:
        A dict with the keys 'month', 'year', 'day', 'img', 'title',
        'text' and 'num'.

    Raises:
        AttributeError: if the URL does not match cls.comic_link_re, if the
            link has no next sibling, or if no comic image is found.
    """
    comic_url = cls.get_url_from_archive_element(link)
    num = int(cls.comic_link_re.match(comic_url).group(1))
    date_str = link.string
    text = link.next_sibling.string
    day = string_to_date(remove_st_nd_rd_th_from_date(date_str), "%B %d, %Y")
    # re.escape() keeps the dots of the base URL literal; interpolated raw,
    # each '.' would match any character.
    comic_img_re = re.compile('^%s/comics/' % re.escape(cls.url))
    img = soup.find('img', src=comic_img_re)
    return {
        'month': day.month,
        'year': day.year,
        'day': day.day,
        'img': [img.get('src')],
        'title': img.get('title'),
        'text': text,
        'num': num,
    }
|
1335 | ||
1336 | ||
1337 | class ButterSafe(GenericListableComic): |
|
1338 | """Class to retrieve Butter Safe comics.""" |
|
@@ 1374-1395 (lines=22) @@ | ||
long_name = 'Calvin and Hobbes'
# NOTE: the strips are not fetched from an official website but from the
# third-party page below.
url = 'http://marcel-oehler.marcellosendos.ch/comics/ch/'
|
1374 | ||
@classmethod
def get_next_comic(cls, last_comic):
    """Generate the comics that come after the last one retrieved.

    The index page at cls.url is scanned for links of the form
    '<year>/<month>/'. Each month page that may hold newer comics is fetched
    and scanned for images whose src starts with '<year><month><day>'.

    Args:
        last_comic: the last comic already retrieved, or a falsy value to
            start from the default date of 1985-11-01.

    Yields:
        A dict with the keys 'url', 'year', 'month', 'day' and 'img' for
        every comic dated strictly after the last one.
    """
    last_date = get_date_for_comic(last_comic) if last_comic else date(1985, 11, 1)
    # '+' rather than '*': an href with an empty component (e.g. '//host/')
    # must not match, otherwise int('') below would raise ValueError.
    link_re = re.compile('^([0-9]+)/([0-9]+)/')
    for link in get_soup_at_url(cls.url).find_all('a', href=link_re):
        month_href = link['href']
        year_str, month_str = link_re.match(month_href).groups()
        year, month = int(year_str), int(month_str)
        # Coarse filter: skip months whose first day plus 31 days is still
        # before the last retrieved date, so their page is never fetched.
        if date(year, month, 1) + timedelta(days=31) < last_date:
            continue
        # The original strings are kept for the pattern and the image URL
        # so that any zero padding is preserved.
        img_re = re.compile('^%s%s([0-9]+)' % (year_str, month_str))
        month_url = urljoin_wrapper(cls.url, month_href)
        for img in get_soup_at_url(month_url).find_all('img', src=img_re):
            img_src = img['src']
            day = int(img_re.match(img_src).group(1))
            if date(year, month, day) > last_date:
                yield {
                    'url': month_url,
                    'year': year,
                    'month': month,
                    'day': day,
                    'img': ['%s%s/%s/%s' % (cls.url, year_str, month_str, img_src)],
                }