GitHub Access Token became invalid

It seems like the GitHub access token used for retrieving details about this repository from GitHub became invalid. This might prevent certain types of inspections from being run (in particular, everything related to pull requests).
Please ask an admin of your repository to renew the access token on this website.

PyJobsWebConnector   A
last analyzed

Complexity

Total Complexity 11

Size/Duplication

Total Lines 76
Duplicated Lines 0 %

Importance

Changes 0
Metric Value
c 0
b 0
f 0
dl 0
loc 76
rs 10
wmc 11

4 Methods

Rating   Name   Duplication   Size   Complexity  
A log() 0 13 2
A job_exist() 0 9 1
B add_job() 0 40 6
A get_most_recent_job_date() 0 10 2
1
# -*- coding: utf-8 -*-
2
import datetime
3
import json
4
5
import transaction
6
from pyjobs_crawlers.run import Connector
7
from sqlalchemy.orm.exc import NoResultFound
8
9
from pyjobsweb import model
10
from pyjobsweb.model import DBSession, Log
11
12
# Names exported by ``from <this module> import *``.
# NOTE(review): neither 'helpers' nor 'app_globals' is defined or imported in
# the visible code. This is only valid if this file is a package __init__
# with submodules of those names; otherwise a star-import raises
# AttributeError and this should presumably list 'PyJobsWebConnector'
# instead -- confirm.
__all__ = ('helpers', 'app_globals')
13
14
15
class PyJobsWebConnector(Connector):
    """
    pyjobs_crawlers ``Connector`` subclass backed by the PyJobsWeb database

    Job offers and log entries are added to ``DBSession`` and committed
    through the ``transaction`` package.
    """

    @staticmethod
    def _commit_or_abort():
        """
        Commit the current transaction, aborting it when the commit fails

        Without the abort, a failed transaction stays current and every
        later commit fails as well, so a single bad item would block all
        following ``add_job`` / ``log`` calls.

        :raise: whatever exception ``transaction.commit()`` raised
        """
        try:
            transaction.commit()
        except Exception:
            transaction.abort()
            raise

    def add_job(self, job_item):
        """
        Add job to PyJobsWeb database

        Nothing is stored when ``job_exist`` reports that the item url is
        already known.

        :param job_item: Scrapy pyjobs_crawlers item object
        :return: None
        """
        job_url = job_item['url']

        if self.job_exist(job_url):
            # Single-argument parenthesised form: prints the same text under
            # both Python 2 (print statement) and Python 3 (print function).
            print('Skip existing item')
            return

        job = model.JobAlchemy()

        # Attributes copied verbatim, only when the item provides them
        optional_attributes = ('title', 'description', 'company', 'address',
                               'company_url', 'publication_datetime',
                               'publication_datetime_is_fake')
        for attribute in optional_attributes:
            if attribute in job_item:
                setattr(job, attribute, job_item[attribute])

        # Mandatory attributes: a missing key raises KeyError
        job.url = job_url
        job.source = job_item['source']
        job.crawl_datetime = job_item['initial_crawl_datetime']

        # Tags are stored as a JSON list of {'tag': ..., 'weight': ...} dicts
        if 'tags' in job_item:
            tags = [{'tag': t.tag, 'weight': t.weight}
                    for t in job_item['tags']]
            job.tags = json.dumps(tags)

        # Insert the job offer in the Postgresql database
        DBSession.add(job)
        self._commit_or_abort()

    def job_exist(self, job_url):
        """
        Tell whether a job offer with this url is already stored

        :param job_url: External identifier of job (url)
        :return: result of ``JobAlchemy.job_offer_exists(job_url)``
                 (used as a boolean by ``add_job``)
        """
        return model.JobAlchemy.job_offer_exists(job_url)

    def log(self, source, action, more=None):
        """
        Store a log entry in the database

        :param source: value stored in the ``source`` field of the entry
        :param action: text of the log message
        :param more: optional detail, appended to the message in parentheses
        :return: None
        """
        if more is not None:
            message = '%s (%s)' % (action, more)
        else:
            message = action

        entry = Log()
        entry.source = source
        entry.message = message
        entry.datetime = datetime.datetime.now()

        DBSession.add(entry)
        self._commit_or_abort()

    def get_most_recent_job_date(self, source):
        """
        Return the most recent publication datetime stored for a source

        :param source: value compared with ``JobAlchemy.source``
        :return: highest ``publication_datetime`` among the jobs of this
                 source, or 1970-01-01 00:00:00 when there is none
        """
        # NOTE(review): add_job leaves publication_datetime unset when the
        # item lacks it; if the column is nullable, a descending sort may put
        # a NULL first and make this return None -- confirm.
        publication_datetime = model.JobAlchemy.publication_datetime
        query = (
            model.DBSession.query(publication_datetime)
            .filter(model.JobAlchemy.source == source)
            .order_by(publication_datetime.desc())
            .limit(1)
        )

        try:
            # First element is publication_datetime value
            return query.one()[0]
        except NoResultFound:
            return datetime.datetime(1970, 1, 1, 0, 0, 0)
91