PyJobsWebConnector - Code Metrics - pyjobs/web - Measure and Improve Code Quality continuously with Scrutinizer

PyJobsWebConnector A
last analyzed 2016-09-27 21:29 UTC

↳ Parent: Project

Complexity

Total Complexity

Size/Duplication

Total Lines	76
Duplicated Lines	0 %

Importance

Changes

Metric	Value
c	0
b	0
f	0
dl	0
loc	76
rs	10
wmc	11

4 Methods

Rating	Name	Size	Complexity
A	log()	13	2
A	job_exist()	9	1
B	add_job()	40	6
A	get_most_recent_job_date()	10	2

# -*- coding: utf-8 -*-
import datetime
import json

import transaction
from pyjobs_crawlers.run import Connector
from sqlalchemy.orm.exc import NoResultFound

from pyjobsweb import model
from pyjobsweb.model import DBSession, Log

# NOTE(review): neither 'helpers' nor 'app_globals' is defined in the code
# visible in this module -- confirm these names really exist here (e.g. as
# submodules of a package), otherwise a star-import of this module fails.
__all__ = ('helpers', 'app_globals')


class PyJobsWebConnector(Connector):
    """
    Crawler connector storing crawled job offers and crawl log entries
    through the PyJobsWeb SQLAlchemy session (``DBSession``).
    """

    @staticmethod
    def _commit_or_abort():
        """
        Commit the current transaction, aborting it if the commit fails.

        Without the abort, a failed commit is left uncleaned and later
        commits made by the same (long running) crawler process would fail
        too. The original exception is re-raised so callers still see it.
        """
        try:
            transaction.commit()
        except Exception:
            transaction.abort()
            raise

    def add_job(self, job_item):
        """
        Add job to PyJobsWeb database

        Nothing is stored when ``job_exist`` reports that an offer with the
        same url is already known.

        :param job_item: Scrapy pyjobs_crawlers item object; must provide
                         'url', 'source' and 'initial_crawl_datetime', other
                         handled keys are optional
        :return: None
        """
        job_public_id = job_item['url']

        if self.job_exist(job_public_id):
            # Parenthesised single argument: prints the same text on
            # Python 2 and is also valid syntax on Python 3.
            print('Skip existing item')
            return

        job = model.JobAlchemy()

        # Attributes copied verbatim from the item, without any special
        # treatment before population
        attributes = ('title', 'description', 'company', 'address',
                      'company_url', 'publication_datetime',
                      'publication_datetime_is_fake')

        # Populate job attributes only if the item contains them
        for attribute in attributes:
            if attribute in job_item:
                setattr(job, attribute, job_item[attribute])

        job.url = job_public_id
        job.source = job_item['source']
        job.crawl_datetime = job_item['initial_crawl_datetime']

        # Tags are stored as a JSON encoded list of {'tag', 'weight'} dicts
        if 'tags' in job_item:
            tags = [{'tag': t.tag, 'weight': t.weight}
                    for t in job_item['tags']]
            job.tags = json.dumps(tags)

        # Insert the job offer in the Postgresql database
        DBSession.add(job)
        self._commit_or_abort()

    def job_exist(self, job_url):
        """
        Tell whether a job offer with this url is already stored.

        Delegates to ``model.JobAlchemy.job_offer_exists``; ``add_job`` uses
        the result as a truth value.

        :param job_url: External identifier of job (url)
        :return: whatever ``job_offer_exists`` returns for this url
        """
        return model.JobAlchemy.job_offer_exists(job_url)

    def log(self, source, action, more=None):
        """
        Store a log entry for a crawl source.

        :param source: value stored in the ``source`` field of the entry
        :param action: message of the entry
        :param more: optional detail, appended to the message between
                     parentheses when it is not None
        :return: None
        """
        if more is not None:
            message = '%s (%s)' % (action, more)
        else:
            message = action

        log = Log()
        log.source = source
        log.message = message
        log.datetime = datetime.datetime.now()

        DBSession.add(log)
        self._commit_or_abort()

    def get_most_recent_job_date(self, source):
        """
        Return the most recent publication datetime stored for a source.

        Rows without a publication datetime are ignored: with a descending
        sort PostgreSQL puts NULL values first, so a single such row would
        otherwise make this method return None.

        :param source: source whose job offers are inspected
        :return: latest ``publication_datetime`` for this source, or
                 1970-01-01 00:00:00 when no dated job offer exists
        """
        publication_datetime = model.JobAlchemy.publication_datetime
        query = model.DBSession.query(publication_datetime)\
            .filter(model.JobAlchemy.source == source)\
            .filter(publication_datetime.isnot(None))\
            .order_by(publication_datetime.desc())\
            .limit(1)

        try:
            return query.one()[0]  # First element is publication_datetime
        except NoResultFound:
            return datetime.datetime(1970, 1, 1, 0, 0, 0)


1			# -*- coding: utf-8 -*-
2			import datetime
3			import json
4
5			import transaction
6			from pyjobs_crawlers.run import Connector
7			from sqlalchemy.orm.exc import NoResultFound
8
9			from pyjobsweb import model
10			from pyjobsweb.model import DBSession, Log
11
12			__all__ = ('helpers', 'app_globals')
13
14
15			class PyJobsWebConnector(Connector):
16			def add_job(self, job_item):
17			"""
18
19			Add job to PyJobsWeb database
20
21			:param job_item: Scrapy pyjobs_crawlers item object
22			:return:
23			"""
24			job_public_id = job_item['url']
25
26			if self.job_exist(job_public_id):
27			print 'Skip existing item'
28			return
29
30			job = model.JobAlchemy()
31
32			# Populate attributes which do not require special treatments before
33			# population
34			attributes = ['title', 'description', 'company', 'address',
35			'company_url', 'publication_datetime',
36			'publication_datetime_is_fake']
37
38			# Populate job attributes if item contain it
39			for attribute in attributes:
40			if attribute in job_item:
41			setattr(job, attribute, job_item[attribute])
42
43			job.url = job_item['url']
44			job.source = job_item['source']
45			job.crawl_datetime = job_item['initial_crawl_datetime']
46
47			# Populate attributes which require special treatments before population
48			if 'tags' in job_item:
49			tags = [{'tag': t.tag, 'weight': t.weight}
50			for t in job_item['tags']]
51			job.tags = json.dumps(tags)
52
53			# Insert the job offer in the Postgresql database
54			DBSession.add(job)
55			transaction.commit()
56
57			def job_exist(self, job_url):
58			"""
59
60			Return count of jobs having this url
61
62			:param job_url: External identifier of job (url)
63			:return:
64			"""
65			return model.JobAlchemy.job_offer_exists(job_url)
66
67			def log(self, source, action, more=None):
68			if more is not None:
69			message = '%s (%s)' % (action, more)
70			else:
71			message = action
72
73			log = Log()
74			log.source = source
75			log.message = message
76			log.datetime = datetime.datetime.now()
77
78			DBSession.add(log)
79			transaction.commit()
80
81			def get_most_recent_job_date(self, source):
82			try:
83			return \
84			model.DBSession.query(model.JobAlchemy.publication_datetime)\
85			.filter(model.JobAlchemy.source == source)\
86			.order_by(model.JobAlchemy.publication_datetime.desc())\
87			.limit(1)\
88			.one()[0] # First element is publication_datetime value
89			except NoResultFound:
90			return datetime.datetime(1970, 1, 1, 0, 0, 0)
91

pyjobs / web

GitHub Access Token became invalid

PyJobsWebConnector A last analyzed 2016-09-27 21:29 UTC

Complexity

Size/Duplication

Importance

4 Methods

Duplication Side-by-Side

Filter issues like

PyJobsWebConnector A
last analyzed 2016-09-27 21:29 UTC