StatsQuestioner.extract_stats() - Code Metrics - pyjobs/web - Measure and Improve Code Quality continuously with Scrutinizer

StatsQuestioner.extract_stats() C
last analyzed 2016-09-27 21:29 UTC

↳ Parent: StatsQuestioner

Complexity

Conditions

Size

Total Lines

Duplication

Lines	0
Ratio	0 %

Importance

Changes

Metric	Value
cc	7
c	0
b	0
f	0
dl	0
loc	23
rs	5.5

# -*- coding: utf-8 -*-
import calendar
import datetime

from dateutil.relativedelta import relativedelta
from sqlalchemy import func

from pyjobsweb import model


class StatsQuestioner(object):
    """
        Provide database query method and query result formatting.
    """

    PERIOD_MONTH = 'mon'
    PERIOD_WEEK = 'week'

    FIELD_SOURCE = 0
    FIELD_COUNT = 1
    FIELD_DATE = 2

    FIELDS = {
        FIELD_SOURCE: 'source',
        FIELD_COUNT: 'jobs',
        FIELD_DATE: 'date'
    }

    FLAT_Y_FIELD = FIELD_SOURCE
    FLAT_X_FIELD = FIELD_DATE

    @staticmethod
    def get_month_period(period_range):
        """
        return a start date and a end date of x complete previous month
        :param period_range: number of months
        :return: date_from (datetime), date_to (datetime)
        """

        today = datetime.datetime.today().replace(hour=23, minute=59, second=59, microsecond=999999)
        current_month = today.replace(day=1)
        last_day_of_current_month = calendar.monthrange(today.year, today.month)[1]

        if today.day == last_day_of_current_month:
            date_from = current_month + relativedelta(months=-period_range - 1)
            date_to = today
        else:
            date_from = current_month + relativedelta(months=-period_range)
            date_to = current_month - datetime.timedelta(days=1)

        return date_from, date_to

    @classmethod
    def extract(cls, query_result, field):
        """
        Simple make a list containing asked field value from query_result
        :param query_result: result from StatsQuestioner query
        :param field: field to be extracted
        :return: list containing asked field value from query_result
        """
        return sorted(set([r[field] for r in query_result]))

    @classmethod
    def extract_stats(cls, query_result, sources):
        """
        Return a dict of stats: { source_name: { datetime: { job_count } } }
        :param query_result: result from StatsQuestioner query
        :param sources: pyjobs_crawler sources
        :return: dict of sources where keys value are dict of date:jobs_count
        """
        periods = cls.extract(query_result, cls.FIELD_DATE)

        stats = {}
        for source in sources:
            stats[source] = {}
            for period in periods:
                for query_line in query_result:
                    query_line_source, query_line_jobs_count, query_line_date = query_line
                    if query_line_source == source \
                            and query_line_date == period:
                        stats[source][period] = query_line_jobs_count
                if period not in stats[source]:
                    stats[source][period] = 0

        return stats

    @classmethod
    def flat_query_by_y(cls, query_result, sources, date_value_callback=lambda date: date.strftime('%Y-%m-%d')):
        """
        Return flatted stats: [{date: 'YYYY-mm-dd', source_1: job_count, source_2: job_count, source_...}, ...]
        :param query_result: result from StatsQuestioner query
        :param sources: pyjobs_crawler sources
        :param date_value_callback: callback used to format
        :return: list of flatted stats dicts
        """
        periods = cls.extract(query_result, cls.FIELD_DATE)

        flat = []
        for period in periods:
            flat_line = cls.extract_flat_dict_for_field(
                query_result=query_result,
                where_field=cls.FLAT_X_FIELD,
                where_field_value=period,
                flat_field_key=cls.FLAT_Y_FIELD,
                flat_field_value=cls.FIELD_COUNT,
                sources=sources
            )
            # Stringify date
            flat_line[cls.FIELDS[cls.FIELD_DATE]] = date_value_callback(flat_line[cls.FIELDS[cls.FIELD_DATE]])
            flat.append(flat_line)

        return flat

    @classmethod
    def extract_flat_dict_for_field(cls, query_result, where_field, where_field_value, flat_field_key, flat_field_value,
                                    sources):
        """
        return a flatted dict based on field correspondence:
            {based_field_name: based_field_value, source_1: source_1_value, source_2: source_2_value, ...}
        Sources not found in query_result for field correspondence are filled by 0.
        :param query_result: result from StatsQuestioner query
        :param where_field: based field
        :param where_field_value:  based field value
        :param flat_field_key: field where apply flatting (eg. source)
        :param flat_field_value: field to use for value of flat_field_key in dict (eg. count)
        :param sources: pyjobs_crawler sources
        :return: flatted dict
        """
        extracted_dict = {cls.FIELDS[where_field]: where_field_value}

        for result_line in query_result:
            if result_line[where_field] == where_field_value:
                extracted_dict[result_line[flat_field_key]] = result_line[flat_field_value]

        for source in sources:
            if source not in extracted_dict:
                extracted_dict[source] = 0

        return extracted_dict

    def __init__(self, session):
        self._session = session

    def by_complete_period(self, period, date_from, date_to):
        """

        :param period: period unit (eg StatsQuestioner.PERIOD_MONTH)
        :param date_from: start date of concerned jobs
        :param date_to: end date of concerned jobs
        :return: sqlalchemy Query
        :rtype: sqlalchemy.orm.Query
        """
        # TODO - B.S. - 20160204: date_trunc only compatible with postgresql
        date_trunc_func = \
            func.date_trunc(period, model.JobAlchemy.publication_datetime)
        return self._session.query(model.JobAlchemy.source,
                                   func.count(model.JobAlchemy.id),
                                   date_trunc_func) \
            .filter(model.JobAlchemy.publication_datetime >= date_from) \
            .filter(model.JobAlchemy.publication_datetime <= date_to) \
            .group_by(model.JobAlchemy.source) \
            .group_by(date_trunc_func) \
            .order_by(date_trunc_func)


1			# -- coding: utf-8 --
2			import calendar
3			import datetime
4
5			from dateutil.relativedelta import relativedelta
6			from sqlalchemy import func
7
8			from pyjobsweb import model
9
10
11			class StatsQuestioner(object):
12			"""
13			Provide database query method and query result formatting.
14			"""
15
16			PERIOD_MONTH = 'mon'
17			PERIOD_WEEK = 'week'
18
19			FIELD_SOURCE = 0
20			FIELD_COUNT = 1
21			FIELD_DATE = 2
22
23			FIELDS = {
24			FIELD_SOURCE: 'source',
25			FIELD_COUNT: 'jobs',
26			FIELD_DATE: 'date'
27			}
28
29			FLAT_Y_FIELD = FIELD_SOURCE
30			FLAT_X_FIELD = FIELD_DATE
31
32			@staticmethod
33			def get_month_period(period_range):
34			"""
35			return a start date and a end date of x complete previous month
36			:param period_range: number of months
37			:return: date_from (datetime), date_to (datetime)
38			"""
39
40			today = datetime.datetime.today().replace(hour=23, minute=59, second=59, microsecond=999999)
41			current_month = today.replace(day=1)
42			last_day_of_current_month = calendar.monthrange(today.year, today.month)[1]
43
44			if today.day == last_day_of_current_month:
45			date_from = current_month + relativedelta(months=-period_range - 1)
46			date_to = today
47			else:
48			date_from = current_month + relativedelta(months=-period_range)
49			date_to = current_month - datetime.timedelta(days=1)
50
51			return date_from, date_to
52
53			@classmethod
54			def extract(cls, query_result, field):
55			"""
56			Simple make a list containing asked field value from query_result
57			:param query_result: result from StatsQuestioner query
58			:param field: field to be extracted
59			:return: list containing asked field value from query_result
60			"""
61			return sorted(set([r[field] for r in query_result]))
62
63			@classmethod
64			def extract_stats(cls, query_result, sources):
65			"""
66			Return a dict of stats: { source_name: { datetime: { job_count } } }
67			:param query_result: result from StatsQuestioner query
68			:param sources: pyjobs_crawler sources
69			:return: dict of sources where keys value are dict of date:jobs_count
70			"""
71			periods = cls.extract(query_result, cls.FIELD_DATE)
72
73			stats = {}
74			for source in sources:
75			stats[source] = {}
76			for period in periods:
77			for query_line in query_result:
78			query_line_source, query_line_jobs_count, query_line_date = query_line
79			if query_line_source == source \
80			and query_line_date == period:
81			stats[source][period] = query_line_jobs_count
82			if period not in stats[source]:
83			stats[source][period] = 0
84
85			return stats
86
87			@classmethod
88			def flat_query_by_y(cls, query_result, sources, date_value_callback=lambda date: date.strftime('%Y-%m-%d')):
89			"""
90			Return flatted stats: [{date: 'YYYY-mm-dd', source_1: job_count, source_2: job_count, source_...}, ...]
91			:param query_result: result from StatsQuestioner query
92			:param sources: pyjobs_crawler sources
93			:param date_value_callback: callback used to format
94			:return: list of flatted stats dicts
95			"""
96			periods = cls.extract(query_result, cls.FIELD_DATE)
97
98			flat = []
99			for period in periods:
100			flat_line = cls.extract_flat_dict_for_field(
101			query_result=query_result,
102			where_field=cls.FLAT_X_FIELD,
103			where_field_value=period,
104			flat_field_key=cls.FLAT_Y_FIELD,
105			flat_field_value=cls.FIELD_COUNT,
106			sources=sources
107			)
108			# Stringify date
109			flat_line[cls.FIELDS[cls.FIELD_DATE]] = date_value_callback(flat_line[cls.FIELDS[cls.FIELD_DATE]])
110			flat.append(flat_line)
111
112			return flat
113
114			@classmethod
115			def extract_flat_dict_for_field(cls, query_result, where_field, where_field_value, flat_field_key, flat_field_value,
116			sources):
117			"""
118			return a flatted dict based on field correspondence:
119			{based_field_name: based_field_value, source_1: source_1_value, source_2: source_2_value, ...}
120			Sources not found in query_result for field correspondence are filled by 0.
121			:param query_result: result from StatsQuestioner query
122			:param where_field: based field
123			:param where_field_value: based field value
124			:param flat_field_key: field where apply flatting (eg. source)
125			:param flat_field_value: field to use for value of flat_field_key in dict (eg. count)
126			:param sources: pyjobs_crawler sources
127			:return: flatted dict
128			"""
129			extracted_dict = {cls.FIELDS[where_field]: where_field_value}
130
131			for result_line in query_result:
132			if result_line[where_field] == where_field_value:
133			extracted_dict[result_line[flat_field_key]] = result_line[flat_field_value]
134
135			for source in sources:
136			if source not in extracted_dict:
137			extracted_dict[source] = 0
138
139			return extracted_dict
140
141			def __init__(self, session):
142			self._session = session
143
144			def by_complete_period(self, period, date_from, date_to):
145			"""
146
147			:param period: period unit (eg StatsQuestioner.PERIOD_MONTH)
148			:param date_from: start date of concerned jobs
149			:param date_to: end date of concerned jobs
150			:return: sqlalchemy Query
151			:rtype: sqlalchemy.orm.Query
152			"""
153			# TODO - B.S. - 20160204: date_trunc only compatible with postgresql
154			date_trunc_func = \
155			func.date_trunc(period, model.JobAlchemy.publication_datetime)
156			return self._session.query(model.JobAlchemy.source,
157			func.count(model.JobAlchemy.id),
158			date_trunc_func) \
159			.filter(model.JobAlchemy.publication_datetime >= date_from) \
160			.filter(model.JobAlchemy.publication_datetime <= date_to) \
161			.group_by(model.JobAlchemy.source) \
162			.group_by(date_trunc_func) \
163			.order_by(date_trunc_func)
164

pyjobs / web

GitHub Access Token became invalid

StatsQuestioner.extract_stats() C last analyzed 2016-09-27 21:29 UTC

Complexity

Size

Duplication

Importance

Duplication Side-by-Side

Filter issues like

StatsQuestioner.extract_stats() C
last analyzed 2016-09-27 21:29 UTC