GitHub Access Token became invalid

It seems like the GitHub access token used for retrieving details about this repository from GitHub became invalid. This might prevent certain types of inspections from being run (in particular, everything related to pull requests).
Please ask an admin of your repository to re-new the access token on this website.

StatsQuestioner.extract_stats()   C
last analyzed

Complexity

Conditions 7

Size

Total Lines 23

Duplication

Lines 0
Ratio 0 %

Importance

Changes 0
Metric Value
cc 7
c 0
b 0
f 0
dl 0
loc 23
rs 5.5
1
# -*- coding: utf-8 -*-
2
import calendar
3
import datetime
4
5
from dateutil.relativedelta import relativedelta
6
from sqlalchemy import func
7
8
from pyjobsweb import model
9
10
11
class StatsQuestioner(object):
12
    """
13
        Provide database query method and query result formatting.
14
    """
15
16
    PERIOD_MONTH = 'mon'
17
    PERIOD_WEEK = 'week'
18
19
    FIELD_SOURCE = 0
20
    FIELD_COUNT = 1
21
    FIELD_DATE = 2
22
23
    FIELDS = {
24
        FIELD_SOURCE: 'source',
25
        FIELD_COUNT: 'jobs',
26
        FIELD_DATE: 'date'
27
    }
28
29
    FLAT_Y_FIELD = FIELD_SOURCE
30
    FLAT_X_FIELD = FIELD_DATE
31
32
    @staticmethod
33
    def get_month_period(period_range):
34
        """
35
        return a start date and a end date of x complete previous month
36
        :param period_range: number of months
37
        :return: date_from (datetime), date_to (datetime)
38
        """
39
40
        today = datetime.datetime.today().replace(hour=23, minute=59, second=59, microsecond=999999)
41
        current_month = today.replace(day=1)
42
        last_day_of_current_month = calendar.monthrange(today.year, today.month)[1]
43
44
        if today.day == last_day_of_current_month:
45
            date_from = current_month + relativedelta(months=-period_range - 1)
46
            date_to = today
47
        else:
48
            date_from = current_month + relativedelta(months=-period_range)
49
            date_to = current_month - datetime.timedelta(days=1)
50
51
        return date_from, date_to
52
53
    @classmethod
54
    def extract(cls, query_result, field):
55
        """
56
        Simple make a list containing asked field value from query_result
57
        :param query_result: result from StatsQuestioner query
58
        :param field: field to be extracted
59
        :return: list containing asked field value from query_result
60
        """
61
        return sorted(set([r[field] for r in query_result]))
62
63
    @classmethod
64
    def extract_stats(cls, query_result, sources):
65
        """
66
        Return a dict of stats: { source_name: { datetime: { job_count } } }
67
        :param query_result: result from StatsQuestioner query
68
        :param sources: pyjobs_crawler sources
69
        :return: dict of sources where keys value are dict of date:jobs_count
70
        """
71
        periods = cls.extract(query_result, cls.FIELD_DATE)
72
73
        stats = {}
74
        for source in sources:
75
            stats[source] = {}
76
            for period in periods:
77
                for query_line in query_result:
78
                    query_line_source, query_line_jobs_count, query_line_date = query_line
79
                    if query_line_source == source \
80
                            and query_line_date == period:
81
                        stats[source][period] = query_line_jobs_count
82
                if period not in stats[source]:
83
                    stats[source][period] = 0
84
85
        return stats
86
87
    @classmethod
88
    def flat_query_by_y(cls, query_result, sources, date_value_callback=lambda date: date.strftime('%Y-%m-%d')):
89
        """
90
        Return flatted stats: [{date: 'YYYY-mm-dd', source_1: job_count, source_2: job_count, source_...}, ...]
91
        :param query_result: result from StatsQuestioner query
92
        :param sources: pyjobs_crawler sources
93
        :param date_value_callback: callback used to format
94
        :return: list of flatted stats dicts
95
        """
96
        periods = cls.extract(query_result, cls.FIELD_DATE)
97
98
        flat = []
99
        for period in periods:
100
            flat_line = cls.extract_flat_dict_for_field(
101
                query_result=query_result,
102
                where_field=cls.FLAT_X_FIELD,
103
                where_field_value=period,
104
                flat_field_key=cls.FLAT_Y_FIELD,
105
                flat_field_value=cls.FIELD_COUNT,
106
                sources=sources
107
            )
108
            # Stringify date
109
            flat_line[cls.FIELDS[cls.FIELD_DATE]] = date_value_callback(flat_line[cls.FIELDS[cls.FIELD_DATE]])
110
            flat.append(flat_line)
111
112
        return flat
113
114
    @classmethod
115
    def extract_flat_dict_for_field(cls, query_result, where_field, where_field_value, flat_field_key, flat_field_value,
116
                                    sources):
117
        """
118
        return a flatted dict based on field correspondence:
119
            {based_field_name: based_field_value, source_1: source_1_value, source_2: source_2_value, ...}
120
        Sources not found in query_result for field correspondence are filled by 0.
121
        :param query_result: result from StatsQuestioner query
122
        :param where_field: based field
123
        :param where_field_value:  based field value
124
        :param flat_field_key: field where apply flatting (eg. source)
125
        :param flat_field_value: field to use for value of flat_field_key in dict (eg. count)
126
        :param sources: pyjobs_crawler sources
127
        :return: flatted dict
128
        """
129
        extracted_dict = {cls.FIELDS[where_field]: where_field_value}
130
131
        for result_line in query_result:
132
            if result_line[where_field] == where_field_value:
133
                extracted_dict[result_line[flat_field_key]] = result_line[flat_field_value]
134
135
        for source in sources:
136
            if source not in extracted_dict:
137
                extracted_dict[source] = 0
138
139
        return extracted_dict
140
141
    def __init__(self, session):
142
        self._session = session
143
144
    def by_complete_period(self, period, date_from, date_to):
145
        """
146
147
        :param period: period unit (eg StatsQuestioner.PERIOD_MONTH)
148
        :param date_from: start date of concerned jobs
149
        :param date_to: end date of concerned jobs
150
        :return: sqlalchemy Query
151
        :rtype: sqlalchemy.orm.Query
152
        """
153
        # TODO - B.S. - 20160204: date_trunc only compatible with postgresql
154
        date_trunc_func = \
155
            func.date_trunc(period, model.JobAlchemy.publication_datetime)
156
        return self._session.query(model.JobAlchemy.source,
157
                                   func.count(model.JobAlchemy.id),
158
                                   date_trunc_func) \
159
            .filter(model.JobAlchemy.publication_datetime >= date_from) \
160
            .filter(model.JobAlchemy.publication_datetime <= date_to) \
161
            .group_by(model.JobAlchemy.source) \
162
            .group_by(date_trunc_func) \
163
            .order_by(date_trunc_func)
164