biosample.tasks.retrieval.FetchStatusHelper.check_submission_status() - Code Metrics - Inspection of ":zap: Batch submission to Biosamples" - cnr-ibba/IMAGE-InjectTool - Measure and Improve Code Quality continuously with Scrutinizer

Completed

Pull Request — master (#44)

by Paolo

created 2019-07-24 15:33 UTC

FetchStatusHelper.check_submission_status() B

↳ Parent: biosample.tasks.retrieval

Complexity

Conditions

Size

Total Lines	58
Code Lines	31

Duplication

Lines	0
Ratio	0 %

Importance

Changes

Metric	Value
eloc	31
dl	0
loc	58
rs	7.2693
c	0
b	0
f	0
cc	8
nop	1

How to fix Long Method

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Tue Jul 16 11:25:03 2019

@author: Paolo Cozzi <[email protected]>
"""

import os
import json

from decouple import AutoConfig
from celery.utils.log import get_task_logger

import pyUSIrest.client

from django.conf import settings
from django.utils import timezone

from image.celery import app as celery_app, MyTask
from image_app.helpers import parse_image_alias, get_model_object
from image_app.models import Submission
from common.tasks import redis_lock
from common.constants import (
    ERROR, NEED_REVISION, SUBMITTED, COMPLETED)
from submissions.helpers import send_message

from ..helpers import get_manager_auth
from ..models import Submission as USISubmission

# Celery-aware logger named after this module; used by every helper and task
# defined below
logger = get_task_logger(__name__)

# decouple configuration object, looking for its settings file inside the
# 'image' directory under the Django BASE_DIR.
# NOTE(review): `config` is not referenced in the visible part of this
# module - confirm it is still needed
settings_dir = os.path.join(settings.BASE_DIR, 'image')
config = AutoConfig(search_path=settings_dir)

# Threshold in days: FetchStatusHelper.submission_has_issues() marks a
# biosample submission as ERROR when more than MAX_DAYS whole days have passed
# since its `updated_at` timestamp
MAX_DAYS = 5


# HINT: how this class could be similar to SubmissionHelper?
class FetchStatusHelper:
    """Follow one USI (biosample) submission and mirror its state into UID.

    An instance wraps a ``biosample.models.Submission`` row and the
    matching pyUSIrest submission object. :py:meth:`check_submission_status`
    is the entry point: it reads the remote status and, depending on it,
    finalizes the submission, records the biosample accessions or simply
    waits for the next poll.
    """

    def __init__(self, usi_submission):
        """
        Collect everything needed to poll a biosample.models.Submission

        Args:
            usi_submission (biosample.models.Submission): a biosample
                model Submission instance
        """

        # the local (database) side of the submission
        self.usi_submission = usi_submission
        self.uid_submission = usi_submission.uid_submission

        # pyUSIrest objects, authenticated with the manager credentials
        self.auth = get_manager_auth()
        self.root = pyUSIrest.client.Root(self.auth)

        # the remote (USI) side of the submission, looked up by name
        self.submission_name = self.usi_submission.usi_submission_name
        self.submission = self.root.get_submission_by_name(
            submission_name=self.submission_name)

    def check_submission_status(self):
        """
        Inspect the USI status of the submission and act on it:

        * ``Completed``: record accessions with :py:meth:`complete`
        * ``Draft``: finalize it once validation is complete
        * ``Submitted``: only log the processing summary and wait
        * anything else: log a warning

        ``Draft`` and ``Submitted`` submissions are first checked with
        :py:meth:`submission_has_issues`; when an issue is found nothing
        else is done for them.
        """

        logger.debug("Checking status for '%s'", self.submission_name)

        status = self.submission.status

        if status == 'Completed':
            # fetch biosample ids with a proper function
            self.complete()

        elif status == 'Draft':
            self._check_draft()

        elif status == 'Submitted':
            self._check_submitted()

        else:
            # HINT: thrown an exception?
            logger.warning(
                "Unknown status '%s' for submission '%s'",
                status, self.submission_name)

        logger.debug(
            "Checking status for '%s' completed", self.submission_name)

    def _check_draft(self):
        """Finalize a 'Draft' submission when USI validated every sample."""

        # a stalled submission has just been marked with ERROR: stop here
        if self.submission_has_issues():
            return

        # validation statuses of the samples, grouped by status
        validation = self.submission.get_status()

        # I want to see 'Complete' as the only validation status
        if len(validation) == 1 and 'Complete' in validation:
            # check for errors and eventually finalize
            self.finalize()

        else:
            logger.warning(
                "Biosample validation is not completed yet (%s)",
                validation)

    def _check_submitted(self):
        """Log the processing summary of a 'Submitted' submission."""

        # a stalled submission has just been marked with ERROR: stop here
        if self.submission_has_issues():
            return

        logger.info(
            "Submission '%s' is '%s'. Waiting for biosample ids",
            self.submission_name, self.submission.status)

        # debug submission status
        document = self.submission.follow_url(
            "processingStatusSummary", self.auth)

        logger.debug(
            "Current status for submission '%s' is '%s'",
            self.submission_name, document.data)

    def submission_has_issues(self):
        """
        Check that biosample submission has no issues. At the moment the
        only issue detected is a submission not updated for more than
        ``MAX_DAYS`` days: in such case the biosample.models.Submission is
        saved with ``ERROR`` status and an explanatory message

        Returns:
            bool: True if an issue is detected
        """

        logger.debug(
            "Check if submission '%s' remained in the same status "
            "for a long time", self.submission_name)

        elapsed = timezone.now() - self.usi_submission.updated_at

        if elapsed.days <= MAX_DAYS:
            return False

        message = (
            "Biosample submission '%s' remained with the same status "
            "for more than %s days. Please report it to InjectTool "
            "team" % (self.submission_name, MAX_DAYS))

        self.usi_submission.status = ERROR
        self.usi_submission.message = message
        self.usi_submission.save()

        logger.error("Errors for submission: %s", self.submission_name)
        logger.error(message)

        return True

    def __sample_has_errors(self, sample, table, pk):
        """
        Helper method to mark an animal/sample with its own errors. Table
        should be Animal or Sample to update the appropriate object. Sample
        is a USI sample object

        Args:
            sample (pyUSIrest.client.sample): a USI sample object
            table (str): ``Animal`` or ``Sample``, mean the table where this
                object should be searched
            pk (int): table primary key

        Returns:
            dict: the USI error messages, keyed by the string
            representation of the animal/sample object
        """

        # get sample/animal object relying on table name and pk
        sample_obj = get_model_object(table, pk)

        sample_obj.name.status = NEED_REVISION
        sample_obj.name.save()

        # get a USI validation result
        validation_result = sample.get_validation_result()

        # TODO: should I store validation_result error in validation tables?
        error_messages = validation_result.errorMessages

        # return an error for each object
        return {str(sample_obj): error_messages}

    def finalize(self):
        """Finalize a submission by closing document and send it to
        biosample. If USI reports errors, nothing is sent: the affected
        animals/samples and the biosample.models.Submission are marked
        with ``NEED_REVISION`` instead"""

        logger.debug("Finalizing submission '%s'", self.submission_name)

        # get errors for a submission
        errors = self.submission.has_errors()

        if True not in errors:
            # raising an exception while finalizing will result
            # in a failed task.
            # TODO: model and test exception in finalization
            self.submission.finalize()
            return

        # collect all error messages in a list
        messages = []

        # get sample with errors then update database
        for sample in self.submission.get_samples(has_errors=True):
            # derive pk and table from alias
            table, pk = parse_image_alias(sample.alias)

            # if a sample has no errors, its status will be the same
            if not sample.has_errors():
                continue

            logger.warning("%s in table %s has errors!!!", sample, table)

            # mark this sample since has problems and keep its messages
            messages.append(self.__sample_has_errors(sample, table, pk))

        logger.error("Errors for submission: '%s'", self.submission_name)
        logger.error("Fix them, then finalize")

        # report error
        message = "Some items needs revision:\n\n" + \
            json.dumps(messages, indent=2)

        # Update status for biosample.models.Submission
        self.usi_submission.status = NEED_REVISION
        self.usi_submission.message = message
        self.usi_submission.save()

    def complete(self):
        """Complete a submission and record biosample accessions.

        Every animal/sample gets the ``COMPLETED`` status and its biosample
        id, then the biosample.models.Submission itself is marked as
        ``COMPLETED``. If even one sample has no accession the submission
        is ignored and no record is modified at all
        """

        logger.debug("Completing submission '%s'", self.submission_name)

        # materialize samples: they are inspected twice
        samples = list(self.submission.get_samples())

        # check every accession *before* touching the database, in order
        # to not leave a submission half-updated
        for sample in samples:
            if sample.accession is None:
                logger.error("No accession found for sample '%s'", sample)
                logger.error("Ignoring submission '%s'", self.submission)
                return

        for sample in samples:
            # derive pk and table from alias
            table, pk = parse_image_alias(sample.alias)

            # get sample/animal object relying on table name and pk
            sample_obj = get_model_object(table, pk)

            # update statuses
            sample_obj.name.status = COMPLETED
            sample_obj.name.biosample_id = sample.accession
            sample_obj.name.save()

        # update submission
        self.usi_submission.status = COMPLETED
        self.usi_submission.message = "Successful submission into biosample"
        self.usi_submission.save()

        logger.info(
            "Submission %s is now completed and recorded into UID",
            self.submission)


class FetchStatusTask(MyTask):
    """Celery task polling USI for the status of pending submissions."""

    name = "Fetch USI status"
    description = """Fetch biosample using USI API"""
    lock_id = "FetchStatusTask"

    def run(self):
        """
        Task entry point. A redis lock identified by :py:attr:`lock_id` is
        requested without blocking: if it is obtained
        :py:meth:`fetch_status` is executed, otherwise the task only
        reports that another instance is already running

        Returns:
            str: the value returned by :py:meth:`fetch_status`, or a
            "already running" message when the lock was not obtained"""

        # debugging instance
        self.debug_task()

        # non blocking request: don't wait for a lock owned by someone else
        with redis_lock(self.lock_id, blocking=False) as has_lock:
            if has_lock:
                # do stuff and return something
                return self.fetch_status()

        # lock not obtained: warn and give the same text back to the caller
        busy_notice = "%s already running!" % self.name
        logger.warning(busy_notice)

        return busy_notice

    def fetch_status(self):
        """
        Fetch status for pending submissions. Called from
        :py:meth:`run`, select all
        :py:class:`Submission <image_app.models.Submission>` objects
        with :py:const:`SUBMITTED <common.constants.SUBMITTED>` status
        from :ref:`UID <The Unified Internal Database>` and call
        :py:meth:`fetch_queryset` with this data. A ``ConnectionError``
        raised in the meantime makes the task to be retried

        Returns:
            str: ``"success"``
        """

        logger.info("fetch_status started")

        # only SUBMITTED submissions are of interest, the others are not
        # yet finalized. This function need to be called by exclusives
        # tasks
        pending = Submission.objects.filter(status=SUBMITTED)

        if pending.count() == 0:
            logger.debug("No pending submission in UID database")

        else:
            try:
                # fetch biosample status
                self.fetch_queryset(pending)

            # retry a task under errors
            # http://docs.celeryproject.org/en/latest/userguide/tasks.html#retrying
            except ConnectionError as exc:
                raise self.retry(exc=exc)

        # debug
        logger.info("fetch_status completed")

        return "success"

    # a function to retrieve biosample submission
    def fetch_queryset(self, queryset):
        """Check biosample status for a queryset (an iterable of
        :py:const:`SUBMITTED <common.constants.SUBMITTED>`
        :py:class:`Submission <image_app.models.Submission>` objects). For
        each of them, every related ``biosample.models.Submission`` is
        wrapped in a :py:class:`FetchStatusHelper` and checked with
        :py:meth:`FetchStatusHelper.check_submission_status`
        """

        logger.info("Searching for submissions into biosample")

        for uid_submission in queryset:
            # a UID submission could be split in more USI submissions
            related = USISubmission.objects.filter(
                uid_submission=uid_submission)

            # HINT: fetch statuses using tasks?
            for usi_submission in related:
                FetchStatusHelper(usi_submission).check_submission_status()

            # TODO: set the final status for a submission like
            # submission complete task

        logger.info("fetch_queryset completed")


# Register the class-based task explicitly in the celery application task
# registry (see the linked issue for why this is done by hand)
# https://github.com/celery/celery/issues/3744#issuecomment-271366923
celery_app.tasks.register(FetchStatusTask)


1			#!/usr/bin/env python3
2			# -- coding: utf-8 --
3			"""
4			Created on Tue Jul 16 11:25:03 2019
5
6			@author: Paolo Cozzi <[email protected]>
7			"""
8
9			import os
10			import json
11
12			from decouple import AutoConfig
13			from celery.utils.log import get_task_logger
14
15			import pyUSIrest.client
16
17			from django.conf import settings
18			from django.utils import timezone
19
20			from image.celery import app as celery_app, MyTask
21			from image_app.helpers import parse_image_alias, get_model_object
22			from image_app.models import Submission
23			from common.tasks import redis_lock
24			from common.constants import (
25			ERROR, NEED_REVISION, SUBMITTED, COMPLETED)
26			from submissions.helpers import send_message
27
28			from ..helpers import get_manager_auth
29			from ..models import Submission as USISubmission
30
31			# Get an instance of a logger
32			logger = get_task_logger(__name__)
33
34			# define a decouple config object
35			settings_dir = os.path.join(settings.BASE_DIR, 'image')
36			config = AutoConfig(search_path=settings_dir)
37
38			# a threshold of days to determine a very long task
39			MAX_DAYS = 5
40
41
42			# HINT: how this class could be similar to SubmissionHelper?
43			class FetchStatusHelper():
44			"""Helper class to deal with submission data"""
45
46			# define my class attributes
47			def __init__(self, usi_submission):
48			"""
49			Helper function to have info for a biosample.models.Submission
50
51			Args:
52			usi_submission (biosample.models.Submission): a biosample
53			model Submission instance
54			"""
55
56			# ok those are my default class attributes
57			self.usi_submission = usi_submission
58			self.uid_submission = usi_submission.uid_submission
59
60			# here are pyUSIrest object
61			self.auth = get_manager_auth()
62			self.root = pyUSIrest.client.Root(self.auth)
63
64			# here I will track the biosample submission
65			self.submission_name = self.usi_submission.usi_submission_name
66			self.submission = self.root.get_submission_by_name(
67			submission_name=self.submission_name)
68
69			def check_submission_status(self):
70			logger.debug("Checking status for '%s'" % (
71			self.submission_name))
72
73			# Update submission status if completed
74			if self.submission.status == 'Completed':
75			# fetch biosample ids with a proper function
76			self.complete()
77
78			elif self.submission.status == 'Draft':
79			# check for a long task
80			if self.submission_has_issues():
81			# return to the caller. I've just marked the submission with
82			# errors and sent a mail to the user
83			return
84
85			# check validation. If it is ok, finalize submission
86			status = self.submission.get_status()
87
88			# this mean validation statuses, I want to see completed in all
89			# samples
90			if len(status) == 1 and 'Complete' in status:
91			# check for errors and eventually finalize
92			self.finalize()
93
94			else:
95			logger.warning(
96			"Biosample validation is not completed yet (%s)" %
97			(status))
98
99			elif self.submission.status == 'Submitted':
100			# check for a long task
101			if self.submission_has_issues():
102			# return to the caller. I've just marked the submission with
103			# errors and sent a mail to the user
104			return
105
106			logger.info(
107			"Submission '%s' is '%s'. Waiting for biosample ids" % (
108			self.submission_name,
109			self.submission.status))
110
111			# debug submission status
112			document = self.submission.follow_url(
113			"processingStatusSummary", self.auth)
114
115			logger.debug(
116			"Current status for submission '%s' is '%s'" % (
117			self.submission_name, document.data))
118
119			else:
120			# HINT: thrown an exception?
121			logger.warning("Unknown status '%s' for submission '%s'" % (
122			self.submission.status,
123			self.submission_name))
124
125			logger.debug("Checking status for '%s' completed" % (
126			self.submission_name))
127
128			def submission_has_issues(self):
129			"""
130			Check that biosample submission has not issues. For example, that
131			it will remain in the same status for a long time
132
133			Returns:
134			bool: True if an issue is detected
135			"""
136
137			logger.debug(
138			"Check if submission '%s' remained in the same status "
139			"for a long time" % (
140			self.submission_name))
141
142			if (timezone.now() - self.usi_submission.updated_at).days > MAX_DAYS:
143			message = (
144			"Biosample submission '%s' remained with the same status "
145			"for more than %s days. Please report it to InjectTool "
146			"team" % (self.submission_name, MAX_DAYS))
147
148			self.usi_submission.status = ERROR
149			self.usi_submission.message = message
150			self.usi_submission.save()
151
152			logger.error(
153			"Errors for submission: %s" % (
154			self.submission_name))
155			logger.error(message)
156
157			return True
158
159			else:
160			return False
161
162			def __sample_has_errors(self, sample, table, pk):
163			"""
164			Helper metod to mark a (animal/sample) with its own errors. Table
165			sould be Animal or Sample to update the approriate object. Sample
166			is a USI sample object
167
168			Args:
169			sample (pyUSIrest.client.sample): a USI sample object
170			table (str): ``Animal`` or ``Sample``, mean the table where this
171			object should be searched
172			pk (int): table primary key
173			"""
174
175			# get sample/animal object relying on table name and pk
176			sample_obj = get_model_object(table, pk)
177
178			sample_obj.name.status = NEED_REVISION
179			sample_obj.name.save()
180
181			# get a USI validation result
182			validation_result = sample.get_validation_result()
183
184			# TODO: should I store validation_result error in validation tables?
185			errorMessages = validation_result.errorMessages
186
187			# return an error for each object
188			return {str(sample_obj): errorMessages}
189
190			def finalize(self):
191			"""Finalize a submission by closing document and send it to
192			biosample"""
193
194			logger.debug("Finalizing submission '%s'" % (
195			self.submission_name))
196
197			# get errors for a submission
198			errors = self.submission.has_errors()
199
200			# collect all error messages in a list
201			messages = []
202
203			if True in errors:
204			# get sample with errors then update database
205			samples = self.submission.get_samples(has_errors=True)
206
207			for sample in samples:
208			# derive pk and table from alias
209			table, pk = parse_image_alias(sample.alias)
210
211			# need to check if this sample/animals has errors or not
212			if sample.has_errors():
213			logger.warning(
214			"%s in table %s has errors!!!" % (sample, table))
215
216			# mark this sample since has problems
217			errorMessages = self.__sample_has_errors(
218			sample, table, pk)
219
220			# append this into error messages list
221			messages.append(errorMessages)
222
223			# if a sample has no errors, status will be the same
224
225			logger.error(
226			"Errors for submission: '%s'" % (self.submission_name))
227			logger.error("Fix them, then finalize")
228
229			# report error
230			message = "Some items needs revision:\n\n" + \
231			json.dumps(messages, indent=2)
232
233			# Update status for biosample.models.Submission
234			self.usi_submission.status = NEED_REVISION
235			self.usi_submission.message = message
236			self.usi_submission.save()
237
238			else:
239			# raising an exception while finalizing will result
240			# in a failed task.
241			# TODO: model and test exception in finalization
242			self.submission.finalize()
243
244			def complete(self):
245			"""Complete a submission and fetch name objects"""
246
247			logger.debug("Completing submission '%s'" % (
248			self.submission_name))
249
250			for sample in self.submission.get_samples():
251			# derive pk and table from alias
252			table, pk = parse_image_alias(sample.alias)
253
254			# if no accession, return without doing anything
255			if sample.accession is None:
256			logger.error("No accession found for sample '%s'" % (sample))
257			logger.error("Ignoring submission '%s'" % (self.submission))
258			return
259
260			# get sample/animal object relying on table name and pk
261			sample_obj = get_model_object(table, pk)
262
263			# update statuses
264			sample_obj.name.status = COMPLETED
265			sample_obj.name.biosample_id = sample.accession
266			sample_obj.name.save()
267
268			# update submission
269			self.usi_submission.status = COMPLETED
270			self.usi_submission.message = "Successful submission into biosample"
271			self.usi_submission.save()
272
273			logger.info(
274			"Submission %s is now completed and recorded into UID" % (
275			self.submission))
276
277
278			class FetchStatusTask(MyTask):
279			name = "Fetch USI status"
280			description = """Fetch biosample using USI API"""
281			lock_id = "FetchStatusTask"
282
283			def run(self):
284			"""
285			This function is called when delay is called. It will acquire a lock
286			in redis, so those tasks are mutually exclusive
287
288			Returns:
289			str: success if everything is ok. Different messages if task is
290			already running or exception is caught"""
291
292			# debugging instance
293			self.debug_task()
294
295			# forcing blocking condition: Wait until a get a lock object
296			with redis_lock(self.lock_id, blocking=False) as acquired:
297			if acquired:
298			# do stuff and return something
299			return self.fetch_status()
300
301			message = "%s already running!" % (self.name)
302
303			logger.warning(message)
304
305			return message
306
307			def fetch_status(self):
308			"""
309			Fetch status from pending submissions. Called from
310			:py:meth:`run`, handles exceptions from USI, select
311			all :py:class:`Submission <image_app.models.Submission>` objects
312			with :py:const:`SUBMITTED <common.constants.SUBMITTED>` status
313			from :ref:`UID <The Unified Internal Database>` and call
314			:py:meth:`fetch_queryset` with this data
315			"""
316
317			logger.info("fetch_status started")
318
319			# search for submission with SUBMITTED status. Other submission are
320			# not yet finalized. This function need to be called by exclusives
321			# tasks
322			qs = Submission.objects.filter(status=SUBMITTED)
323
324			# check for queryset length
325			if qs.count() != 0:
326			try:
327			# fetch biosample status
328			self.fetch_queryset(qs)
329
330			# retry a task under errors
331			# http://docs.celeryproject.org/en/latest/userguide/tasks.html#retrying
332			except ConnectionError as exc:
333			raise self.retry(exc=exc)
334
335			else:
336			logger.debug("No pending submission in UID database")
337
338			# debug
339			logger.info("fetch_status completed")
340
341			return "success"
342
343			# a function to retrieve biosample submission
344			def fetch_queryset(self, queryset):
345			"""Fetch biosample against a queryset (a list of
346			:py:const:`SUBMITTED <common.constants.SUBMITTED>`
347			:py:class:`Submission <image_app.models.Submission>` objects). Iterate
348			through submission to get USI info. Calls
349			:py:meth:`fetch_submission_obj`
350			"""
351
352			logger.info("Searching for submissions into biosample")
353
354			for uid_submission in queryset:
355			usi_submissions = USISubmission.objects.filter(
356			uid_submission=uid_submission)
357
358			# HINT: fetch statuses using tasks?
359			for usi_submission in usi_submissions:
360			status_helper = FetchStatusHelper(usi_submission)
361			status_helper.check_submission_status()
362
363			# TODO: set the final status for a submission like
364			# submission complete task
365
366			logger.info("fetch_queryset completed")
367
368
369			# register explicitly tasks
370			# https://github.com/celery/celery/issues/3744#issuecomment-271366923
371			celery_app.tasks.register(FetchStatusTask)
372

cnr-ibba / IMAGE-InjectTool

Pull Request — master (#44)

FetchStatusHelper.check_submission_status() B

Complexity

Size

Duplication

Importance

How to fix Long Method

Long Method

Duplication Side-by-Side

Filter issues like