diff --git a/mark2cure/document/models.py b/mark2cure/document/models.py index cda50416..6c9352dc 100644 --- a/mark2cure/document/models.py +++ b/mark2cure/document/models.py @@ -121,11 +121,8 @@ class Pubtator(models.Model): document = models.ForeignKey(Document) kind = models.CharField(max_length=200, blank=True) - session_id = models.CharField(max_length=200, blank=True) content = models.TextField(blank=True, null=True) - request_count = models.IntegerField(default=0) - validate_cache = models.BooleanField(default=False) # Updated is also trigged during doc.valid_pubtator # so it's associated with when last polled or @@ -139,18 +136,22 @@ class Meta: def __unicode__(self): return 'pubtator' - def valid(self): - # (TODO) This may return 2 different "types" check on - # implications of this discrepancy - if self.validate_cache: - return True - - if self.session_id != '': + def is_valid(self): + """ + Returns a boolean indicating whether the pubtator is in a valid state + """ + if self.content is None: return False - if self.content is None: + if self.get_instance(): + return True + else: return False + def get_instance(self): + """ + Returns the pubtator BioC instance if valid or None + """ try: r = BioCReader(source=self.content) r.read() @@ -159,12 +160,16 @@ def valid(self): # If one of them doesn't validate leave return False + def count_annotations(self): - if self.valid(): + """ + Returns an Integer count of all types of annotations, across all sections for a pubtator response of any type. 
+ If none are found or the document is invalid, return 0 + """ + instance = self.get_instance() + if instance: count = 0 - reader = BioCReader(source=self.content) - reader.read() - for doc in reader.collection.documents: + for doc in instance.collection.documents: for passage in doc.passages: count += len(passage.annotations) return count @@ -172,6 +177,21 @@ def count_annotations(self): else: return 0 +class PubtatorRequest(models.Model): + """ + Pending jobs that have been submitted to Pubtator and are + awaiting completion + """ + pubtator = models.ForeignKey(Pubtator) + + fulfilled = models.BooleanField(default=False) + session_id = models.CharField(max_length=200, blank=True) + # The Number of times we've checked on the session_id + request_count = models.IntegerField(default=0) + + updated = models.DateTimeField(auto_now=True) + created = models.DateTimeField(auto_now_add=True) + class Section(models.Model): SECTION_KIND_CHOICE = ( diff --git a/mark2cure/document/signals.py b/mark2cure/document/signals.py index a8860979..e5ea0dc6 100644 --- a/mark2cure/document/signals.py +++ b/mark2cure/document/signals.py @@ -27,7 +27,10 @@ def pubtator_post_save(sender, instance, created, **kwargs): raise e session_id = re.findall(r'\d{4}-\d{4}-\d{4}-\d{4}', response.url)[0] - pubtator.session_id = session_id - pubtator.save() + + PubtatorRequest.objects.get_or_create( + pubtator=pubtator, + fulfilled=False, + session_id=session_id) diff --git a/mark2cure/document/tasks.py b/mark2cure/document/tasks.py index ade94cf6..0d17fcd4 100644 --- a/mark2cure/document/tasks.py +++ b/mark2cure/document/tasks.py @@ -113,11 +113,12 @@ def check_pubtator_health(self): # For all Pubtator models with content ensure it validates and cleanup the session_id and content for pubtator in Pubtator.objects.filter(content__isnull=False).all(): # (TODO) Do robust checks for Pubtator object valid status - p_valid = pubtator.valid() + instance = pubtator.get_instance() - if p_valid: + if instance: # 
Association with the correct document - pubtator.document = Document.objects.get(document_id=p_valid.collection.documents[0].id) + doc_id = instance.collection.documents[0].id + pubtator.document = Document.objects.get(document_id=doc_id) # Prevents subsequent API calls pubtator.session_id = ''