Skip to content

Commit

Permalink
Addresses #183 and #177
Browse files Browse the repository at this point in the history
  • Loading branch information
x0xMaximus committed Nov 16, 2016
1 parent 1288e71 commit a04023b
Show file tree
Hide file tree
Showing 3 changed files with 33 additions and 12 deletions.
4 changes: 2 additions & 2 deletions mark2cure/control/views.py
Original file line number Diff line number Diff line change
Expand Up @@ -144,7 +144,7 @@ def pubtator_actions(request, pk):

if request.method == 'POST':
pubtator.delete()
doc.init_pubtator()
doc.run_pubtator()

return redirect(reverse('control:document', kwargs={'pk': doc.pk}))

Expand All @@ -158,7 +158,7 @@ def document_pubtator_actions(request, pk):
if request.method == 'POST':
print '> Reset doc pubtators'
doc.pubtator_set.all().delete()
doc.init_pubtator()
doc.run_pubtator()

return HttpResponseRedirect(request.META.get('HTTP_REFERER'))

Expand Down
34 changes: 26 additions & 8 deletions mark2cure/document/models.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from django.db import models
from django.contrib.auth.models import User
from django.utils import timezone

from nltk.tokenize import WhitespaceTokenizer
from mark2cure.common.bioc import BioCReader
Expand Down Expand Up @@ -34,17 +35,32 @@ def available_sections(self):
def count_available_sections(self):
    """Return the number of related sections whose kind is not 'o'."""
    return self.section_set.exclude(kind='o').count()

def init_pubtator(self, force_create=True):
def run_pubtator(self):
    """Ensure this document has its Pubtator rows and (re)submit stale ones.

    When the document has available sections and fewer than three Pubtator
    rows, one row per annotator kind (tmChem, DNorm, GNormPlus) is fetched
    or created. Every Pubtator of the document is then inspected:

    * if nothing is pending and its newest finished request (FULLFILLED or
      FAILED) was last updated 60 or more days ago, it is submitted again;
    * if its pending (UNFULLFILLED) request was last updated 1 or more days
      ago, that request is marked EXPIRED and the Pubtator is submitted
      again.

    All Pubtators are evaluated on every call; a resubmission of one does
    not prevent the others from being checked.

    Returns None.
    """
    if self.available_sections().exists() and Pubtator.objects.filter(document=self).count() < 3:
        for api_ann in ['tmChem', 'DNorm', 'GNormPlus']:
            Pubtator.objects.get_or_create(document=self, kind=api_ann)

    now = timezone.now()
    finished_statuses = [PubtatorRequest.FULLFILLED, PubtatorRequest.FAILED]

    for pubtator in self.pubtator_set.all():
        requests = pubtator.pubtatorrequest_set
        last_request = requests.filter(status__in=finished_statuses).order_by('-updated').first()

        # Should never be more than 1 pending request per Pubtator.
        # filter().first() yields None when there is none, and cannot raise
        # DoesNotExist / MultipleObjectsReturned the way get() does.
        pending_request = requests.filter(status=PubtatorRequest.UNFULLFILLED).order_by('-updated').first()

        if pending_request is None:
            # If we successfully retrieved in the past, but it's old now
            if last_request and (now - last_request.updated).days >= 60:
                pubtator.submit()
            # NOTE(review): a Pubtator with no requests at all is left
            # untouched here; presumably its first request is submitted
            # elsewhere -- confirm.
            continue

        # If the current request is never going to finish, flag it and start over
        if (now - pending_request.updated).days >= 1:
            pending_request.status = PubtatorRequest.EXPIRED
            pending_request.save()

            pubtator.submit()

def update_padding(self):
from mark2cure.common.formatter import pad_split
Expand Down Expand Up @@ -197,10 +213,12 @@ class PubtatorRequest(models.Model):
UNFULLFILLED = 0
FULLFILLED = 1
FAILED = 2
EXPIRED = 3
STATUS_CHOICES = (
(UNFULLFILLED, 'Unfullfilled'),
(FULLFILLED, 'Fullfilled'),
(FAILED, 'Failed')
(FAILED, 'Failed'),
(EXPIRED, 'Expired')
)
status = models.IntegerField(default=UNFULLFILLED, choices=STATUS_CHOICES)
session_id = models.CharField(max_length=19)
Expand Down
7 changes: 5 additions & 2 deletions mark2cure/document/tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ def check_corpus_health(self):
# If the document doesn't pass the validator
# delete all existing content and retry
if not document.valid_pubtator() or document.update_padding():
document.init_pubtator()
document.run_pubtator()

if not self.request.called_directly:
return True
Expand Down Expand Up @@ -85,7 +85,7 @@ def get_pubmed_document(self, pubmed_ids, source='pubmed', include_pubtator=True
sec.save()

if include_pubtator:
doc.init_pubtator()
doc.run_pubtator()

if group_pk:
docs = Document.objects.filter(source=source).all()
Expand All @@ -104,6 +104,9 @@ def maintain_pubtator_requests(self):
"""A routine job that continually checks for pending Pubtator Requests
and will resubmit Pubtators that haven't been updated in a "long time"
"""
# Start any new or old Pubtator requests
for d in Document.objects.all():
d.run_pubtator()

# Try to fetch all the pending pubtator requests
for pubtator_request in PubtatorRequest.objects.filter(status=PubtatorRequest.UNFULLFILLED).all():
Expand Down

0 comments on commit a04023b

Please sign in to comment.