Skip to content

Commit

Permalink
[doc.pubtator] validity + request tracking: #183
Browse files Browse the repository at this point in the history
- separation into 2 models, new one exclusively for tracking requests
- new validation approach to prevent return type confusion
  • Loading branch information
x0xMaximus committed Oct 27, 2016
1 parent 6df4c11 commit 073eb0f
Show file tree
Hide file tree
Showing 3 changed files with 44 additions and 20 deletions.
50 changes: 35 additions & 15 deletions mark2cure/document/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -121,11 +121,8 @@ class Pubtator(models.Model):
document = models.ForeignKey(Document)

kind = models.CharField(max_length=200, blank=True)
session_id = models.CharField(max_length=200, blank=True)
content = models.TextField(blank=True, null=True)

request_count = models.IntegerField(default=0)
validate_cache = models.BooleanField(default=False)

# Updated is also triggered during doc.valid_pubtator
# so it's associated with when last polled or
Expand All @@ -139,18 +136,22 @@ class Meta:
def __unicode__(self):
return 'pubtator'

def valid(self):
# (TODO) This may return 2 different "types" check on
# implications of this discrepancy
if self.validate_cache:
return True

if self.session_id != '':
def is_valid(self):
"""
Returns a boolean indicating whether the pubtator is in a valid state
"""
if self.content is None:
return False

if self.content is None:
if self.get_instance():
return True
else:
return False

def get_instance(self):
"""
Returns the pubtator BioC instance if valid; otherwise a falsy value (the failure path below returns False, not None)
"""
try:
r = BioCReader(source=self.content)
r.read()
Expand All @@ -159,19 +160,38 @@ def valid(self):
# If one of them doesn't validate leave
return False


def count_annotations(self):
if self.valid():
"""
Returns an integer count of all types of annotations, across all sections, for a pubtator response of any type.
If none are found or the document is invalid, return 0
"""
instance = self.get_instance()
if instance:
count = 0
reader = BioCReader(source=self.content)
reader.read()
for doc in reader.collection.documents:
for doc in instance.collection.documents:
for passage in doc.passages:
count += len(passage.annotations)
return count

else:
return 0

class PubtatorRequest(models.Model):
"""
Tracks a job that has been submitted to Pubtator on behalf of a
Pubtator record and is awaiting completion.

One row is stored per submitted session; new rows start out with
fulfilled=False and request_count=0.
"""
# The Pubtator record this request was submitted for
pubtator = models.ForeignKey(Pubtator)

# Defaults to False; presumably flipped to True once the job's result
# has been retrieved -- TODO confirm against the polling task
fulfilled = models.BooleanField(default=False)
# Session identifier of the submitted job (may be blank)
session_id = models.CharField(max_length=200, blank=True)
# The number of times we've checked on the session_id
request_count = models.IntegerField(default=0)

# Timestamps maintained by Django: `updated` is refreshed on every save,
# `created` is set once when the row is first created
updated = models.DateTimeField(auto_now=True)
created = models.DateTimeField(auto_now_add=True)


class Section(models.Model):
SECTION_KIND_CHOICE = (
Expand Down
7 changes: 5 additions & 2 deletions mark2cure/document/signals.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,10 @@ def pubtator_post_save(sender, instance, created, **kwargs):
raise e

session_id = re.findall(r'\d{4}-\d{4}-\d{4}-\d{4}', response.url)[0]
pubtator.session_id = session_id
pubtator.save()

PubtatorRequest.objects.get_or_create(
pubtator=pubtator,
fulfilled=False,
session_id=session_id)


7 changes: 4 additions & 3 deletions mark2cure/document/tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -113,11 +113,12 @@ def check_pubtator_health(self):
# For all Pubtator models with content ensure it validates and cleanup the session_id and content
for pubtator in Pubtator.objects.filter(content__isnull=False).all():
# (TODO) Do robust checks for Pubtator object valid status
p_valid = pubtator.valid()
instance = pubtator.get_instance()

if p_valid:
if instance:
# Association with the correct document
pubtator.document = Document.objects.get(document_id=p_valid.collection.documents[0].id)
doc_id = instance.collection.documents[0].id
pubtator.document = Document.objects.get(document_id=doc_id)

# Prevents subsequent API calls
pubtator.session_id = ''
Expand Down

0 comments on commit 073eb0f

Please sign in to comment.