Skip to content

Commit

Permalink
merging latest changes
Browse files Browse the repository at this point in the history
  • Loading branch information
mscaldas2012 committed Aug 7, 2024
1 parent efce286 commit 9b4ac2f
Show file tree
Hide file tree
Showing 26 changed files with 260 additions and 180 deletions.
2 changes: 1 addition & 1 deletion src/main/resources/DefaultBatchingProfile.json
Original file line number Diff line number Diff line change
Expand Up @@ -949,7 +949,7 @@
"dataType": "TS",
"maxLength": "26",
"usage": "X",
"cardinality": "[)..1]",
"cardinality": "[0..1]",
"conformance": "",
"notes": ""
},
Expand Down
16 changes: 16 additions & 0 deletions src/main/resources/DefaultFieldsProfile.json
Original file line number Diff line number Diff line change
Expand Up @@ -970,6 +970,22 @@
"conformance": ""
}

],
"EIP": [
{
"fieldNumber": 1,
"name": "Placer Assigned Identifier",
"dataType": "EI",
"usage": "O",
"cardinality": "[0..1]"
},
{
"fieldNumber": 2,
"name": "Filler Assigned Identifier",
"dataType": "EI",
"usage": "O",
"cardinality": "[0..1]"
}
]
}
}
33 changes: 31 additions & 2 deletions src/main/resources/DefaultProfile.json
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,6 @@
}
}
}

}
}
},
Expand Down Expand Up @@ -567,7 +566,7 @@
"dataType": "TS",
"maxLength": "26",
"usage": "X",
"cardinality": "[)..1]",
"cardinality": "[0..1]",
"conformance": "",
"notes": ""
},
Expand Down Expand Up @@ -1118,6 +1117,36 @@
"conformance": "",
"notes": ""
}
],
"SPM": [
{
"fieldNumber": 1,
"name": "Set ID",
"dataType": "SI",
"maxLength": "4",
"usage": "O",
"cardinality": "[0..1]",
"conformance": "",
"notes": ""
},
{
"fieldNumber": 2,
"name": "Specimen ID",
"dataType": "EIP",
"usage": "O",
"cardinality": "[0..1]",
"conformance": "",
"notes": ""
},
{
"fieldNumber": 3,
"name": "Specimen Parent IDs",
"dataType": "EIP",
"usage": "O",
"cardinality": "[0..*]",
"conformance": "",
"notes": ""
}
]

}
Expand Down
2 changes: 1 addition & 1 deletion src/main/resources/PhinGuideProfile_NoORC.json
Original file line number Diff line number Diff line change
Expand Up @@ -563,7 +563,7 @@
"dataType": "TS",
"maxLength": "26",
"usage": "X",
"cardinality": "[)..1]",
"cardinality": "[0..1]",
"conformance": "",
"notes": ""
},
Expand Down
2 changes: 1 addition & 1 deletion src/main/resources/PhinProfileFlat.json
Original file line number Diff line number Diff line change
Expand Up @@ -543,7 +543,7 @@
"dataType": "TS",
"maxLength": "26",
"usage": "X",
"cardinality": "[)..1]",
"cardinality": "[0..1]",
"conformance": "",
"notes": ""
},
Expand Down
21 changes: 21 additions & 0 deletions src/main/resources/redaction_rules.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
PATH, VALUE, CONDITION
PID-3.1,REDACTED,PID-3.5 !IN (PT;PI;MB;PN;SR;PHC;PH;AN)
PID-5[1],REDACTED
PID-5[2],^^^^^^S,PID-5[2] !IN (^^^^^^S;^^^^^^U)
PID-6,REDACTED
PID-7,REDACTED
PID-9,REDACTED
PID-11.5,99999,
PID-11.9,
PID-12,REDACTED
PID-13,
PID-18,REDACTED
PID-19,REDACTED
PID-20,REDACTED
PID-21,REDACTED
NK1-13,REDACTED,NK1-3.5 != EMR
PV1,$REMOVE
SPM,
OBR-2.1,$HASH
OBX[@3.1='94531-1']-5,NA
NTE,$REMOVE
105 changes: 60 additions & 45 deletions src/main/scala/gov/cdc/hl7/DeIdentifier.scala
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,12 @@ import gov.cdc.utils.{ConsoleProgress, FileUtils}
import scala.jdk.CollectionConverters._
import scala.util.matching.Regex

case class RedactInfo(path: String, var rule: String, @transient condition: String, lineNumber: Int) {
@transient var rulemsg = s"Redacted $path with "
case class RedactInfo(path: String, fieldIndex: Int, var rule: String, @transient condition: String, lineNumber: Int) {
@transient var rulemsg = s"Redacted $path"
if ( fieldIndex > 1 )
rulemsg += s" (repeating value $fieldIndex)"
rulemsg += " with "

if (rule == null || rule.isEmpty)
rulemsg += "empty value"
else rulemsg += s"value '$rule'"
Expand All @@ -20,8 +24,9 @@ case class RedactInfo(path: String, var rule: String, @transient condition: St
/**
* This is a simple De-identifier of HL7 messages where it replaces entire Lines that can potentially have PII data
* It uses a comma delimited file to configure which lines need to be replaced and the values to replace with.
* The first column on the config file, should be a regular expression to match the text
* The second column on the config file, is the text to replace the entire line with.
* The first column of the config file should be an HL7 path that locates the information to be redacted.
* The second column of the config file is the text that replaces the value found at the matching path.
* An optional third column of the config file can provide a condition that determines whether or not to redact.
*
* Created - 6/2/17
* Author Marcelo Caldas mcq1@cdc.gov
Expand Down Expand Up @@ -72,11 +77,12 @@ class DeIdentifier() {
val condition = if (rule.length > 2) rule(2) else null
val matchLine = HL7StaticParser.getValue(subline, path) //Make sure the path matches something
if (matchLine.isDefined && matchLine.get.length > 0) {
if (evalCondition(subline, condition)) { //Redact only if Condition evals to TRUE!
val matchBools = evalCondition(subline, condition)
if (matchBools.reduce(_ || _)) { //Redact only if at least one Condition evaluates to TRUE!
replacement match {
case FN_REMOVE => {
subline = ""
report += RedactInfo(path, replacement, condition, lineNbr + 1)
report += RedactInfo(path, 0, replacement, condition, lineNbr + 1)
}
case _ =>
val lineIndexed = HL7StaticParser.retrieveFirstSegmentOf(subline, path.substring(0, 3))
Expand All @@ -87,39 +93,42 @@ class DeIdentifier() {
val repeats = lineIndexed._2(field.toInt).split("\\~")
repeats.zipWithIndex.foreach {
case (elem, i) => {
var redacted = false
if (fieldIdx == null || fieldIdx.toInt == i + 1) {
if (comp != null) {
val compArray = elem.split("\\^")
if (compArray.length >= comp.toInt) {
redacted = !compArray(comp.toInt - 1).equals(replacement)
compArray(comp.toInt - 1) = getReplacementValue(replacement, compArray(comp.toInt - 1))

}
if (fieldIdx == null || fieldIdx.toInt == i + 1)
repeats(i) = compArray.mkString("^")

else {
repeats(i) = elem
redacted = !elem.equals(replacement)
}
if (redacted)
report += RedactInfo(path, replacement, condition, lineNbr + 1)
} else {
if (!repeats(i).isEmpty && !replacement.equals(elem)) {
repeats(i) = getReplacementValue(replacement, elem)
report += RedactInfo(path, replacement, condition, lineNbr + 1)
if (elem.nonEmpty)
if ((matchBools.length == 1 && matchBools(0) ) || matchBools(i)) {
var redacted = false
if (fieldIdx == null || fieldIdx.toInt == i + 1) {
if (comp != null) {
val compArray = elem.split("\\^")
if (compArray.length >= comp.toInt && compArray(comp.toInt - 1).nonEmpty) {
redacted = !compArray(comp.toInt - 1).equals(replacement)
compArray(comp.toInt - 1) = getReplacementValue(replacement, compArray(comp.toInt - 1))

}
if (fieldIdx == null || fieldIdx.toInt == i + 1)
repeats(i) = compArray.mkString("^")

else {
repeats(i) = elem
redacted = !elem.equals(replacement)
}
if (redacted)
report += RedactInfo(path, (i +1), replacement, condition, lineNbr + 1)
} else {
if (repeats(i).nonEmpty && !replacement.equals(elem)) {
repeats(i) = getReplacementValue(replacement, elem)
report += RedactInfo(path, (i +1), replacement, condition, lineNbr + 1)
}
}
}
}
}
}
lineIndexed._2(field.toInt) = repeats.mkString("~")
}
subline = lineIndexed._2.mkString("|")

} else {
subline = getReplacementValue(replacement, subline) //The whole segment will be replaced!
report += RedactInfo(path, replacement, condition, lineNbr + 1)
report += RedactInfo(path, 0, replacement, condition, lineNbr + 1)
}
}
}
Expand All @@ -134,23 +143,29 @@ class DeIdentifier() {

}

private def evalCondition(msg: String, condition: String): Boolean = {
private def evalCondition(msg: String, condition: String): Array[Boolean] = {
if (condition == null || condition.isEmpty)
return true //empty condition -> Redact!
return Array(true) //empty condition -> Redact!
val condParts = condition.split(" ") //get PATH<space>Comparator<space>Value
val msgValue = HL7StaticParser.getFirstValue(msg, condParts(0))//we only support single cardinality for now.
if (msgValue == null || msgValue == None || msgValue.get.isEmpty)
return false //don't redact
return condParts(1).toUpperCase() match {
case "=" => condParts(2).equals( msgValue.get.toUpperCase())
case "!=" => !condParts(2).equals( msgValue.get.toUpperCase())
case "IN" =>
val values = condParts(2).substring(1, condParts(2).length -1).split(";")
values.contains(msgValue.get.toUpperCase())
case "!IN" =>
val values = condParts(2).substring(1, condParts(2).length -1).split(";")
!values.contains(msgValue.get.toUpperCase())
val msgValues = HL7StaticParser.getValue(msg, condParts(0), removeEmpty = false)//we only support single cardinality for now.
if (msgValues == null || msgValues.isEmpty || msgValues.get.isEmpty)
return Array(false) //don't redact
//We evaluate one line at a time, so the outer array will always have exactly one entry; the inner array may hold several values (one per repeat).
val anyRepeatMatches = msgValues.get(0)
val boolResults = new Array[Boolean](anyRepeatMatches.length)
anyRepeatMatches.zipWithIndex.foreach { case(i, idx) =>
boolResults(idx) = condParts(1).toUpperCase() match {
case "=" => condParts(2).equals( i.toUpperCase())
case "!=" => !condParts(2).equals( i.toUpperCase())
case "IN" =>
val values = condParts(2).substring(1, condParts(2).length -1).split(";")
values.contains(i.toUpperCase())
case "!IN" =>
val values = condParts(2).substring(1, condParts(2).length -1).split(";")
!values.contains(i.toUpperCase())
}
}
boolResults
}

def deIdentifyFile(messageFileName: String, rulesFileName: String): Unit = {
Expand All @@ -177,7 +192,7 @@ object DeIdentifier {
}

object DeIdentifierApp {
val DEFAULT_RULES_FILE = "deid_rules.txt"
val DEFAULT_RULES_FILE = "redaction_rules.txt"

def showUsage() = {
println("Pass the file you want to translate and the file with rules.")
Expand Down
4 changes: 0 additions & 4 deletions src/main/scala/gov/cdc/hl7/HL7HierarchyParser.scala
Original file line number Diff line number Diff line change
Expand Up @@ -98,10 +98,6 @@ object HL7HierarchyParser {
val mapper = new ObjectMapper()
mapper.registerModule(DefaultScalaModule)
val profileObj:Profile = mapper.readValue(profile, classOf[Profile])
<<<<<<< HEAD
return parseMessageHierarchy(message, profileObj)
=======
return parseMessageHierarchy(message, profileObj)
>>>>>>> 78c370b53da9a444962a2178ee2c33f169faea8d
}
}
16 changes: 1 addition & 15 deletions src/main/scala/gov/cdc/hl7/HL7ParseUtils.scala
Original file line number Diff line number Diff line change
Expand Up @@ -13,11 +13,7 @@ import scala.util.matching.Regex

class HL7ParseUtils(message: String, var profile: Profile = null, val buildHierarchy: Boolean = true) {
//If no Profile is passed, we assume no Hierarchy will be used.
<<<<<<< HEAD
var profileName = "PhinGuideProfile.json"
=======
var profileName = "DefaultProfile.json"
>>>>>>> 78c370b53da9a444962a2178ee2c33f169faea8d

def this(message: String) {
this(message, null, false)
Expand Down Expand Up @@ -139,15 +135,6 @@ class HL7ParseUtils(message: String, var profile: Profile = null, val buildHiera

//main Entry - can be called outside code to find values based on path
def getValue(path: String, removeEmpty: Boolean = true): Option[Array[Array[String]]] = {
<<<<<<< HEAD
//val EMPTY = new Array[String](0)
path match {
//TODO:: see what to do with children!!!
case CHILDREN_REGEX(parent, child) => { //Tried implementing a full RegEx, but run into a 22 limit of fields. Breaking down into multiple regex then...
getChildrenValues(parent, child, removeEmpty)
}
case _ => HL7StaticParser.getValue(message, path, removeEmpty)
=======
if (buildHierarchy) {
path match {
case CHILDREN_REGEX(parent, child) => { //Tried implementing a full RegEx, but run into a 22 limit of fields. Breaking down into multiple regex then...
Expand All @@ -156,8 +143,7 @@ class HL7ParseUtils(message: String, var profile: Profile = null, val buildHiera
case _ => HL7StaticParser.getValue(message, path, removeEmpty)
}
} else HL7StaticParser.getValue(message, path, removeEmpty)
>>>>>>> 78c370b53da9a444962a2178ee2c33f169faea8d
}

}

//Gets values only from a single segment
Expand Down
5 changes: 1 addition & 4 deletions src/main/scala/gov/cdc/hl7/HL7StaticParser.scala
Original file line number Diff line number Diff line change
Expand Up @@ -320,11 +320,8 @@ object HL7StaticParser {
fieldArray :+= finalValue
} else {
for (onefield <- fieldValueSplit.zipWithIndex) {
<<<<<<< HEAD
finalValue = onefield._1
=======
finalValue = onefield
>>>>>>> 11b2a2b225555e8a9ef99bc7f4a42303b34960f1
// finalValue = onefield
if (comp > 0) {
val compSplit = finalValue.split(HL7_COMPONENT_SEPARATOR)
finalValue = compSplit.lift(comp.toInt - 1).getOrElse("")
Expand Down
2 changes: 1 addition & 1 deletion src/main/scala/gov/cdc/hl7/StructureValidator.scala
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ class StructureValidator(message: String, var profile: Profile, var fieldDefinit

if (profile == null) {
println("Using Default profile")
val content:String = Source.fromResource("PhinGuideProfile.json").getLines().mkString("\n")
val content:String = Source.fromResource("DefaultProfile.json").getLines().mkString("\n")

profile = mapper.readValue(content, classOf[Profile])
}
Expand Down
26 changes: 26 additions & 0 deletions src/test/resources/CELR-config.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
PID-3.1,REDACTED,PID-3.5 !IN (PT;PI;MB;PN;SR;PHC;PH;AN)
PID-5.1,REDACTED
PID-5.2,REDACTED
PID-5.3,REDACTED
PID-5.4,REDACTED
PID-11.1,REDACTED
PID-11.2,REDACTED
PID-13,REDACTED
PID-14,REDACTED
PID-19,REDACTED
NK1-13,REDACTED,NK1-3.5 != EMR
NK1-2,REDACTED
NK1-4.1,REDACTED
NK1-4.2,REDACTED
NK1-5,REDACTED
NK1-6,REDACTED
NK1-26,REDACTED
NK1-30,REDACTED
NK1-31,REDACTED
NK1-32,REDACTED
NK1-33,REDACTED
NK1-37,REDACTED
ZLR-6,REDACTED
ZLR-7,REDACTED
ZLR-8,REDACTED
ZLR-9,REDACTED
Loading

0 comments on commit 9b4ac2f

Please sign in to comment.