diff --git a/src/main/resources/DefaultBatchingProfile.json b/src/main/resources/DefaultBatchingProfile.json index 2f128fd..ead4c11 100644 --- a/src/main/resources/DefaultBatchingProfile.json +++ b/src/main/resources/DefaultBatchingProfile.json @@ -949,7 +949,7 @@ "dataType": "TS", "maxLength": "26", "usage": "X", - "cardinality": "[)..1]", + "cardinality": "[0..1]", "conformance": "", "notes": "" }, diff --git a/src/main/resources/DefaultFieldsProfile.json b/src/main/resources/DefaultFieldsProfile.json index 86b0d5e..0f89957 100644 --- a/src/main/resources/DefaultFieldsProfile.json +++ b/src/main/resources/DefaultFieldsProfile.json @@ -970,6 +970,22 @@ "conformance": "" } + ], + "EIP": [ + { + "fieldNumber": 1, + "name": "Placer Assigned Identifier", + "dataType": "EI", + "usage": "O", + "cardinality": "[0..1]" + }, + { + "fieldNumber": 2, + "name": "Filler Assigned Identifier", + "dataType": "EI", + "usage": "O", + "cardinality": "[0..1]" + } ] } } \ No newline at end of file diff --git a/src/main/resources/DefaultProfile.json b/src/main/resources/DefaultProfile.json index 796a29f..7cb3ad4 100644 --- a/src/main/resources/DefaultProfile.json +++ b/src/main/resources/DefaultProfile.json @@ -41,7 +41,6 @@ } } } - } } }, @@ -567,7 +566,7 @@ "dataType": "TS", "maxLength": "26", "usage": "X", - "cardinality": "[)..1]", + "cardinality": "[0..1]", "conformance": "", "notes": "" }, @@ -1118,6 +1117,36 @@ "conformance": "", "notes": "" } + ], + "SPM": [ + { + "fieldNumber": 1, + "name": "Set ID", + "dataType": "SI", + "maxLength": "4", + "usage": "O", + "cardinality": "[0..1]", + "conformance": "", + "notes": "" + }, + { + "fieldNumber": 2, + "name": "Specimen ID", + "dataType": "EIP", + "usage": "O", + "cardinality": "[0..1]", + "conformance": "", + "notes": "" + }, + { + "fieldNumber": 3, + "name": "Specimen Parent IDs", + "dataType": "EIP", + "usage": "O", + "cardinality": "[0..*]", + "conformance": "", + "notes": "" + } ] } diff --git 
a/src/main/resources/PhinGuideProfile_NoORC.json b/src/main/resources/PhinGuideProfile_NoORC.json index ea65e69..dd021fa 100644 --- a/src/main/resources/PhinGuideProfile_NoORC.json +++ b/src/main/resources/PhinGuideProfile_NoORC.json @@ -563,7 +563,7 @@ "dataType": "TS", "maxLength": "26", "usage": "X", - "cardinality": "[)..1]", + "cardinality": "[0..1]", "conformance": "", "notes": "" }, diff --git a/src/main/resources/PhinProfileFlat.json b/src/main/resources/PhinProfileFlat.json index 3dac83e..59e8b83 100644 --- a/src/main/resources/PhinProfileFlat.json +++ b/src/main/resources/PhinProfileFlat.json @@ -543,7 +543,7 @@ "dataType": "TS", "maxLength": "26", "usage": "X", - "cardinality": "[)..1]", + "cardinality": "[0..1]", "conformance": "", "notes": "" }, diff --git a/src/main/resources/redaction_rules.txt b/src/main/resources/redaction_rules.txt new file mode 100644 index 0000000..ee1e7f9 --- /dev/null +++ b/src/main/resources/redaction_rules.txt @@ -0,0 +1,21 @@ +PATH, VALUE, CONDITION +PID-3.1,REDACTED,PID-3.5 !IN (PT;PI;MB;PN;SR;PHC;PH;AN) +PID-5[1],REDACTED +PID-5[2],^^^^^^S,PID-5[2] !IN (^^^^^^S;^^^^^^U) +PID-6,REDACTED +PID-7,REDACTED +PID-9,REDACTED +PID-11.5,99999, +PID-11.9, +PID-12,REDACTED +PID-13, +PID-18,REDACTED +PID-19,REDACTED +PID-20,REDACTED +PID-21,REDACTED +NK1-13,REDACTED,NK1-3.5 != EMR +PV1,$REMOVE +SPM, +OBR-2.1,$HASH +OBX[@3.1='94531-1']-5,NA +NTE,$REMOVE \ No newline at end of file diff --git a/src/main/scala/gov/cdc/hl7/DeIdentifier.scala b/src/main/scala/gov/cdc/hl7/DeIdentifier.scala index 9c40e8b..3597f44 100644 --- a/src/main/scala/gov/cdc/hl7/DeIdentifier.scala +++ b/src/main/scala/gov/cdc/hl7/DeIdentifier.scala @@ -7,8 +7,12 @@ import gov.cdc.utils.{ConsoleProgress, FileUtils} import scala.jdk.CollectionConverters._ import scala.util.matching.Regex -case class RedactInfo(path: String, var rule: String, @transient condition: String, lineNumber: Int) { - @transient var rulemsg = s"Redacted $path with " +case class RedactInfo(path: 
String, fieldIndex: Int, var rule: String, @transient condition: String, lineNumber: Int) { + @transient var rulemsg = s"Redacted $path" + if ( fieldIndex > 1 ) + rulemsg += s" (repeating value $fieldIndex)" + rulemsg += " with " + if (rule == null || rule.isEmpty) rulemsg += "empty value" else rulemsg += s"value '$rule'" @@ -20,8 +24,9 @@ case class RedactInfo(path: String, var rule: String, @transient condition: St /** * This is a simple De-identifier of HL7 messages where it replaces entire Lines that can potentially have PII data * It uses a comma delimited file to configure which lines need to be replaced and the values to replace with. - * The first column on the config file, should be a regular expression to match the text - * The second column on the config file, is the text to replace the entire line with. + * The first column on the config file, should be a HL7 path to find the information to be redacted + * The second column on the config file, is the text to replace value of the matching path. + * An optional third column on the config file can provide a special condition of whether to redact or not. * * Created - 6/2/17 * Author Marcelo Caldas mcq1@cdc.gov @@ -72,11 +77,12 @@ class DeIdentifier() { val condition = if (rule.length > 2) rule(2) else null val matchLine = HL7StaticParser.getValue(subline, path) //Make sure the path matches something if (matchLine.isDefined && matchLine.get.length > 0) { - if (evalCondition(subline, condition)) { //Redact only if Condition evals to TRUE! + val matchBools = evalCondition(subline, condition) + if (matchBools.reduce(_ || _)) { //Redact only if at least one Condition evaluates to TRUE! 
replacement match { case FN_REMOVE => { subline = "" - report += RedactInfo(path, replacement, condition, lineNbr + 1) + report += RedactInfo(path, 0, replacement, condition, lineNbr + 1) } case _ => val lineIndexed = HL7StaticParser.retrieveFirstSegmentOf(subline, path.substring(0, 3)) @@ -87,31 +93,34 @@ class DeIdentifier() { val repeats = lineIndexed._2(field.toInt).split("\\~") repeats.zipWithIndex.foreach { case (elem, i) => { - var redacted = false - if (fieldIdx == null || fieldIdx.toInt == i + 1) { - if (comp != null) { - val compArray = elem.split("\\^") - if (compArray.length >= comp.toInt) { - redacted = !compArray(comp.toInt - 1).equals(replacement) - compArray(comp.toInt - 1) = getReplacementValue(replacement, compArray(comp.toInt - 1)) - - } - if (fieldIdx == null || fieldIdx.toInt == i + 1) - repeats(i) = compArray.mkString("^") - - else { - repeats(i) = elem - redacted = !elem.equals(replacement) - } - if (redacted) - report += RedactInfo(path, replacement, condition, lineNbr + 1) - } else { - if (!repeats(i).isEmpty && !replacement.equals(elem)) { - repeats(i) = getReplacementValue(replacement, elem) - report += RedactInfo(path, replacement, condition, lineNbr + 1) + if (elem.nonEmpty) + if ((matchBools.length == 1 && matchBools(0) ) || matchBools(i)) { + var redacted = false + if (fieldIdx == null || fieldIdx.toInt == i + 1) { + if (comp != null) { + val compArray = elem.split("\\^") + if (compArray.length >= comp.toInt && compArray(comp.toInt - 1).nonEmpty) { + redacted = !compArray(comp.toInt - 1).equals(replacement) + compArray(comp.toInt - 1) = getReplacementValue(replacement, compArray(comp.toInt - 1)) + + } + if (fieldIdx == null || fieldIdx.toInt == i + 1) + repeats(i) = compArray.mkString("^") + + else { + repeats(i) = elem + redacted = !elem.equals(replacement) + } + if (redacted) + report += RedactInfo(path, (i +1), replacement, condition, lineNbr + 1) + } else { + if (repeats(i).nonEmpty && !replacement.equals(elem)) { + repeats(i) = 
getReplacementValue(replacement, elem) + report += RedactInfo(path, (i +1), replacement, condition, lineNbr + 1) + } + } + } } - } } lineIndexed._2(field.toInt) = repeats.mkString("~") } @@ -119,7 +128,7 @@ class DeIdentifier() { } else { subline = getReplacementValue(replacement, subline) //The whole segment will be replaced! - report += RedactInfo(path, replacement, condition, lineNbr + 1) + report += RedactInfo(path, 0, replacement, condition, lineNbr + 1) } } } @@ -134,23 +143,29 @@ class DeIdentifier() { } - private def evalCondition(msg: String, condition: String): Boolean = { + private def evalCondition(msg: String, condition: String): Array[Boolean] = { if (condition == null || condition.isEmpty) - return true //empty condition -> Redact! + return Array(true) //empty condition -> Redact! val condParts = condition.split(" ") //get PATHComparatorValue - val msgValue = HL7StaticParser.getFirstValue(msg, condParts(0))//we only support single cardinality for now. - if (msgValue == null || msgValue == None || msgValue.get.isEmpty) - return false //don't redact - return condParts(1).toUpperCase() match { - case "=" => condParts(2).equals( msgValue.get.toUpperCase()) - case "!=" => !condParts(2).equals( msgValue.get.toUpperCase()) - case "IN" => - val values = condParts(2).substring(1, condParts(2).length -1).split(";") - values.contains(msgValue.get.toUpperCase()) - case "!IN" => - val values = condParts(2).substring(1, condParts(2).length -1).split(";") - !values.contains(msgValue.get.toUpperCase()) + val msgValues = HL7StaticParser.getValue(msg, condParts(0), removeEmpty = false)//evaluate the condition against every repeat of the field + if (msgValues == null || msgValues.isEmpty || msgValues.get.isEmpty) + return Array(false) //don't redact + //We eval line by line, so the first array will always have one entry. 
the second array will possibly have repeats + val anyRepeatMatches = msgValues.get(0) + val boolResults = new Array[Boolean](anyRepeatMatches.length) + anyRepeatMatches.zipWithIndex.foreach { case(i, idx) => + boolResults(idx) = condParts(1).toUpperCase() match { + case "=" => condParts(2).equals( i.toUpperCase()) + case "!=" => !condParts(2).equals( i.toUpperCase()) + case "IN" => + val values = condParts(2).substring(1, condParts(2).length -1).split(";") + values.contains(i.toUpperCase()) + case "!IN" => + val values = condParts(2).substring(1, condParts(2).length -1).split(";") + !values.contains(i.toUpperCase()) + } } + boolResults } def deIdentifyFile(messageFileName: String, rulesFileName: String): Unit = { @@ -177,7 +192,7 @@ object DeIdentifier { } object DeIdentifierApp { - val DEFAULT_RULES_FILE = "deid_rules.txt" + val DEFAULT_RULES_FILE = "redaction_rules.txt" def showUsage() = { println("Pass the file you want to translate and the file with rules.") diff --git a/src/main/scala/gov/cdc/hl7/HL7HierarchyParser.scala b/src/main/scala/gov/cdc/hl7/HL7HierarchyParser.scala index fa00b3d..3b973f0 100644 --- a/src/main/scala/gov/cdc/hl7/HL7HierarchyParser.scala +++ b/src/main/scala/gov/cdc/hl7/HL7HierarchyParser.scala @@ -98,10 +98,6 @@ object HL7HierarchyParser { val mapper = new ObjectMapper() mapper.registerModule(DefaultScalaModule) val profileObj:Profile = mapper.readValue(profile, classOf[Profile]) -<<<<<<< HEAD return parseMessageHierarchy(message, profileObj) -======= - return parseMessageHierarchy(message, profileObj) ->>>>>>> 78c370b53da9a444962a2178ee2c33f169faea8d } } diff --git a/src/main/scala/gov/cdc/hl7/HL7ParseUtils.scala b/src/main/scala/gov/cdc/hl7/HL7ParseUtils.scala index d688fbe..4dcf46f 100644 --- a/src/main/scala/gov/cdc/hl7/HL7ParseUtils.scala +++ b/src/main/scala/gov/cdc/hl7/HL7ParseUtils.scala @@ -13,11 +13,7 @@ import scala.util.matching.Regex class HL7ParseUtils(message: String, var profile: Profile = null, val buildHierarchy: 
Boolean = true) { //If no Profile is passed, we assume no Hierarchy will be used. -<<<<<<< HEAD - var profileName = "PhinGuideProfile.json" -======= var profileName = "DefaultProfile.json" ->>>>>>> 78c370b53da9a444962a2178ee2c33f169faea8d def this(message: String) { this(message, null, false) @@ -139,15 +135,6 @@ class HL7ParseUtils(message: String, var profile: Profile = null, val buildHiera //main Entry - can be called outside code to find values based on path def getValue(path: String, removeEmpty: Boolean = true): Option[Array[Array[String]]] = { -<<<<<<< HEAD - //val EMPTY = new Array[String](0) - path match { - //TODO:: see what to do with children!!! - case CHILDREN_REGEX(parent, child) => { //Tried implementing a full RegEx, but run into a 22 limit of fields. Breaking down into multiple regex then... - getChildrenValues(parent, child, removeEmpty) - } - case _ => HL7StaticParser.getValue(message, path, removeEmpty) -======= if (buildHierarchy) { path match { case CHILDREN_REGEX(parent, child) => { //Tried implementing a full RegEx, but run into a 22 limit of fields. Breaking down into multiple regex then... 
@@ -156,8 +143,7 @@ class HL7ParseUtils(message: String, var profile: Profile = null, val buildHiera case _ => HL7StaticParser.getValue(message, path, removeEmpty) } } else HL7StaticParser.getValue(message, path, removeEmpty) ->>>>>>> 78c370b53da9a444962a2178ee2c33f169faea8d - } + } //Gets values only from a single segment diff --git a/src/main/scala/gov/cdc/hl7/HL7StaticParser.scala b/src/main/scala/gov/cdc/hl7/HL7StaticParser.scala index 416ab5a..4b503da 100644 --- a/src/main/scala/gov/cdc/hl7/HL7StaticParser.scala +++ b/src/main/scala/gov/cdc/hl7/HL7StaticParser.scala @@ -320,11 +320,8 @@ object HL7StaticParser { fieldArray :+= finalValue } else { for (onefield <- fieldValueSplit.zipWithIndex) { -<<<<<<< HEAD finalValue = onefield._1 -======= - finalValue = onefield ->>>>>>> 11b2a2b225555e8a9ef99bc7f4a42303b34960f1 +// finalValue = onefield if (comp > 0) { val compSplit = finalValue.split(HL7_COMPONENT_SEPARATOR) finalValue = compSplit.lift(comp.toInt - 1).getOrElse("") diff --git a/src/main/scala/gov/cdc/hl7/StructureValidator.scala b/src/main/scala/gov/cdc/hl7/StructureValidator.scala index 9e0bf60..edde4bf 100644 --- a/src/main/scala/gov/cdc/hl7/StructureValidator.scala +++ b/src/main/scala/gov/cdc/hl7/StructureValidator.scala @@ -26,7 +26,7 @@ class StructureValidator(message: String, var profile: Profile, var fieldDefinit if (profile == null) { println("Using Default profile") - val content:String = Source.fromResource("PhinGuideProfile.json").getLines().mkString("\n") + val content:String = Source.fromResource("DefaultProfile.json").getLines().mkString("\n") profile = mapper.readValue(content, classOf[Profile]) } diff --git a/src/test/resources/CELR-config.txt b/src/test/resources/CELR-config.txt new file mode 100644 index 0000000..a8b07ee --- /dev/null +++ b/src/test/resources/CELR-config.txt @@ -0,0 +1,26 @@ +PID-3.1,REDACTED,PID-3.5 !IN (PT;PI;MB;PN;SR;PHC;PH;AN) +PID-5.1,REDACTED +PID-5.2,REDACTED +PID-5.3,REDACTED +PID-5.4,REDACTED +PID-11.1,REDACTED 
+PID-11.2,REDACTED +PID-13,REDACTED +PID-14,REDACTED +PID-19,REDACTED +NK1-13,REDACTED,NK1-3.5 != EMR +NK1-2,REDACTED +NK1-4.1,REDACTED +NK1-4.2,REDACTED +NK1-5,REDACTED +NK1-6,REDACTED +NK1-26,REDACTED +NK1-30,REDACTED +NK1-31,REDACTED +NK1-32,REDACTED +NK1-33,REDACTED +NK1-37,REDACTED +ZLR-6,REDACTED +ZLR-7,REDACTED +ZLR-8,REDACTED +ZLR-9,REDACTED \ No newline at end of file diff --git a/src/test/resources/HL7_2.5_New HHS Fields1.txt b/src/test/resources/HL7_2.5_New HHS Fields1.txt new file mode 100644 index 0000000..22f7d7c --- /dev/null +++ b/src/test/resources/HL7_2.5_New HHS Fields1.txt @@ -0,0 +1 @@ +MSH|^~\&|Epic Lab^2.16.840.1.113883.3.1575^ISO|RC^2.16.840.1.113883.4.590^ISO|SDEDSS^2.16.840.1.114222.4.1.3661^ISO|SD.DOH^2.16.840.1.114222.4.1.3661^ISO|20200607061729|LABBACKGROUND|ORU^R01^ORU_R01|RaviG_07232733|P|2.5|||||USA||||PHLabReport-NoAck^HL7^2.16.840.1.113883.9.11^ISO PID|1||abc123^^^WDL&52D0391886&CLIA^PI^WDL&52D0391886&CLIA~xyz456^^^WDL&52D0391886&CLIA^PP^WDL&52D0391886&CLIA||John Doe^^^^s~^^^^^^s||19500807|F||B|^^MILWAUKEE^WI^53205^DONOTSEND|||||||117842901|||||||0 ORC|RE|abc123| abc123^WDL^52D0391886^CLIA||||||20200730094810|||1407218175^Hartleben^Elyse^^^^^^Unspecified Identifier||^^^^^414^8057600|||||||Froedtert Hospital-2|9200 W. Wisconsin Ave^^Milwaukee^WI^53226|^^^^^414^8057600|9200 W. Wisconsin Ave^^Milwaukee^WI^53226 OBR|1| abc123|231OBX^WDL^52D0391886^CLIA|^^^3556190^SARS-CoV-2 (2019-nCoV) Nucleic Acid Amplified Test^L|||20200730083800|||||||20200730084100|NP/Throat&NP/Throat|1407218175^Hartleben^Elyse^^^^^^Unspecified Identifier|^^^^^414^8057600||| abc123||20200730094809||GL|F||1^^^20200730082906^^RT~^^^^^RT|||||||||20200730082900 OBX|1|ST|41458-1^SARS-CoV RNA XXX Ql NAA+probe^LN^3556190^Coronavirus COVID-19 Result^L|1|Not Detected||Not Detected||||F|||20200730094809|^WDL NTE|1||Not Detected results do not preclude SARS-CoV-2 infection and should NTE|2||not be used as the sole basis for treatment or patient management NTE|3||decisions. 
NTE|4 NTE|5||Test performed using Cepheid Xpert Xpress SARS-CoV-2 assay. Xpert NTE|6||Xpress SARS-CoV-2 is a real-time PCR assay intended for the NTE|7||qualitative detection of SARS-CoV-2 (COVID-19) nucleic acid. This NTE|8||assay has Emergency Use Authorization (EUA) from the U.S. Food and NTE|9||Drug Administration for specimens collected from individuals who meet NTE|10||CDC criteria for SARS-CoV-2 testing. Test performance characteristics NTE|11||were determined by the Wisconsin Diagnostic Laboratories NTE|12||Microbiology/Molecular Department. This test is used for clinical NTE|13||purposes and should not be regarded as investigational or for research NTE|14||use. This laboratory is certified under the Clinical Laboratory NTE|15||Improvements Amendments of 1988 (CLIA) as qualified to perform high NTE|16||complexity clinical laboratory testing. OBX|2|ST|95417-2^First test for condition of interest^LN^4500867^First COVID Test?^L|1|No||||||F|||20200730082915|^WDL OBX|3|TX|67098-4^Reason for test or procedure^LN^4500866^Reason for COVID Test?^L|1|Symptomatic as defined by CDC - Continue to select symptoms||||||F|||20200730082915|^WDL OBX|4|ST|75325-1^Symptom^LN^4500877^Symptoms?^L|1|Yes||||||F|||20200730084212|^WDL OBX|5|TX|75325-1^Symptom^LN^4500872^Symptoms^L|1|Cough,Shortness of breath||||||F|||20200730082915|^WDL OBX|6|TX|11368-8^Illness or injury onset date and time^LN^4500876^Date of Symptom Onset^L|1|01/Jun/2020 05:05:00:00||||||F|||20200730082915|^WDL OBX|7|ST|95418-0^Employed in a healthcare setting^LN^4500868^Healthcare worker?^L|1|No||||||F|||20200730082915|^WDL OBX|8|ST|82810-3^Pregnancy status^LN^4500869^Pregnant?^L|1|No||||||F|||20200730082915|^WDL OBX|9|DT|11368-8^SARS-CoV-2 N gene Ct XXX Qn NAA N1^LN||20190506||||||F|||201902281257-0500|||||201904020721-0500||||Public Health Laboratory^D^^^^CLIA&2.16.840.1.113883.19.4.6&ISO^XX^^^05D0897628|3434 Industrial Loop^^Little Rock^AR^72205^USA^B OBX|10|CWE|85658-3^SARS-CoV-2 N gene XXX Ql NAA 
N2^LN||260373001^patocc^SCT||||||F|||201902281257-0500|||||201904020721-0500||||Public Health Laboratory^D^^^^CLIA&2.16.840.1.113883.19.4.6&ISO^XX^^^05D0897628|3434 Industrial Loop^^Little Rock^AR^72205^USA^B OBX|11|CWE|75617-1^SARS-CoV-2 N gene XXX Ql NAA N2^LN||260373001^pat_res_type^SCT||||||F|||201902281257-0500|||||201904020721-0500||||Public Health Laboratory^D^^^^CLIA&2.16.840.1.113883.19.4.6&ISO^XX^^^05D0897628|3434 Industrial Loop^^Little Rock^AR^72205^USA^B OBX|12|CWE|INV290^SARS-CoV-2 N gene XXX Ql NAA N2^LN||260373001^most_r_t_t^SCT||||||F|||201902281257-0500|||||201904020721-0500||||Public Health Laboratory^D^^^^CLIA&2.16.840.1.113883.19.4.6&ISO^XX^^^05D0897628|3434 Industrial Loop^^Little Rock^AR^72205^USA^B OBX|13|CWE|INV291^SARS-CoV-2 N gene XXX Ql NAA N2^LN||260373001^most_r_t_r^SCT||||||F|||201902281257-0500|||||201904020721-0500||||Public Health Laboratory^D^^^^CLIA&2.16.840.1.113883.19.4.6&ISO^XX^^^05D0897628|3434 Industrial Loop^^Little Rock^AR^72205^USA^B OBX|14|DT|82772-5^SARS-CoV-2 N gene Ct XXX Qn NAA N1^LN||20190507||||||F|||201902281257-0500|||||201904020721-0500||||Public Health Laboratory^D^^^^CLIA&2.16.840.1.113883.19.4.6&ISO^XX^^^05D0897628|3434 Industrial Loop^^Little Rock^AR^72205^USA^B SPM|1|^RGNK072733BXTest07723RG&STARLIMS.AR.STAG&2.16.840.1.114222.4.3.3.2.5.2&ISO||258500002^Nasopharyngeal swab (specimen1)^SCT^^^^^^Text1||||Swab1^Swab1^codesys1|Specimentypefreetext||||||||201902281257-0500|201903011118-0500 \ No newline at end of file diff --git a/src/test/resources/covid19_elr.hl7 b/src/test/resources/covid19_elr.hl7 index 920b506..0e8a3ec 100644 --- a/src/test/resources/covid19_elr.hl7 +++ b/src/test/resources/covid19_elr.hl7 @@ -1,5 +1,5 @@ MSH|^~\&|SendingApp|SendingFac|ReceivingApp|ReceivingFac|20240502120000||ORU^R01|MSG000001|P|2.3| -PID|1|123456789|123456789|Doe^John^Q^Jr.||19700101|M|||2106-3^Caucasian^CDCREC~1002-5^American Indian^CDCREC|123 Main St^^Anytown^CA^12345^USA||(555)555-5555| 
+PID|1|123456789|123456789^^^^PT~123456789^^^^AA||Doe^John^Q^Jr.~SMITH^JOHN^^^^^S||M|||2106-3^Caucasian^CDCREC~1002-5^American Indian^CDCREC|123 Main St^^Anytown^CA^12345^USA^^^TEST||(555)555-5555|t| OBR|1|123456|COVID-19 PCR^COVID-19 Polymerase Chain Reaction^LN|||20240502080000|20240502110000|||||||||F|||| OBX|1|CE|94500-6^SARS-CoV-2 RNA Pnl Respiratory|1|Detected^Detected^LN|||||F|| OBX|2|ST|94308-4^SARS-CoV-2 RNA^LN||Positive|||F|| diff --git a/src/test/scala/BatchValidatorTest.scala b/src/test/scala/BatchValidatorTest.scala index c8dda2d..532a806 100644 --- a/src/test/scala/BatchValidatorTest.scala +++ b/src/test/scala/BatchValidatorTest.scala @@ -3,11 +3,6 @@ import gov.cdc.hl7.model.Profile import org.scalatest.flatspec.AnyFlatSpec - -<<<<<<< HEAD -======= - ->>>>>>> 78c370b53da9a444962a2178ee2c33f169faea8d class BatchValidatorTest extends AnyFlatSpec { //Happy Path: diff --git a/src/test/scala/DemoRedaction.scala b/src/test/scala/DemoRedaction.scala new file mode 100644 index 0000000..5cf78a8 --- /dev/null +++ b/src/test/scala/DemoRedaction.scala @@ -0,0 +1,27 @@ +import gov.cdc.hl7.HL7StaticParser.NEW_LINE_FEED +import gov.cdc.hl7.{DeIdentifier, RedactInfo} +import org.scalatest.flatspec.AnyFlatSpec + +import java.util +import scala.io.Source + +class DemoRedaction extends AnyFlatSpec { + + "hl7-pet" should "redact message" in { + val d = new DeIdentifier() + val msg = loadFile("covid19_elr.hl7") + val rules = loadFile("redaction_rules.txt").split(NEW_LINE_FEED) + val (redactedMessage, report) = d.deIdentifyMessage(msg, rules) + printReport(report) + println(s"redacted message:\n$redactedMessage") + } + + private def printReport(report: util.List[RedactInfo]): Unit = { + report.forEach( i => println(s"${i.lineNumber}) ${i.path} / ${i.fieldIndex}: ${i.rule}")) + } + + private def loadFile(fileName: String): String = { + Source.fromResource(fileName).getLines().mkString("\n") + } + +} diff --git a/src/test/scala/DemoUseCases.scala 
b/src/test/scala/DemoUseCases.scala index a75c935..5babc23 100644 --- a/src/test/scala/DemoUseCases.scala +++ b/src/test/scala/DemoUseCases.scala @@ -7,48 +7,49 @@ import org.scalatest.flatspec.AnyFlatSpec import scala.io.Source class DemoUseCases extends AnyFlatSpec { + "static hl7-pet-demo " should "query data" in { - //load mesage from file into String var - val testMsg = Source.fromResource("covid19_elr.hl7").getLines().mkString("\n") + //load message from file into String var + val testMessage = loadFile("covid19_elr.hl7") //query message - val matchfound = HL7StaticParser.getValue(testMsg, "MSH") + val results = HL7StaticParser.getValue(testMessage, "OBR[1] -> OBX") //show results - printResults(matchfound) + printResults(results) } "hierarchical hl7-pet-demo " should "query data" in { - //load mesage from file into String var - val testMsg = Source.fromResource("covid19_elr.hl7").getLines().mkString("\n") + //load message from file into String var + val testMsg = loadFile("covid19_elr.hl7") + //init profile and Hl7 Parser val profile = getProfile("DefaultProfile.json") - var hl7Util = new HL7ParseUtils(testMsg, profile, true) - //declare PATH to query - val PATH = "OBR[2]->OBX" + val hl7Util = new HL7ParseUtils(testMsg, profile, true ) //query message - val matchfound = hl7Util.getValue(PATH) - - //show rresults - printResults(matchfound) + val results = hl7Util.getFirstValue("PID[1]-10[2].2") + println(s"First Value is ${results.get}") + //show results + //printResults(results) } - private def printResults(resultSet: Option[Array[Array[String]]]) = { println(s"results ") if (resultSet.isDefined) { - resultSet.get foreach { - v => v.foreach(f => println(s"\t--> $f")) - } + val flat = resultSet.get.flatten + flat.foreach (v => println(s"\t--> $v")) } println("---") } private def getProfile(fileName: String): Profile = { - val profileFile = Source.fromResource(fileName).getLines().mkString("\n") + val profileFile = loadFile(fileName) val mapper = new 
ObjectMapper() mapper.registerModule(DefaultScalaModule) mapper.readValue(profileFile, classOf[Profile]) + } + private def loadFile(fileName: String): String = { + Source.fromResource(fileName).getLines().mkString("\n") } } diff --git a/src/test/scala/ExampleApp.scala b/src/test/scala/ExampleApp.scala index c23b641..9059b46 100644 --- a/src/test/scala/ExampleApp.scala +++ b/src/test/scala/ExampleApp.scala @@ -16,7 +16,7 @@ object ExampleApp { def main(args:Array[String]) = { - var filename = args.lift(0).getOrElse("ARLN_GC_DUB.hl7") + var filename = args.lift(0).getOrElse("covid19_elr.hl7") val source = io.Source.fromResource(filename) var allLines = "" for (line <- source.getLines) { diff --git a/src/test/scala/HL7ParserUtilsTest.scala b/src/test/scala/HL7ParserUtilsTest.scala index 7624165..8391032 100644 --- a/src/test/scala/HL7ParserUtilsTest.scala +++ b/src/test/scala/HL7ParserUtilsTest.scala @@ -3,20 +3,10 @@ import com.fasterxml.jackson.module.scala.DefaultScalaModule import gov.cdc.hl7.{BatchValidator, HL7ParseError, HL7ParseUtils} import gov.cdc.hl7.model.Profile import org.scalatest.flatspec.AnyFlatSpec -<<<<<<< HEAD -//import org.scalatest.FlatSpec -======= - ->>>>>>> 78c370b53da9a444962a2178ee2c33f169faea8d - import scala.io.Source -<<<<<<< HEAD class HL7ParserUtilsTest extends AnyFlatSpec { -======= -class HL7ParserUtilsTest extends AnyFlatSpec { ->>>>>>> 78c370b53da9a444962a2178ee2c33f169faea8d private val testMessage = "MSH|^~\\&|MDSS^2.16.840.1.114222.4.3.2.2.3.161.1.1000.1^ISO|MDCH^2.16.840.1.114222.4.1.3660^ISO|PHINCDS^2.16.840.1.114222.4.3.2.10^ISO|PHIN^2.16.840.1.114222^ISO|20150632162510||ORU^R01^ORU_R01|5276074519_20150626162510529|P|2.5.1|||||||||NOTF_ORU_v3.0^PHINProfileID^2.16.840.1.114222.4.10.3^ISO~Generic_MMG_V2.0^PHINMsgMapID^2.16.840.1.114222.4.10.4^ISO~Hepatitis_MMG_V1.0^PHINMsgMapID^2.16.840.1.114222.4.10.4^ISO\r" + "PID|1||5276074529^^^MDCH&2.16.840.1.114222.4.1.3660&ISO||~^^^^^^S||19600101|F||2106-3^Caucasian^CDCREC~1002-5^American 
Indian^CDCREC|^^ANN ARBOR^26^48105^USA^^^26161|||||||||||2135-2^Hispanic or Latino^CDCREC|||||||20141031\r" + "OBR|1||5276074519^MDCH^2.16.840.1.114222.4.1.3660^ISO|68991-9^Epidemiologic Information^LN|||20150626162510|||||||||||||||20150626162510|||F||||||10110^Hepatitis A^NND\r" + diff --git a/src/test/scala/HL7StaticParserUtilsTest.scala b/src/test/scala/HL7StaticParserUtilsTest.scala index 5ee65f7..2ab645e 100644 --- a/src/test/scala/HL7StaticParserUtilsTest.scala +++ b/src/test/scala/HL7StaticParserUtilsTest.scala @@ -1,31 +1,12 @@ -<<<<<<< HEAD -import gov.cdc.hl7.{HL7ParseError, HL7ParseUtils, HL7StaticParser} -import org.scalatest.flatspec.AnyFlatSpec -======= -import com.fasterxml.jackson.databind.{DeserializationFeature, ObjectMapper} +import com.fasterxml.jackson.databind.ObjectMapper import com.fasterxml.jackson.module.scala.DefaultScalaModule -import gov.cdc.hl7.{BatchValidator, HL7ParseError, HL7ParseUtils, HL7StaticParser} import gov.cdc.hl7.model.Profile +import gov.cdc.hl7.{HL7ParseError, HL7ParseUtils, HL7StaticParser} import org.scalatest.flatspec.AnyFlatSpec -import java.sql.ResultSet ->>>>>>> 78c370b53da9a444962a2178ee2c33f169faea8d -//import org.scalatest.FlatSpec - - -<<<<<<< HEAD +import scala.io.Source class HL7StaticParserUtilsTest extends AnyFlatSpec { -<<<<<<< HEAD private val testMessage = "MSH|^~\\&|MDSS^2.16.840.1.114222.4.3.2.2.3.161.1.1000.1^ISO|MDCH^2.16.840.1.114222.4.1.3660^ISO|PHINCDS^2.16.840.1.114222.4.3.2.10^ISO|PHIN^2.16.840.1.114222^ISO|20150632162510||ORU^R01^ORU_R01|5276074519_20150626162510529|P|2.5.1|||||||||NOTF_ORU_v3.0^PHINProfileID^2.16.840.1.114222.4.10.3^ISO~Generic_MMG_V2.0^PHINMsgMapID^2.16.840.1.114222.4.10.4^ISO~Hepatitis_MMG_V1.0^PHINMsgMapID^2.16.840.1.114222.4.10.4^ISO\r" + -======= - -class HL7StaticParserUtilsTest extends AnyFlatSpec { - private val testMessage = - 
"MSH|^~\\&|MDSS^2.16.840.1.114222.4.3.2.2.3.161.1.1000.1^ISO|MDCH^2.16.840.1.114222.4.1.3660^ISO|PHINCDS^2.16.840.1.114222.4.3.2.10^ISO|PHIN^2.16.840.1.114222^ISO|20150632162510||ORU^R01^ORU_R01|5276074519_20150626162510529|P|2.5.1|||||||||NOTF_ORU_v3.0^PHINProfileID^2.16.840.1.114222.4.10.3^ISO~Generic_MMG_V2.0^PHINMsgMapID^2.16.840.1.114222.4.10.4^ISO~Hepatitis_MMG_V1.0^PHINMsgMapID^2.16.840.1.114222.4.10.4^ISO\r" + ->>>>>>> 78c370b53da9a444962a2178ee2c33f169faea8d -======= - private val testMessage = "MSH|^~\\&#|MDSS^2.16.840.1.114222.4.3.2.2.3.161.1.1000.1^ISO|MDCH^2.16.840.1.114222.4.1.3660^ISO|PHINCDS^2.16.840.1.114222.4.3.2.10^ISO|PHIN^2.16.840.1.114222^ISO|20150632162510||ORU^R01^ORU_R01|5276074519_20150626162510529|P|2.5.1|||||||||NOTF_ORU_v3.0^PHINProfileID^2.16.840.1.114222.4.10.3^ISO~Generic_MMG_V2.0^PHINMsgMapID^2.16.840.1.114222.4.10.4^ISO~Hepatitis_MMG_V1.0^PHINMsgMapID^2.16.840.1.114222.4.10.4^ISO\r" + ->>>>>>> 11b2a2b225555e8a9ef99bc7f4a42303b34960f1 "PID|1||5276074529^^^MDCH&2.16.840.1.114222.4.1.3660&ISO||~^^^^^^S||19600101|F||2106-3^Caucasian^CDCREC~1002-5^American Indian^CDCREC|^^ANN ARBOR^26^48105^USA^^^26161|||||||||||2135-2^Hispanic or Latino^CDCREC|||||||20141031\r" + "OBR|1||5276074519^MDCH^2.16.840.1.114222.4.1.3660^ISO|68991-9^Epidemiologic Information^LN|||20150626162510|||||||||||||||20150626162510|||F||||||10110^Hepatitis A^NND\r" + "OBX|1|CWE|NOT116^National Reporting Jurisdiction^PHINQUESTION||26^Michigan^FIPS5_2||||||F\n\r" + @@ -232,11 +213,7 @@ class HL7StaticParserUtilsTest extends AnyFlatSpec { println("\n\nRepeats...") assert(HL7StaticParser.getValue(testMessage, "OBR[*]-4[1].1").get.length == 3) -<<<<<<< HEAD - assert(HL7StaticParser.getValue(testMessage, "OBR-4[1]").get.length == 2) -======= assert(HL7StaticParser.getValue(testMessage, "OBR-4[1]").get.length == 3) ->>>>>>> 78c370b53da9a444962a2178ee2c33f169faea8d assert(HL7StaticParser.getValue(testMessage, "PID[1]-5[*]").get.length == 1) diff --git 
a/src/test/scala/LoadProfileTest.scala b/src/test/scala/LoadProfileTest.scala index 5091bd5..f673be1 100644 --- a/src/test/scala/LoadProfileTest.scala +++ b/src/test/scala/LoadProfileTest.scala @@ -3,14 +3,7 @@ import com.fasterxml.jackson.module.scala.DefaultScalaModule import com.google.gson.{JsonObject, JsonParser} import gov.cdc.hl7.HL7HierarchyParser import gov.cdc.hl7.model.{Profile, SegmentConfig} -<<<<<<< HEAD - -import org.scalatest.flatspec.AnyFlatSpec -======= import org.scalatest.flatspec.AnyFlatSpec -//import org.scalatest.FlatSpec ->>>>>>> 78c370b53da9a444962a2178ee2c33f169faea8d - import scala.io.Source class LoadProfileTest extends AnyFlatSpec { diff --git a/src/test/scala/RulesValidatorTests.scala b/src/test/scala/RulesValidatorTests.scala index 338ba6e..f6db117 100644 --- a/src/test/scala/RulesValidatorTests.scala +++ b/src/test/scala/RulesValidatorTests.scala @@ -1,14 +1,8 @@ import gov.cdc.hl7.{RulesValidator, ValidationErrors} import gov.cdc.utils.ConsoleProgress -<<<<<<< HEAD import org.scalatest.flatspec.AnyFlatSpec -======= -import org.scalatest.flatspec.AnyFlatSpec -//import org.scalatest.FlatSpec - ->>>>>>> 78c370b53da9a444962a2178ee2c33f169faea8d class RulesValidatorTests extends AnyFlatSpec { "PredicateRules" must "validate" in { diff --git a/src/test/scala/StructureValidatorTest.scala b/src/test/scala/StructureValidatorTest.scala index dc398d5..9be2589 100644 --- a/src/test/scala/StructureValidatorTest.scala +++ b/src/test/scala/StructureValidatorTest.scala @@ -2,14 +2,7 @@ import com.fasterxml.jackson.databind.ObjectMapper import com.fasterxml.jackson.module.scala.DefaultScalaModule import gov.cdc.hl7.{BatchValidator, StructureValidator, ValidationErrors} import gov.cdc.hl7.model.Profile -<<<<<<< HEAD - -import org.scalatest.flatspec.AnyFlatSpec -======= import org.scalatest.flatspec.AnyFlatSpec -//import org.scalatest.FlatSpec ->>>>>>> 78c370b53da9a444962a2178ee2c33f169faea8d - import scala.io.Source /** @@ -20,7 +13,7 @@ import 
scala.io.Source */ class StructureValidatorTest extends AnyFlatSpec { "SingleBatchedMessage" must "pass validation" in { - val errors = processHappyPathMessage("FDD_CAMP_TC01_ADD.txt") + val errors = processHappyPathMessage("covid19_elr.hl7") // assert(errors.totalErrors == 0) // assert(errors.totalWarnings == 0) } @@ -31,37 +24,37 @@ class StructureValidatorTest extends AnyFlatSpec { } - "Strcuture Validation" must "throw errors on missing MSH" in { + "Structure Validation" must "throw errors on missing MSH" in { val errors = processBatchValidation("FileBatchMissingMSH.hl7") assert(errors.totalErrors == 1) } - "Strcuture Validation" must "throw errors on extra FHS fields" in { + "Structure Validation" must "throw errors on extra FHS fields" in { val errors = processBatchValidation("FileBatchExtraFHSFields.hl7") assert(errors.totalErrors == 2) } - "Strcuture Validation" must "throw errors on invalid BTS count" in { + "Structure Validation" must "throw errors on invalid BTS count" in { val errors = processBatchValidation("FileBatchInvalidFTSCount.hl7") assert(errors.totalErrors == 1) } - "Strcuture Validation" must "throw errors on Repeat BHS" in { + "Structure Validation" must "throw errors on Repeat BHS" in { val errors = processBatchValidation("FileBatchInvalidRepeatBHS3.hl7") assert(errors.totalErrors == 1) } - "Strcuture Validation" must "throw errors Multiple BHS" in { + "Structure Validation" must "throw errors Multiple BHS" in { val errors = processBatchValidation("FileBatchMultipleBHS.hl7") assert(errors.totalErrors == 3) } - "Strcuture Validation" must "throw errors Multiple BTS" in { + "Structure Validation" must "throw errors Multiple BTS" in { val errors = processBatchValidation("FileBatchMultipleBTS.hl7") assert(errors.totalErrors == 1) } - "Strcuture Validation" must "throw errors Multiple FHS" in { + "Structure Validation" must "throw errors Multiple FHS" in { val errors = processBatchValidation("FileBatchMultipleFHS.hl7") //assert(errors.totalErrors 
== 2) val found = errors.entries.exists{ it => it.path == "FHS"} diff --git a/src/test/scala/TestBugs.scala b/src/test/scala/TestBugs.scala index 32e701f..ea47f45 100644 --- a/src/test/scala/TestBugs.scala +++ b/src/test/scala/TestBugs.scala @@ -1,12 +1,5 @@ import gov.cdc.hl7.HL7ParseUtils -<<<<<<< HEAD - -import org.scalatest.flatspec.AnyFlatSpec -======= import org.scalatest.flatspec.AnyFlatSpec -//import org.scalatest.FlatSpec ->>>>>>> 78c370b53da9a444962a2178ee2c33f169faea8d - import scala.io.Source class TestBugs extends AnyFlatSpec { diff --git a/src/test/scala/TestDeidentifer.scala b/src/test/scala/TestDeidentifer.scala index 21b779b..c4044cb 100644 --- a/src/test/scala/TestDeidentifer.scala +++ b/src/test/scala/TestDeidentifer.scala @@ -1,22 +1,25 @@ //import open.HL7PET.tools.HL7StaticParser.{NEW_LINE_FEED, PATH_REGEX} -import gov.cdc.hl7.DeIdentifier +import gov.cdc.hl7.{DeIdentifier, RedactInfo} import gov.cdc.hl7.HL7StaticParser.NEW_LINE_FEED import gov.cdc.utils.FileUtils import org.scalatest.flatspec.AnyFlatSpec + +import java.util +import scala.io.Source //import org.scalatest.FlatSpec class TestDeidentifer extends AnyFlatSpec { "DeIdentifier" should "clean data" in { val d = new DeIdentifier() - d.deIdentifyFile( "src/test/resources/ORU_SampleOne.hl7", "src/main/resources/deid_rules.txt") + d.deIdentifyFile( "src/test/resources/ORU_SampleOne.hl7", "src/main/resources/redaction_rules.txt") } "Deidentifier" should "generate report" in { val d = new DeIdentifier() val msg = FileUtils.readFile("src/test/resources/ORU_SampleOne.hl7") - val rules = FileUtils.readFile("src/main/resources/deid_rules.txt").split(NEW_LINE_FEED) + val rules = FileUtils.readFile("src/main/resources/redaction_rules.txt").split(NEW_LINE_FEED) val (redactedMessage, report) = d.deIdentifyMessage(msg, rules) println(report) println("\n\n\n") @@ -36,4 +39,38 @@ class TestDeidentifer extends AnyFlatSpec { println(testLine.substring(testLine.indexOf("|",initIndex+2), testLine.length)) 
} + + "hl7-pet" should "redact message" in { + val d = new DeIdentifier() + val msg = loadFile("covid19_elr.hl7") + val rules = loadFile("redaction_rules.txt").split(NEW_LINE_FEED) + val (redactedMessage, report) = d.deIdentifyMessage(msg, rules) + printReport(report) + println(s"redacted message:\n$redactedMessage") + } + + "hl7-pet" should "redact celr message" in { + val d = new DeIdentifier() + val msg = loadFile("HL7_2.5_New HHS Fields1.txt") + val rules = loadFile("CELR-config.txt").split(NEW_LINE_FEED) + val (redactedMessage, report) = d.deIdentifyMessage(msg, rules) + printReport(report) + println(s"redacted message:\n$redactedMessage") + } + + private def printReport(report: util.List[RedactInfo]): Unit = { + report.forEach( i => println(s"${i.lineNumber}) ${i.path}: ${i.rule}")) + } + + private def loadFile(fileName: String): String = { + Source.fromResource(fileName).getLines().mkString("\n") + } + + "array get" should "get values" in { + val r: Option[Array[Array[Boolean]]] = Some(Array(Array(false, false))) + + val v1 = r.get(0).reduce(_ || _) +// val v1 = a2.reduce(_ || _) + println(v1) + } } diff --git a/src/test/scala/TestRegExp.scala b/src/test/scala/TestRegExp.scala index b0c4d7b..7a3074d 100644 --- a/src/test/scala/TestRegExp.scala +++ b/src/test/scala/TestRegExp.scala @@ -1,11 +1,4 @@ -<<<<<<< HEAD import org.scalatest.flatspec.AnyFlatSpec -======= -import gov.cdc.hl7.HL7StaticParser -import org.scalatest.flatspec.AnyFlatSpec -//import org.scalatest.FlatSpec ->>>>>>> 78c370b53da9a444962a2178ee2c33f169faea8d - import scala.collection.immutable.ListMap import scala.util.Try