Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Don't pass field value to ValidatorReport if validation fails #892

Merged
merged 2 commits into from
May 6, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -15,12 +15,13 @@ import cats.data.NonEmptyList
import com.snowplowanalytics.snowplow.badrows.FailureDetails

import com.snowplowanalytics.iglu.client.ClientError.ValidationError
import com.snowplowanalytics.iglu.client.validator.{ValidatorError, ValidatorReport}
import com.snowplowanalytics.iglu.client.validator.ValidatorError

import com.snowplowanalytics.iglu.core.{SchemaKey, SchemaVer}

import com.snowplowanalytics.snowplow.enrich.common.enrichments.AtomicFields.LimitedAtomicField
import com.snowplowanalytics.snowplow.enrich.common.outputs.EnrichedEvent
import com.snowplowanalytics.snowplow.enrich.common.utils.AtomicError

final case class AtomicFields(value: List[LimitedAtomicField])

Expand Down Expand Up @@ -133,8 +134,8 @@ object AtomicFields {
AtomicFields(withLimits)
}

def errorsToSchemaViolation(errors: NonEmptyList[ValidatorReport]): FailureDetails.SchemaViolation = {
val clientError = ValidationError(ValidatorError.InvalidData(errors), None)
def errorsToSchemaViolation(errors: NonEmptyList[AtomicError]): FailureDetails.SchemaViolation = {
val clientError = ValidationError(ValidatorError.InvalidData(errors.map(_.toValidatorReport)), None)

FailureDetails.SchemaViolation.IgluError(
AtomicFields.atomicSchema,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,12 +18,11 @@ import cats.data.NonEmptyList

import cats.implicits._

import com.snowplowanalytics.iglu.client.validator.ValidatorReport

import com.snowplowanalytics.snowplow.badrows.FailureDetails

import com.snowplowanalytics.snowplow.enrich.common.enrichments.AtomicFields.LimitedAtomicField
import com.snowplowanalytics.snowplow.enrich.common.outputs.EnrichedEvent
import com.snowplowanalytics.snowplow.enrich.common.utils.AtomicError

/**
* Atomic fields length validation inspired by
Expand Down Expand Up @@ -53,28 +52,28 @@ object AtomicFieldsLengthValidator {
private def validateField(
event: EnrichedEvent,
atomicField: LimitedAtomicField
): Either[ValidatorReport, Unit] = {
): Either[AtomicError.FieldLengthError, Unit] = {
val actualValue = atomicField.value.enrichedValueExtractor(event)
if (actualValue != null && actualValue.length > atomicField.limit)
ValidatorReport(
s"Field is longer than maximum allowed size ${atomicField.limit}",
Some(atomicField.value.name),
Nil,
Some(actualValue)
).asLeft
AtomicError
.FieldLengthError(
s"Field is longer than maximum allowed size ${atomicField.limit}",
atomicField.value.name
)
.asLeft
else
Right(())
}

private def handleAcceptableErrors[F[_]: Monad](
invalidCount: F[Unit],
event: EnrichedEvent,
errors: NonEmptyList[ValidatorReport]
errors: NonEmptyList[AtomicError.FieldLengthError]
): F[Unit] =
invalidCount *>
Monad[F].pure(
logger.debug(
s"Enriched event not valid against atomic schema. Event id: ${event.event_id}. Invalid fields: ${errors.map(_.path).toList.flatten.mkString(", ")}"
s"Enriched event not valid against atomic schema. Event id: ${event.event_id}. Invalid fields: ${errors.map(_.field).toList.flatten.mkString(", ")}"
)
)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ import java.lang.{Integer => JInteger}

import cats.syntax.either._

import com.snowplowanalytics.iglu.client.validator.ValidatorReport
import com.snowplowanalytics.snowplow.enrich.common.utils.AtomicError

/**
* Contains enrichments related to the client - where the client is the software which is using the
Expand All @@ -36,7 +36,7 @@ object ClientEnrichments {
* @param res The packed string holding the screen dimensions
* @return the ResolutionTuple or an error message, boxed in a Scalaz Validation
*/
val extractViewDimensions: (String, String) => Either[ValidatorReport, (JInteger, JInteger)] =
val extractViewDimensions: (String, String) => Either[AtomicError.ParseError, (JInteger, JInteger)] =
(field, res) =>
(res match {
case ResRegex(width, height) =>
Expand All @@ -45,7 +45,7 @@ object ClientEnrichments {
.leftMap(_ => "Could not be converted to java.lang.Integer s")
case _ => s"Does not conform to regex ${ResRegex.toString}".asLeft
}).leftMap { msg =>
ValidatorReport(msg, Some(field), Nil, Option(res))
AtomicError.ParseError(msg, field)
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ import com.snowplowanalytics.refererparser._

import com.snowplowanalytics.iglu.client.IgluCirceClient
import com.snowplowanalytics.iglu.client.resolver.registries.RegistryLookup
import com.snowplowanalytics.iglu.client.validator.ValidatorReport
import com.snowplowanalytics.snowplow.enrich.common.utils.AtomicError

import com.snowplowanalytics.iglu.core.SelfDescribingData
import com.snowplowanalytics.iglu.core.circe.implicits._
Expand Down Expand Up @@ -338,7 +338,7 @@ object EnrichmentManager {
.toEither
}

def setCollectorTstamp(event: EnrichedEvent, timestamp: Option[DateTime]): Either[ValidatorReport, Unit] =
def setCollectorTstamp(event: EnrichedEvent, timestamp: Option[DateTime]): Either[AtomicError.ParseError, Unit] =
EE.formatCollectorTstamp(timestamp).map { t =>
event.collector_tstamp = t
().asRight
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ import cats.syntax.option._
import org.joda.time.{DateTime, DateTimeZone, Period}
import org.joda.time.format.DateTimeFormat

import com.snowplowanalytics.iglu.client.validator.ValidatorReport
import com.snowplowanalytics.snowplow.enrich.common.utils.AtomicError

import com.snowplowanalytics.snowplow.badrows._

Expand Down Expand Up @@ -49,14 +49,14 @@ object EventEnrichments {
* @param Optional collectorTstamp
* @return Validation boxing the result of making the timestamp Redshift-compatible
*/
def formatCollectorTstamp(collectorTstamp: Option[DateTime]): Either[ValidatorReport, String] =
def formatCollectorTstamp(collectorTstamp: Option[DateTime]): Either[AtomicError.ParseError, String] =
collectorTstamp match {
case None => ValidatorReport("Field not set", Some("collector_tstamp"), Nil, None).asLeft
case None => AtomicError.ParseError("Field not set", "collector_tstamp").asLeft
case Some(t) =>
val formattedTimestamp = toTimestamp(t)
if (formattedTimestamp.startsWith("-") || t.getYear > 9999 || t.getYear < 0) {
val msg = s"Formatted as $formattedTimestamp is not Redshift-compatible"
ValidatorReport(msg, Some("collector_tstamp"), Nil, Some(t.toString)).asLeft
AtomicError.ParseError(msg, "collector_tstamp").asLeft
} else
formattedTimestamp.asRight
}
Expand Down Expand Up @@ -113,28 +113,28 @@ object EventEnrichments {
* @param tstamp The timestamp as stored in the Tracker Protocol
* @return a Tuple of two Strings (date and time), or an error message if the format was invalid
*/
val extractTimestamp: (String, String) => Either[ValidatorReport, String] =
val extractTimestamp: (String, String) => Either[AtomicError.ParseError, String] =
(field, tstamp) =>
try {
val dt = new DateTime(tstamp.toLong)
val timestampString = toTimestamp(dt)
if (timestampString.startsWith("-") || dt.getYear > 9999 || dt.getYear < 0)
ValidatorReport(
s"Formatting as $timestampString is not Redshift-compatible",
Some(field),
Nil,
Option(tstamp)
).asLeft
AtomicError
.ParseError(
s"Formatting as $timestampString is not Redshift-compatible",
field
)
.asLeft
else
timestampString.asRight
} catch {
case _: NumberFormatException =>
ValidatorReport(
"Not in the expected format: ms since epoch",
Some(field),
Nil,
Option(tstamp)
).asLeft
AtomicError
.ParseError(
"Not in the expected format: ms since epoch",
field
)
.asLeft
}

/**
Expand All @@ -144,7 +144,7 @@ object EventEnrichments {
* @param eventCode The event code
* @return the event type, or an error message if not recognised, boxed in a Scalaz Validation
*/
val extractEventType: (String, String) => Either[ValidatorReport, String] =
val extractEventType: (String, String) => Either[AtomicError.ParseError, String] =
(field, code) =>
code match {
case "se" => "struct".asRight
Expand All @@ -157,7 +157,7 @@ object EventEnrichments {
case "pp" => "page_ping".asRight
case _ =>
val msg = "Not a valid event type"
ValidatorReport(msg, Some(field), Nil, Option(code)).asLeft
AtomicError.ParseError(msg, field).asLeft
}

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,12 +16,10 @@ import io.circe._

import com.snowplowanalytics.snowplow.badrows.Processor

import com.snowplowanalytics.iglu.client.validator.ValidatorReport

import com.snowplowanalytics.iglu.core.{SchemaKey, SchemaVer, SelfDescribingData}
import com.snowplowanalytics.iglu.core.circe.implicits._

import com.snowplowanalytics.snowplow.enrich.common.utils.{ConversionUtils => CU}
import com.snowplowanalytics.snowplow.enrich.common.utils.{ConversionUtils => CU, AtomicError}

/** Miscellaneous enrichments which don't fit into one of the other modules. */
object MiscEnrichments {
Expand All @@ -46,7 +44,7 @@ object MiscEnrichments {
* @param platform The code for the platform generating this event.
* @return a Scalaz ValidatedString.
*/
val extractPlatform: (String, String) => Either[ValidatorReport, String] =
val extractPlatform: (String, String) => Either[AtomicError.ParseError, String] =
(field, platform) =>
platform match {
case "web" => "web".asRight // Web, including Mobile Web
Expand All @@ -60,11 +58,11 @@ object MiscEnrichments {
case "headset" => "headset".asRight // AR/VR Headset
case _ =>
val msg = "Not a valid platform"
ValidatorReport(msg, Some(field), Nil, Option(platform)).asLeft
AtomicError.ParseError(msg, field).asLeft
}

/** Make a String TSV safe */
val toTsvSafe: (String, String) => Either[ValidatorReport, String] =
val toTsvSafe: (String, String) => Either[AtomicError.ParseError, String] =
(_, value) => CU.makeTsvSafe(value).asRight

/**
Expand All @@ -73,7 +71,7 @@ object MiscEnrichments {
* Here we retrieve the first one as it is supposed to be the client one, c.f.
* https://en.m.wikipedia.org/wiki/X-Forwarded-For#Format
*/
val extractIp: (String, String) => Either[ValidatorReport, String] =
val extractIp: (String, String) => Either[AtomicError.ParseError, String] =
(_, value) => {
val lastIp = Option(value).map(_.split("[,|, ]").head).orNull
CU.makeTsvSafe(lastIp).asRight
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ package com.snowplowanalytics.snowplow.enrich.common.enrichments
import cats.implicits._
import cats.data.ValidatedNel

import com.snowplowanalytics.iglu.client.validator.ValidatorReport
import com.snowplowanalytics.snowplow.enrich.common.utils.AtomicError

import com.snowplowanalytics.snowplow.enrich.common.enrichments.{EventEnrichments => EE}
import com.snowplowanalytics.snowplow.enrich.common.enrichments.{MiscEnrichments => ME}
Expand All @@ -31,7 +31,7 @@ object Transform {
* to "user_ipaddress" in the enriched event
* @param enriched /!\ MUTABLE enriched event, mutated IN-PLACE /!\
*/
private[enrichments] def transform(raw: RawEvent, enriched: EnrichedEvent): ValidatedNel[ValidatorReport, Unit] = {
private[enrichments] def transform(raw: RawEvent, enriched: EnrichedEvent): ValidatedNel[AtomicError.ParseError, Unit] = {
val sourceMap: SourceMap = raw.parameters.collect { case (k, Some(v)) => (k, v) }
val firstPassTransform = enriched.transform(sourceMap, firstPassTransformMap)
val secondPassTransform = enriched.transform(sourceMap, secondPassTransformMap)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -196,8 +196,8 @@ object CrossNavigationEnrichment extends ParseableEnrichment {
EE.extractTimestamp("sp_dtm", s)
.leftMap { error =>
val f = FailureDetails.EnrichmentFailureMessage.InputData(
error.path.getOrElse(""),
error.keyword,
error.field,
s.some,
error.message
)
FailureDetails.EnrichmentFailure(None, f)
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
/*
* Copyright (c) 2022-present Snowplow Analytics Ltd.
* All rights reserved.
*
* This software is made available by Snowplow Analytics, Ltd.,
* under the terms of the Snowplow Limited Use License Agreement, Version 1.0
* located at https://docs.snowplow.io/limited-use-license-1.0
* BY INSTALLING, DOWNLOADING, ACCESSING, USING OR DISTRIBUTING ANY PORTION
* OF THE SOFTWARE, YOU AGREE TO THE TERMS OF SUCH LICENSE AGREEMENT.
*/
package com.snowplowanalytics.snowplow.enrich.common.utils

import com.snowplowanalytics.iglu.client.validator.ValidatorReport

sealed trait AtomicError {
def message: String
def field: String
def repr: String
def toValidatorReport: ValidatorReport =
ValidatorReport(message, Some(field), Nil, Some(repr))
}

object AtomicError {

case class ParseError(
message: String,
field: String
) extends AtomicError {
override def repr: String = "atomic_field_parse_error"
}

case class FieldLengthError(
message: String,
field: String
) extends AtomicError {
override def repr: String = "atomic_field_length_exceeded"
}
}
Loading
Loading