
Commit

configuration, build, and deployment scripts
Ram Mohan committed Jul 8, 2019
1 parent 34c438e commit 14568cd
Showing 9 changed files with 687 additions and 0 deletions.
15 changes: 15 additions & 0 deletions appspec.yml
@@ -0,0 +1,15 @@
version: 0.0
os: linux
files:
- source: collectorApp
destination: collectorApp
hooks:
BeforeInstall:
- location: collectorApp/scripts/kill_previous_one.sh
timeout: 40
ApplicationStart:
- location: collectorApp/scripts/start_collector.sh
timeout: 40
ValidateService:
- location: collectorApp/scripts/health_check.sh
timeout: 70
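
The three hook scripts referenced above (kill_previous_one.sh, start_collector.sh, health_check.sh) live under collectorApp/scripts/ and are not among the files shown here. A minimal sketch of what health_check.sh could look like, assuming the collector listens on port 5000 (as in the configs below) and exposes the standard Snowplow collector /health endpoint; this is an illustration, not the committed script:

#!/bin/bash
# Hypothetical sketch of collectorApp/scripts/health_check.sh (assumption, not the committed file).
# Poll the collector's /health endpoint until it answers, staying inside the
# 70-second ValidateService timeout declared in appspec.yml (12 x 5s = 60s).
for attempt in $(seq 1 12); do
  if curl -sf http://localhost:5000/health > /dev/null; then
    echo "collector is healthy"
    exit 0
  fi
  sleep 5
done
echo "collector failed health check" >&2
exit 1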
52 changes: 52 additions & 0 deletions build.sh
@@ -0,0 +1,52 @@
#!/bin/bash


WORKDIR="$(dirname "$0")"
JAR_FILE="2-collectors/scala-stream-collector/kinesis/target/scala-2.11/snowplow-stream-collector-kinesis-0.15.0.jar"

cd "$WORKDIR"
mkdir -p target/common

# TODO: include ivy-cache

# change into scala-stream-collector directory
cd 2-collectors/scala-stream-collector

# build jar
if [ -z "$1" ]
then
sbt "project kinesis" assembly
else
sbt -Dsbt.ivy.home="$1" "project kinesis" assembly
fi

cd ../..
cp "$JAR_FILE" target/common/snowplow-stream-collector.jar
echo "jar copied to target/common"

cp -r scripts target/common/

mkdir -p target/us/collectorApp/config
mkdir -p target/sit/collectorApp/config

cp -R target/common/* target/us/collectorApp/
# Write the sed output to a temp file so this works on both macOS and Linux (BSD and GNU sed handle in-place edits differently)
sed -e 's/collector-THISWILLCHANGE-stdout.log/collector-usprod-stdout.log/g' target/us/collectorApp/scripts/start_collector.sh >tmp_1.sh
mv tmp_1.sh target/us/collectorApp/scripts/start_collector.sh
cp config/collector-us.conf target/us/collectorApp/config/collector.conf
cp appspec.yml target/us/
cp deploy_aws_code_deploy_revision.sh target/us/
cd target/us
tar -cvf ../collector-us.zip *
cd ../..

cp -R target/common/* target/sit/collectorApp/
# Write the sed output to a temp file so this works on both macOS and Linux (BSD and GNU sed handle in-place edits differently)
sed -e 's/collector-THISWILLCHANGE-stdout.log/collector-ausit-stdout.log/g' target/sit/collectorApp/scripts/start_collector.sh >tmp_1.sh
mv tmp_1.sh target/sit/collectorApp/scripts/start_collector.sh
cp config/collector-sit.conf target/sit/collectorApp/config/collector.conf
cp appspec.yml target/sit/
cp deploy_aws_code_deploy_revision.sh target/sit/
cd target/sit
tar -cvf ../collector-sit.zip *
cd ../..
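
Typical invocations of build.sh, assuming the Snowplow sources are checked out under 2-collectors/ as the script expects (the ivy cache path below is only an example):

# Build with the default ivy cache
./build.sh

# Build with a shared ivy cache; the path is passed through as -Dsbt.ivy.home
./build.sh /opt/ivy-cache

# Each run produces one CodeDeploy revision archive per environment
ls target/collector-us.zip target/collector-sit.zip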
163 changes: 163 additions & 0 deletions config/collector-sit.conf
@@ -0,0 +1,163 @@
# Copyright (c) 2013-2017 Snowplow Analytics Ltd. All rights reserved.
#
# This program is licensed to you under the Apache License Version 2.0, and
# you may not use this file except in compliance with the Apache License
# Version 2.0. You may obtain a copy of the Apache License Version 2.0 at
# http://www.apache.org/licenses/LICENSE-2.0.
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the Apache License Version 2.0 is distributed on an "AS
# IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
# implied. See the Apache License Version 2.0 for the specific language
# governing permissions and limitations there under.

# This file is based on the Scala Stream Collector configuration template
# (application.conf.example), adapted here for the SIT environment.

# 'collector' contains configuration options for the main Scala collector.
collector {
# The collector runs as a web service specified on the following interface and port.
interface = "0.0.0.0"
port = 5000

# Configure the P3P policy header.
p3p {
policyRef = "/w3c/p3p.xml"
CP = "NOI DSP COR NID PSA OUR IND COM NAV STA"
}

crossDomain {
enabled = false
domain = "*"
secure = true
}

# The collector returns a cookie to clients for user identification
# with the following domain and expiration.
cookie {
enabled = true
expiration = "365 days"
# Network cookie name
name = sp
# The domain is optional and will make the cookie accessible to other
# applications on the domain. Comment out this line to tie cookies to
# the collector's full domain
domain = ".newscgp.com"
}

# When enabled and the cookie specified above is missing, performs a redirect to itself to check
# if third-party cookies are blocked using the specified name. If they are indeed blocked,
# fallbackNetworkUserId is used instead of generating a new random one.
cookieBounce {
enabled = false
# The name of the request parameter which will be used on redirects checking that third-party
# cookies work.
name = "n3pc"
# Network user id to fallback to when third-party cookies are blocked.
fallbackNetworkUserId = "00000000-0000-4000-A000-000000000000"
# Optionally, specify the name of the header containing the originating protocol for use in the
# bounce redirect location. Use this if behind a load balancer that performs SSL termination.
# The value of this header must be http or https. Example, if behind an AWS Classic ELB.
forwardedProtocolHeader = "X-Forwarded-Proto"
}

# When enabled, the redirect url passed via the `u` query parameter is scanned for a placeholder
# token. All instances of that token are replaced with the network user ID. If the placeholder isn't
# specified, the default value is `${SP_NUID}`.
redirectMacro {
enabled = false
# Optional custom placeholder token (defaults to the literal `${SP_NUID}`)
placeholder = "[TOKEN]"
}

streams {
# Events which have successfully been collected will be stored in the good stream/topic
good = "ncg-sit-raw-good"

# Events that are too big (w.r.t Kinesis 1MB limit) will be stored in the bad stream/topic
bad = "ncg-sit-raw-bad"

# Whether to use the incoming event's ip as the partition key for the good stream/topic
# Note: NSQ does not make use of the partition key.
useIpAddressAsPartitionKey = false

# Enable the chosen sink by uncommenting the appropriate configuration
sink {
# Choose between kinesis, kafka, nsq, or stdout.
# To use stdout, comment or remove everything in the "collector.streams.sink" section except
# "enabled" which should be set to "stdout".
enabled = kinesis

# Region where the streams are located
region = "ap-southeast-2"

# Thread pool size for Kinesis API requests
threadPoolSize = 30

# The following are used to authenticate for the Amazon Kinesis sink.
# If both are set to 'default', the default provider chain is used
# (see http://docs.aws.amazon.com/AWSJavaSDK/latest/javadoc/com/amazonaws/auth/DefaultAWSCredentialsProviderChain.html)
# If both are set to 'iam', use AWS IAM Roles to provision credentials.
# If both are set to 'env', use environment variables AWS_ACCESS_KEY_ID and AWS_SECRET_ACCESS_KEY
aws {
accessKey = iam
secretKey = iam
}

# Minimum and maximum backoff periods
backoffPolicy {
minBackoff = 3000 # 3 seconds
maxBackoff = 600000 # 10 minutes
}

# Or Kafka
#brokers = "{{kafkaBrokers}}"
## Number of retries to perform before giving up on sending a record
#retries = 0

# Or NSQ
## Host name for nsqd
#host = "{{nsqHost}}"
## TCP port for nsqd, 4150 by default
#port = {{nsqdPort}}
}

# Incoming events are stored in a buffer before being sent to Kinesis/Kafka.
# Note: Buffering is not supported by NSQ.
# The buffer is emptied whenever:
# - the number of stored records reaches record-limit or
# - the combined size of the stored records reaches byte-limit or
# - the time in milliseconds since the buffer was last emptied reaches time-limit
buffer {
byteLimit = 400000
recordLimit = 500
timeLimit = 5000
}
}
}

# Akka has a variety of possible configuration options defined at
# http://doc.akka.io/docs/akka/current/scala/general/configuration.html
akka {
loglevel = OFF # 'OFF' for no logging, 'DEBUG' for all logging.
loggers = ["akka.event.slf4j.Slf4jLogger"]

# akka-http is the server the Stream collector uses and has configurable options defined at
# http://doc.akka.io/docs/akka-http/current/scala/http/configuration.html
http.server {
# To obtain the hostname in the collector, the 'remote-address' header
# should be set. By default, this is disabled, and enabling it
# adds the 'Remote-Address' header to every request automatically.
remote-address-header = on

raw-request-uri-header = on

# Define the maximum URI length (the akka-http default is 2048)
parsing {
max-uri-length = 32768
uri-parsing-mode = relaxed
}
}
}
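
With this SIT configuration the collector listens on 0.0.0.0:5000 and writes raw events to the ncg-sit-raw-good / ncg-sit-raw-bad Kinesis streams in ap-southeast-2. A quick smoke test against a deployed instance might look like the following (localhost is a stand-in for the real host; /health and /i are the collector's standard health-check and GET pixel endpoints):

# Health check, the same endpoint a ValidateService hook would poll
curl -i http://localhost:5000/health

# Send a test pageview; the raw record should land on ncg-sit-raw-good
curl -i "http://localhost:5000/i?e=pv&page=smoke-test"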
145 changes: 145 additions & 0 deletions config/collector-stdout.conf
@@ -0,0 +1,145 @@
# This conf is intended for the local dev setup; the collector output goes to stdout.
collector {
# The collector runs as a web service specified on the following interface and port.
interface = "0.0.0.0"
port = 5000

# Configure the P3P policy header.
p3p {
policyRef = "/w3c/p3p.xml"
CP = "NOI DSP COR NID PSA OUR IND COM NAV STA"
}

crossDomain {
enabled = false
domain = "*"
secure = true
}

# The collector returns a cookie to clients for user identification
# with the following domain and expiration.
cookie {
enabled = true
expiration = "365 days"
# Network cookie name
name = sp
# The domain is optional and will make the cookie accessible to other
# applications on the domain. Comment out this line to tie cookies to
# the collector's full domain
domain = ".newscgp.com"
}

# When enabled and the cookie specified above is missing, performs a redirect to itself to check
# if third-party cookies are blocked using the specified name. If they are indeed blocked,
# fallbackNetworkUserId is used instead of generating a new random one.
cookieBounce {
enabled = false
# The name of the request parameter which will be used on redirects checking that third-party
# cookies work.
name = "n3pc"
# Network user id to fallback to when third-party cookies are blocked.
fallbackNetworkUserId = "00000000-0000-4000-A000-000000000000"
# Optionally, specify the name of the header containing the originating protocol for use in the
# bounce redirect location. Use this if behind a load balancer that performs SSL termination.
# The value of this header must be http or https. Example, if behind an AWS Classic ELB.
forwardedProtocolHeader = "X-Forwarded-Proto"
}

# When enabled, the redirect url passed via the `u` query parameter is scanned for a placeholder
# token. All instances of that token are replaced with the network user ID. If the placeholder isn't
# specified, the default value is `${SP_NUID}`.
redirectMacro {
enabled = false
# Optional custom placeholder token (defaults to the literal `${SP_NUID}`)
placeholder = "[TOKEN]"
}

streams {
# Events which have successfully been collected will be stored in the good stream/topic
good = "ncg-uat-raw-good"

# Events that are too big (w.r.t Kinesis 1MB limit) will be stored in the bad stream/topic
bad = "ncg-uat-raw-bad"

# Whether to use the incoming event's ip as the partition key for the good stream/topic
# Note: NSQ does not make use of the partition key.
useIpAddressAsPartitionKey = false

# Enable the chosen sink by uncommenting the appropriate configuration
sink {
# Choose between kinesis, kafka, nsq, or stdout.
# To use stdout, comment or remove everything in the "collector.streams.sink" section except
# "enabled" which should be set to "stdout".
enabled = "stdout"

# Region where the streams are located
# region = "ap-southeast-2"

# Thread pool size for Kinesis API requests
# threadPoolSize = 30

# The following are used to authenticate for the Amazon Kinesis sink.
# If both are set to 'default', the default provider chain is used
# (see http://docs.aws.amazon.com/AWSJavaSDK/latest/javadoc/com/amazonaws/auth/DefaultAWSCredentialsProviderChain.html)
# If both are set to 'iam', use AWS IAM Roles to provision credentials.
# If both are set to 'env', use environment variables AWS_ACCESS_KEY_ID and AWS_SECRET_ACCESS_KEY
# aws {
# accessKey = iam
# secretKey = iam
# }

# Minimum and maximum backoff periods
# backoffPolicy {
# minBackoff = 3000 # 3 seconds
# maxBackoff = 600000 # 10 minutes
# }

# Or Kafka
#brokers = "{{kafkaBrokers}}"
## Number of retries to perform before giving up on sending a record
#retries = 0

# Or NSQ
## Host name for nsqd
#host = "{{nsqHost}}"
## TCP port for nsqd, 4150 by default
#port = {{nsqdPort}}
}

# Incoming events are stored in a buffer before being sent to Kinesis/Kafka.
# Note: Buffering is not supported by NSQ.
# The buffer is emptied whenever:
# - the number of stored records reaches record-limit or
# - the combined size of the stored records reaches byte-limit or
# - the time in milliseconds since the buffer was last emptied reaches time-limit
buffer {
byteLimit = 400000
recordLimit = 500
timeLimit = 5000
}
}
}

# Akka has a variety of possible configuration options defined at
# http://doc.akka.io/docs/akka/current/scala/general/configuration.html
akka {
loglevel = OFF # 'OFF' for no logging, 'DEBUG' for all logging.
loggers = ["akka.event.slf4j.Slf4jLogger"]

# akka-http is the server the Stream collector uses and has configurable options defined at
# http://doc.akka.io/docs/akka-http/current/scala/http/configuration.html
http.server {
# To obtain the hostname in the collector, the 'remote-address' header
# should be set. By default, this is disabled, and enabling it
# adds the 'Remote-Address' header to every request automatically.
remote-address-header = on

raw-request-uri-header = on

# Define the maximum URI length (the akka-http default is 2048)
parsing {
max-uri-length = 32768
uri-parsing-mode = relaxed
}
}
}
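
For local development with this stdout sink, the collector can be run straight from the assembled jar; a sketch, assuming the jar and config paths produced by build.sh above:

# Run the collector locally; collected events are printed to stdout instead of Kinesis
java -jar target/common/snowplow-stream-collector.jar --config config/collector-stdout.conf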
