forked from snowplow/snowplow
Commit 14568cd

configuration, build, and deployment scripts

Ram Mohan committed Jul 8, 2019
1 parent: 34c438e

Showing 9 changed files with 687 additions and 0 deletions.
appspec.yml
@@ -0,0 +1,15 @@

version: 0.0
os: linux
files:
  - source: collectorApp
    destination: collectorApp
hooks:
  BeforeInstall:
    - location: collectorApp/scripts/kill_previous_one.sh
      timeout: 40
  ApplicationStart:
    - location: collectorApp/scripts/start_collector.sh
      timeout: 40
  ValidateService:
    - location: collectorApp/scripts/health_check.sh
      timeout: 70
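
The three hook scripts referenced above are among this commit's nine files but are not rendered on this page. As a minimal sketch only, a ValidateService hook could poll the collector's standard /health endpoint on port 5000 (the port used by the configs below) and stay under the 70-second timeout; this is hypothetical, not the committed health_check.sh:

#!/bin/bash
# Hypothetical sketch of health_check.sh: poll the collector's /health
# endpoint until it answers; 12 attempts x 5 s stays under the 70 s timeout.
for attempt in $(seq 1 12); do
  if curl -sf "http://localhost:5000/health" >/dev/null; then
    echo "collector is healthy"
    exit 0
  fi
  sleep 5
done
echo "collector failed health check" >&2
exit 1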
@@ -0,0 +1,52 @@

#!/bin/bash

WORKDIR="$(dirname "$0")"
JAR_FILE="2-collectors/scala-stream-collector/kinesis/target/scala-2.11/snowplow-stream-collector-kinesis-0.15.0.jar"

cd "$WORKDIR"
mkdir -p target/common

# TODO: include ivy-cache

# change into the scala-stream-collector directory
cd 2-collectors/scala-stream-collector

# build the jar; an optional first argument points sbt at a shared ivy cache
if [ -z "$1" ]
then
  sbt "project kinesis" assembly
else
  sbt -Dsbt.ivy.home="$1" "project kinesis" assembly
fi

cd ../..
cp -R "$JAR_FILE" target/common/snowplow-stream-collector.jar
echo "jar copied to target/common"

cp -r scripts target/common/

mkdir -p target/us/collectorApp/config
mkdir -p target/sit/collectorApp/config

cp -R target/common/* target/us/collectorApp/
# Write the sed output to a temporary file and move it back, so the edit
# works on both macOS and Linux (BSD and GNU sed handle -i differently)
sed -e 's/collector-THISWILLCHANGE-stdout.log/collector-usprod-stdout.log/g' target/us/collectorApp/scripts/start_collector.sh >tmp_1.sh
mv tmp_1.sh target/us/collectorApp/scripts/start_collector.sh
cp config/collector-us.conf target/us/collectorApp/config/collector.conf
cp appspec.yml target/us/
cp deploy_aws_code_deploy_revision.sh target/us/
cd target/us
tar -cvf ../collector-us.zip *
cd ../..

cp -R target/common/* target/sit/collectorApp/
# Same temporary-file trick for the SIT variant
sed -e 's/collector-THISWILLCHANGE-stdout.log/collector-ausit-stdout.log/g' target/sit/collectorApp/scripts/start_collector.sh >tmp_1.sh
mv tmp_1.sh target/sit/collectorApp/scripts/start_collector.sh
cp config/collector-sit.conf target/sit/collectorApp/config/collector.conf
cp appspec.yml target/sit/
cp deploy_aws_code_deploy_revision.sh target/sit/
cd target/sit
tar -cvf ../collector-sit.zip *
cd ../..
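
A hedged usage example, since the script's own filename is not shown on this page (build.sh below is an assumption): run it from the repository root, optionally passing a shared ivy cache as the first argument.

# From the repository root (script name assumed):
./build.sh                  # build with sbt's default ivy cache
./build.sh /var/cache/ivy   # reuse a shared ivy cache across builds

# Each run produces target/collector-us.zip and target/collector-sit.zip,
# tar archives (despite the .zip suffix) containing collectorApp/,
# appspec.yml, and deploy_aws_code_deploy_revision.sh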
config/collector-sit.conf
@@ -0,0 +1,163 @@

# Copyright (c) 2013-2017 Snowplow Analytics Ltd. All rights reserved.
#
# This program is licensed to you under the Apache License Version 2.0, and
# you may not use this file except in compliance with the Apache License
# Version 2.0. You may obtain a copy of the Apache License Version 2.0 at
# http://www.apache.org/licenses/LICENSE-2.0.
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the Apache License Version 2.0 is distributed on an "AS
# IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
# implied. See the Apache License Version 2.0 for the specific language
# governing permissions and limitations there under.

# This file (application.conf.example) contains a template with
# configuration options for the Scala Stream Collector.
#
# To use, copy this to 'application.conf' and modify the configuration options.

# 'collector' contains configuration options for the main Scala collector.
collector {
  # The collector runs as a web service specified on the following interface and port.
  interface = "0.0.0.0"
  port = 5000

  # Configure the P3P policy header.
  p3p {
    policyRef = "/w3c/p3p.xml"
    CP = "NOI DSP COR NID PSA OUR IND COM NAV STA"
  }

  crossDomain {
    enabled = false
    domain = "*"
    secure = true
  }

  # The collector returns a cookie to clients for user identification
  # with the following domain and expiration.
  cookie {
    enabled = true
    expiration = "365 days"
    # Network cookie name
    name = sp
    # The domain is optional and will make the cookie accessible to other
    # applications on the domain. Comment out this line to tie cookies to
    # the collector's full domain.
    domain = ".newscgp.com"
  }

  # When enabled and the cookie specified above is missing, performs a redirect to itself to check
  # if third-party cookies are blocked using the specified name. If they are indeed blocked,
  # fallbackNetworkUserId is used instead of generating a new random one.
  cookieBounce {
    enabled = false
    # The name of the request parameter which will be used on redirects checking that third-party
    # cookies work.
    name = "n3pc"
    # Network user id to fall back to when third-party cookies are blocked.
    fallbackNetworkUserId = "00000000-0000-4000-A000-000000000000"
    # Optionally, specify the name of the header containing the originating protocol for use in the
    # bounce redirect location. Use this if behind a load balancer that performs SSL termination.
    # The value of this header must be http or https. For example, if behind an AWS Classic ELB.
    forwardedProtocolHeader = "X-Forwarded-Proto"
  }

  # When enabled, the redirect url passed via the `u` query parameter is scanned for a placeholder
  # token. All instances of that token are replaced with the network ID. If the placeholder isn't
  # specified, the default value is `${SP_NUID}`.
  redirectMacro {
    enabled = false
    # Optional custom placeholder token (defaults to the literal `${SP_NUID}`)
    placeholder = "[TOKEN]"
  }

  streams {
    # Events which have successfully been collected will be stored in the good stream/topic
    good = "ncg-sit-raw-good"

    # Events that are too big (w.r.t. the Kinesis 1 MB limit) will be stored in the bad stream/topic
    bad = "ncg-sit-raw-bad"

    # Whether to use the incoming event's ip as the partition key for the good stream/topic
    # Note: NSQ does not make use of a partition key.
    useIpAddressAsPartitionKey = false

    # Enable the chosen sink by uncommenting the appropriate configuration
    sink {
      # Choose between kinesis, kafka, nsq, or stdout.
      # To use stdout, comment or remove everything in the "collector.streams.sink" section except
      # "enabled" which should be set to "stdout".
      enabled = kinesis

      # Region where the streams are located
      region = "ap-southeast-2"

      # Thread pool size for Kinesis API requests
      threadPoolSize = 30

      # The following are used to authenticate for the Amazon Kinesis sink.
      # If both are set to 'default', the default provider chain is used
      # (see http://docs.aws.amazon.com/AWSJavaSDK/latest/javadoc/com/amazonaws/auth/DefaultAWSCredentialsProviderChain.html)
      # If both are set to 'iam', use AWS IAM Roles to provision credentials.
      # If both are set to 'env', use environment variables AWS_ACCESS_KEY_ID and AWS_SECRET_ACCESS_KEY
      aws {
        accessKey = iam
        secretKey = iam
      }

      # Minimum and maximum backoff periods, in milliseconds
      backoffPolicy {
        minBackoff = 3000   # 3 seconds
        maxBackoff = 600000 # 10 minutes
      }

      # Or Kafka
      #brokers = "{{kafkaBrokers}}"
      ## Number of retries to perform before giving up on sending a record
      #retries = 0

      # Or NSQ
      ## Host name for nsqd
      #host = "{{nsqHost}}"
      ## TCP port for nsqd, 4150 by default
      #port = {{nsqdPort}}
    }

    # Incoming events are stored in a buffer before being sent to Kinesis/Kafka.
    # Note: Buffering is not supported by NSQ.
    # The buffer is emptied whenever:
    # - the number of stored records reaches record-limit, or
    # - the combined size of the stored records reaches byte-limit, or
    # - the time in milliseconds since the buffer was last emptied reaches time-limit
    buffer {
      byteLimit = 400000
      recordLimit = 500
      timeLimit = 5000
    }
  }
}

# Akka has a variety of possible configuration options defined at
# http://doc.akka.io/docs/akka/current/scala/general/configuration.html
akka {
  loglevel = OFF # 'OFF' for no logging, 'DEBUG' for all logging.
  loggers = ["akka.event.slf4j.Slf4jLogger"]

  # akka-http is the server the Stream collector uses and has configurable options defined at
  # http://doc.akka.io/docs/akka-http/current/scala/http/configuration.html
  http.server {
    # To obtain the hostname in the collector, the 'remote-address' header
    # should be set. By default, this is disabled, and enabling it
    # adds the 'Remote-Address' header to every request automatically.
    remote-address-header = on

    raw-request-uri-header = on

    # Define the maximum request URI length (the default is 2048)
    parsing {
      max-uri-length = 32768
      uri-parsing-mode = relaxed
    }
  }
}
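
start_collector.sh is not rendered on this page either; as a sketch only, launching the collector against this config with the paths the build script lays out could look like the following (the committed script may differ):

# Hypothetical launch command; --config is the collector's standard flag.
# The log file name matches the pattern the build script rewrites per
# environment (collector-ausit-stdout.log for the SIT revision).
java -jar collectorApp/snowplow-stream-collector.jar \
  --config collectorApp/config/collector.conf \
  > collector-ausit-stdout.log 2>&1 &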
@@ -0,0 +1,145 @@

# This conf was created for use in a local dev setup; the output of the collector goes to stdout.
collector {
  # The collector runs as a web service specified on the following interface and port.
  interface = "0.0.0.0"
  port = 5000

  # Configure the P3P policy header.
  p3p {
    policyRef = "/w3c/p3p.xml"
    CP = "NOI DSP COR NID PSA OUR IND COM NAV STA"
  }

  crossDomain {
    enabled = false
    domain = "*"
    secure = true
  }

  # The collector returns a cookie to clients for user identification
  # with the following domain and expiration.
  cookie {
    enabled = true
    expiration = "365 days"
    # Network cookie name
    name = sp
    # The domain is optional and will make the cookie accessible to other
    # applications on the domain. Comment out this line to tie cookies to
    # the collector's full domain.
    domain = ".newscgp.com"
  }

  # When enabled and the cookie specified above is missing, performs a redirect to itself to check
  # if third-party cookies are blocked using the specified name. If they are indeed blocked,
  # fallbackNetworkUserId is used instead of generating a new random one.
  cookieBounce {
    enabled = false
    # The name of the request parameter which will be used on redirects checking that third-party
    # cookies work.
    name = "n3pc"
    # Network user id to fall back to when third-party cookies are blocked.
    fallbackNetworkUserId = "00000000-0000-4000-A000-000000000000"
    # Optionally, specify the name of the header containing the originating protocol for use in the
    # bounce redirect location. Use this if behind a load balancer that performs SSL termination.
    # The value of this header must be http or https. For example, if behind an AWS Classic ELB.
    forwardedProtocolHeader = "X-Forwarded-Proto"
  }

  # When enabled, the redirect url passed via the `u` query parameter is scanned for a placeholder
  # token. All instances of that token are replaced with the network ID. If the placeholder isn't
  # specified, the default value is `${SP_NUID}`.
  redirectMacro {
    enabled = false
    # Optional custom placeholder token (defaults to the literal `${SP_NUID}`)
    placeholder = "[TOKEN]"
  }

  streams {
    # Events which have successfully been collected will be stored in the good stream/topic
    good = "ncg-uat-raw-good"

    # Events that are too big (w.r.t. the Kinesis 1 MB limit) will be stored in the bad stream/topic
    bad = "ncg-uat-raw-bad"

    # Whether to use the incoming event's ip as the partition key for the good stream/topic
    # Note: NSQ does not make use of a partition key.
    useIpAddressAsPartitionKey = false

    # Enable the chosen sink by uncommenting the appropriate configuration
    sink {
      # Choose between kinesis, kafka, nsq, or stdout.
      # To use stdout, comment or remove everything in the "collector.streams.sink" section except
      # "enabled" which should be set to "stdout".
      enabled = "stdout"

      # Region where the streams are located
      # region = "ap-southeast-2"

      # Thread pool size for Kinesis API requests
      # threadPoolSize = 30

      # The following are used to authenticate for the Amazon Kinesis sink.
      # If both are set to 'default', the default provider chain is used
      # (see http://docs.aws.amazon.com/AWSJavaSDK/latest/javadoc/com/amazonaws/auth/DefaultAWSCredentialsProviderChain.html)
      # If both are set to 'iam', use AWS IAM Roles to provision credentials.
      # If both are set to 'env', use environment variables AWS_ACCESS_KEY_ID and AWS_SECRET_ACCESS_KEY
      # aws {
      #   accessKey = iam
      #   secretKey = iam
      # }

      # Minimum and maximum backoff periods, in milliseconds
      # backoffPolicy {
      #   minBackoff = 3000   # 3 seconds
      #   maxBackoff = 600000 # 10 minutes
      # }

      # Or Kafka
      #brokers = "{{kafkaBrokers}}"
      ## Number of retries to perform before giving up on sending a record
      #retries = 0

      # Or NSQ
      ## Host name for nsqd
      #host = "{{nsqHost}}"
      ## TCP port for nsqd, 4150 by default
      #port = {{nsqdPort}}
    }

    # Incoming events are stored in a buffer before being sent to Kinesis/Kafka.
    # Note: Buffering is not supported by NSQ.
    # The buffer is emptied whenever:
    # - the number of stored records reaches record-limit, or
    # - the combined size of the stored records reaches byte-limit, or
    # - the time in milliseconds since the buffer was last emptied reaches time-limit
    buffer {
      byteLimit = 400000
      recordLimit = 500
      timeLimit = 5000
    }
  }
}

# Akka has a variety of possible configuration options defined at
# http://doc.akka.io/docs/akka/current/scala/general/configuration.html
akka {
  loglevel = OFF # 'OFF' for no logging, 'DEBUG' for all logging.
  loggers = ["akka.event.slf4j.Slf4jLogger"]

  # akka-http is the server the Stream collector uses and has configurable options defined at
  # http://doc.akka.io/docs/akka-http/current/scala/http/configuration.html
  http.server {
    # To obtain the hostname in the collector, the 'remote-address' header
    # should be set. By default, this is disabled, and enabling it
    # adds the 'Remote-Address' header to every request automatically.
    remote-address-header = on

    raw-request-uri-header = on

    # Define the maximum request URI length (the default is 2048)
    parsing {
      max-uri-length = 32768
      uri-parsing-mode = relaxed
    }
  }
}
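
With the sink set to stdout, this config makes the collector easy to smoke-test locally; a brief sketch using the collector's standard endpoints:

# With the collector running on this config:
curl -s -o /dev/null -w "%{http_code}\n" http://localhost:5000/health
# expect 200 once the service is up

# Fire a minimal pixel request; the sink writes the raw event to stdout.
curl -s -o /dev/null -w "%{http_code}\n" "http://localhost:5000/i?e=pv"
# expect 200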