forked from GoogleCloudPlatform/DataflowTemplates
-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathdf
executable file
·95 lines (81 loc) · 3.57 KB
/
df
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
#!/bin/bash
# Author: Bin Wu <binwu@google.com>
#
# df - helper for the Bindiego GCS -> BigQuery Dataflow templates.
# Sub-commands (dispatched at the bottom of this file): deploy, runc, runa, rung.
#
# The script must be started from a directory that contains a file named
# `bucket`; its contents are used as the GCS bucket name in every gs:// path.

# Working directory at start-up (bash keeps PWD up to date on its own; the
# explicit assignment is retained so the value is pinned here).
PWD=$(pwd)

# Path of the java executable, or empty when java is not on PATH.
# `command -v` is the shell builtin replacement for the non-standard `which`.
JAVA=$(command -v java)

# GCS bucket name, read from ./bucket. A missing or unreadable file leaves
# BUCKET empty and prints a warning instead of bash's raw redirection error.
if [[ -r bucket ]]; then
  BUCKET=$(<bucket)
else
  printf 'df: warning: cannot read ./bucket; BUCKET is empty\n' >&2
  BUCKET=
fi

# First whitespace-separated field printed by `hostname -I`.
IPADDR=$(hostname -I | cut -d ' ' -f 1)
#######################################
# Print the supported sub-commands.
# The list mirrors the case arms in __main; the previous text advertised a
# non-existent `run` command and omitted runc/runa/rung.
# Outputs: usage text on stdout
#######################################
__usage() {
  echo "Usage: df {deploy|runc|runa|rung}"
  echo "  deploy  build the batch create and batch append templates with mvn"
  echo "  runc    launch a job from the df-batch-create-csv2bq.json template"
  echo "  runa    launch a job from the df-batch-append-csv2bq.json template"
  echo "  rung    launch a job from Google's GCS_Text_to_BigQuery template"
}
#######################################
# Run `mvn compile exec:java` for one template main class.
# Globals:   BUCKET (read)
# Arguments: $1 - class name inside com.google.cloud.teleport.templates
#            $2 - file name used under gs://$BUCKET/templates/
# Returns:   exit status of mvn
#######################################
__deploy_template() {
  local main_class=$1
  local template_name=$2
  # Pin IFS so "${pipeline_args[*]}" joins with single spaces.
  local IFS=' '
  local -a pipeline_args=(
    "--project=google.com:bin-wus-learning-center"
    "--stagingLocation=gs://${BUCKET}/staging"
    "--tempLocation=gs://${BUCKET}/tmp"
    "--gcpTempLocation=gs://${BUCKET}/tmp/gcp"
    "--templateLocation=gs://${BUCKET}/templates/${template_name}"
    "--runner=DataflowRunner"
  )

  mvn compile exec:java \
    "-Dexec.mainClass=com.google.cloud.teleport.templates.${main_class}" \
    -Dexec.cleanupDaemonThreads=false \
    "-Dexec.args=${pipeline_args[*]}"
}

#######################################
# Build both Bindiego batch templates.
# Both builds are always attempted; a failure of either is reported through
# the return status instead of being hidden by the later build.
# Globals:   BUCKET (read)
# Returns:   0 when both mvn runs succeed, non-zero otherwise (also when
#            BUCKET is empty, in which case mvn is not invoked)
#######################################
__deploy() {
  if [[ -z "${BUCKET:-}" ]]; then
    printf 'df: BUCKET is empty; cannot build gs:// paths\n' >&2
    return 1
  fi

  local status=0

  # Batch - GCS -> BigQuery, create the table according to the schema,
  # overwrite existing table contents
  __deploy_template BindiegoTextIOToBigQueryBatchCreate \
    df-batch-create-csv2bq.json || status=$?

  # Batch - GCS -> BigQuery, append data to existing table
  __deploy_template BindiegoTextIOToBigQueryBatchAppend \
    df-batch-append-csv2bq.json || status=$?

  return "$status"
}
#######################################
# Launch a Dataflow job from the batch-create template.
# The template is read from gs://$BUCKET/templates/, the same location
# __deploy writes it to (it used to be hard-coded to the `bindiego` bucket).
# Every argument is quoted: the input pattern contains `*`, which the shell
# would otherwise try to glob-expand.
# Globals:   BUCKET (read)
# Returns:   exit status of gcloud; non-zero without calling gcloud when
#            BUCKET is empty or uuidgen fails
#######################################
__run_create() {
  if [[ -z "${BUCKET:-}" ]]; then
    printf 'df: BUCKET is empty; cannot build gs:// paths\n' >&2
    return 1
  fi

  # Unique suffix for the job name and the error-records prefix.
  local uuid
  uuid=$(uuidgen) || return

  local -a params=(
    "javascriptTextTransformGcsPath=gs://${BUCKET}/udf/trans.js"
    "javascriptTextTransformFunctionName=trans"
    "inputFilePattern=gs://${BUCKET}/data/source*.log.gz"
    "outputTable=google.com:bin-wus-learning-center:rt_test.test_batch"
    "JSONPath=gs://${BUCKET}/udf/schema.json"
    "bigQueryLoadingTemporaryDirectory=gs://${BUCKET}/tmp"
    "errorRecordsPrefix=gs://${BUCKET}/output/error/${uuid}/"
    "javascriptTextTransformParameter=bindigo20191204"
  )
  # Join with commas in a subshell so IFS is not changed for the caller.
  local joined
  joined=$(IFS=,; printf '%s' "${params[*]}")

  gcloud dataflow jobs run "bindiego-create-${uuid}" \
    "--gcs-location=gs://${BUCKET}/templates/df-batch-create-csv2bq.json" \
    --region=asia-east1 \
    "--parameters=${joined}"
}
#######################################
# Launch a Dataflow job from the batch-append template.
# The template is read from gs://$BUCKET/templates/, the same location
# __deploy writes it to (it used to be hard-coded to the `bindiego` bucket).
# Every argument is quoted: the input pattern contains `*`, which the shell
# would otherwise try to glob-expand.
# Globals:   BUCKET (read)
# Returns:   exit status of gcloud; non-zero without calling gcloud when
#            BUCKET is empty or uuidgen fails
#######################################
__run_append() {
  if [[ -z "${BUCKET:-}" ]]; then
    printf 'df: BUCKET is empty; cannot build gs:// paths\n' >&2
    return 1
  fi

  # Unique suffix for the job name and the error-records prefix.
  local uuid
  uuid=$(uuidgen) || return

  local -a params=(
    "javascriptTextTransformGcsPath=gs://${BUCKET}/udf/trans.js"
    "javascriptTextTransformFunctionName=trans"
    "inputFilePattern=gs://${BUCKET}/data/source*.log.gz"
    "outputTable=google.com:bin-wus-learning-center:rt_test.test_batch"
    "JSONPath=gs://${BUCKET}/udf/schema.json"
    "bigQueryLoadingTemporaryDirectory=gs://${BUCKET}/tmp"
    "errorRecordsPrefix=gs://${BUCKET}/output/error/${uuid}/"
    "javascriptTextTransformParameter=bindigo20191204"
  )
  # Join with commas in a subshell so IFS is not changed for the caller.
  local joined
  joined=$(IFS=,; printf '%s' "${params[*]}")

  gcloud dataflow jobs run "bindiego-append-${uuid}" \
    "--gcs-location=gs://${BUCKET}/templates/df-batch-append-csv2bq.json" \
    --region=asia-east1 \
    "--parameters=${joined}"
}
#######################################
# This is the native Google code: launch a Dataflow job from the
# Google-published template at gs://dataflow-templates/latest/.
# Every argument is quoted: the input pattern contains `*`, which the shell
# would otherwise try to glob-expand.
# Globals:   BUCKET (read)
# Returns:   exit status of gcloud; non-zero without calling gcloud when
#            BUCKET is empty or uuidgen fails
#######################################
__run_google_gcs2bq() {
  if [[ -z "${BUCKET:-}" ]]; then
    printf 'df: BUCKET is empty; cannot build gs:// paths\n' >&2
    return 1
  fi

  # Unique suffix for the job name.
  local uuid
  uuid=$(uuidgen) || return

  local -a params=(
    "javascriptTextTransformGcsPath=gs://${BUCKET}/udf/trans.js"
    "javascriptTextTransformFunctionName=trans"
    "inputFilePattern=gs://${BUCKET}/data/source*.log.gz"
    "outputTable=google.com:bin-wus-learning-center:rt_test.test_batch"
    "JSONPath=gs://${BUCKET}/udf/schema.json"
    "bigQueryLoadingTemporaryDirectory=gs://${BUCKET}/tmp"
  )
  # Join with commas in a subshell so IFS is not changed for the caller.
  local joined
  joined=$(IFS=,; printf '%s' "${params[*]}")

  # NOTE(review): the job name contains upper-case letters; confirm that
  # Dataflow accepts it (job names are commonly restricted to [-a-z0-9]).
  gcloud dataflow jobs run "bindiego-googleGcs2Bq-${uuid}" \
    --gcs-location=gs://dataflow-templates/latest/GCS_Text_to_BigQuery \
    --region=asia-east1 \
    "--parameters=${joined}"
}
#######################################
# Dispatch the first command-line argument to its handler.
# A missing or unrecognised argument falls through to the usage text.
# Arguments: $1 - sub-command (deploy, runc, runa or rung)
# Returns:   status of the handler that was invoked
#######################################
__main() {
  local command=${1:-}

  case "${command}" in
    deploy) __deploy ;;
    runc)   __run_create ;;
    runa)   __run_append ;;
    rung)   __run_google_gcs2bq ;;
    *)      __usage ;;
  esac
}
# Script entry point. "$@" is quoted so every command-line argument reaches
# __main as exactly one word (no re-splitting or glob expansion), and the
# script exits with __main's status so a failed mvn/gcloud run is visible to
# the caller instead of always reporting success.
__main "$@"
exit $?