-
Notifications
You must be signed in to change notification settings - Fork 4
/
Copy pathspark-submit.sh
executable file
·44 lines (35 loc) · 1.64 KB
/
spark-submit.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
#!/usr/bin/env bash
#
# Submit a main class from the spark-lucenerdd-examples assembly JAR to a
# local Spark master.
#
# Usage: spark-submit.sh <main-class> [application arguments...]
#
# Requirements:
#   * Run from the project root: version.sbt is read from, and the assembly
#     JAR is looked up under, the current working directory.
#   * An uncompressed Spark distribution at
#     ~/spark-<SPARK_VERSION>-bin-hadoop<HADOOP_VERSION>
#
# Exit status: 2 on a usage error, 1 when a prerequisite is missing,
# otherwise the exit status of spark-submit.
set -euo pipefail

if (( $# < 1 )); then
  printf 'Usage: %s <main-class> [application arguments...]\n' "${0##*/}" >&2
  exit 2
fi
MAIN_CLASS=$1
shift # whatever remains in "$@" is forwarded to the application

CURRENT_DIR=$(pwd)

# Spark version (requires uncompressed tar.gz under $HOME)
SPARK_VERSION="3.3.3"
HADOOP_VERSION="3"
SCALA_VERSION="2.12"

# Read the version from version.sbt: take the 5th whitespace-separated field
# and let xargs strip the surrounding double quotes and whitespace.
if [[ ! -r version.sbt ]]; then
  printf 'Cannot read version.sbt in %s\n' "${CURRENT_DIR}" >&2
  exit 1
fi
SPARK_LUCENERDD_VERSION=$(awk '{print $5}' version.sbt | xargs)

echo "||==========================================================="
echo "||Loading LuceneRDD with version: ${SPARK_LUCENERDD_VERSION} "
echo "||Spark version: ${SPARK_VERSION} "
echo "||==========================================================="

# Assumes that spark is installed under home directory
HOME_DIR=~
export SPARK_LOCAL_IP=localhost
SPARK_HOME="${HOME_DIR}/spark-${SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}"

# spark-lucenerdd assembly JAR
MAIN_JAR="${CURRENT_DIR}/target/scala-${SCALA_VERSION}/spark-lucenerdd-examples-assembly-${SPARK_LUCENERDD_VERSION}.jar"

# Fail early with a clear message instead of an obscure spark-submit error.
if [[ ! -x "${SPARK_HOME}/bin/spark-submit" ]]; then
  printf 'spark-submit not found or not executable: %s\n' \
    "${SPARK_HOME}/bin/spark-submit" >&2
  exit 1
fi
if [[ ! -f "${MAIN_JAR}" ]]; then
  printf 'Assembly JAR not found: %s\n' "${MAIN_JAR}" >&2
  exit 1
fi

echo "Executing spark submit: ${MAIN_CLASS}"

# Run spark-submit against a local master.
# 'local[*]' is quoted so the shell never treats it as a glob pattern.
# The assembly JAR is passed both via --jars and as the application JAR.
"${SPARK_HOME}/bin/spark-submit" \
  --conf "spark.executor.memory=1g" \
  --conf "spark.executor.cores=4" \
  --conf "spark.executor.instances=2" \
  --conf "spark.driver.memory=1g" \
  --conf "spark.rdd.compress=true" \
  --conf "spark.serializer=org.apache.spark.serializer.KryoSerializer" \
  --conf "spark.kryoserializer.buffer=24mb" \
  --conf "spark.kryo.registrator=org.zouzias.spark.lucenerdd.LuceneRDDKryoRegistrator" \
  --conf "spark.driver.extraJavaOptions=-Dlucenerdd.index.store.mode=disk" \
  --conf "spark.executor.extraJavaOptions=-Dlucenerdd.index.store.mode=disk" \
  --master 'local[*]' \
  --class "${MAIN_CLASS}" \
  --jars "${MAIN_JAR}" \
  "${MAIN_JAR}" "$@"