-
Notifications
You must be signed in to change notification settings - Fork 9
/
spark-run.sh
executable file
·46 lines (34 loc) · 1.31 KB
/
spark-run.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
#!/bin/bash
#
# Submit the batch or the streaming PySpark job with spark-submit.
#
# Usage: ./spark-run.sh [--batch|--stream]
#
# Environment:
#   SPARK_BATCH_CLUSTER_0   host used as the Spark master for --batch
#   SPARK_STREAM_CLUSTER_0  host used as the Spark master for --stream; it is
#                           also the host whose ~/.pgpass supplies PGPASSWORD
#                           for both modes
#
# Every path below is resolved against the current working directory ($PWD),
# and the job scripts are given as relative paths, so run this from the
# directory that contains config/, helpers/, batch_processing/ and streaming/.

S3CONFIGFILE=$PWD/config/s3bucket.config
SCHEMAFILE1=$PWD/config/schema_for_raw_data.config
SCHEMAFILE2=$PWD/config/schema_for_streaming.config
STREAMCONFIGFILE=$PWD/config/stream.config
PSQLCONFIGFILE=$PWD/config/postgresql.config
KAFKACONFIGFILE=$PWD/config/kafka.config
AUX_FILES=$PWD/helpers/helpers.py
POSTGRES_JAR=$PWD/postgresql-42.2.2.jar

# Print the usage line on stderr.
usage() {
  echo "Usage: ./spark-run.sh [--batch|--stream]" >&2
}

# Read the PostgreSQL password from ~/.pgpass on the streaming cluster host
# and export it as PGPASSWORD for the spark-submit process.
load_pgpassword() {
  : "${SPARK_STREAM_CLUSTER_0:?SPARK_STREAM_CLUSTER_0 must be set}"

  # The remote command is single-quoted so that '~' is expanded by the remote
  # shell (the ubuntu user's home) instead of by the local shell.
  # The sed expression drops the four leading "field:" groups of each line and
  # keeps what follows the last of them.
  # NOTE(review): assumes a single-entry .pgpass whose password contains no
  # ':' -- confirm against the deployed file.
  PGPASSWORD=$(ssh "ubuntu@${SPARK_STREAM_CLUSTER_0}" 'cat ~/.pgpass' \
    | sed 's/\(.*:\)\{4\}//g')
  export PGPASSWORD

  if [[ -z "$PGPASSWORD" ]]; then
    # Keep going as before, but make the likely cause of a later failure visible.
    echo "warning: could not read a password from ~/.pgpass on ${SPARK_STREAM_CLUSTER_0}" >&2
  fi
}

# The spark-submit call is the last command of each job branch, so its exit
# status becomes the exit status of the script.
case "${1:-}" in
  --batch)
    : "${SPARK_BATCH_CLUSTER_0:?SPARK_BATCH_CLUSTER_0 must be set}"
    load_pgpassword
    spark-submit --master "spark://${SPARK_BATCH_CLUSTER_0}:7077" \
      --jars "$POSTGRES_JAR" \
      --py-files "$AUX_FILES" \
      --driver-memory 4G \
      --executor-memory 4G \
      batch_processing/main_batch.py \
      "$S3CONFIGFILE" "$SCHEMAFILE1" "$PSQLCONFIGFILE"
    ;;
  --stream)
    load_pgpassword
    spark-submit --master "spark://${SPARK_STREAM_CLUSTER_0}:7077" \
      --packages org.apache.spark:spark-streaming-kafka-0-8_2.11:2.2.0 \
      --jars "$POSTGRES_JAR" \
      --py-files "$AUX_FILES" \
      --driver-memory 4G \
      --executor-memory 4G \
      streaming/main_stream.py \
      "$KAFKACONFIGFILE" "$SCHEMAFILE2" "$STREAMCONFIGFILE" "$PSQLCONFIGFILE"
    ;;
  *)
    # Missing or unknown mode: report misuse without contacting any host.
    usage
    exit 2
    ;;
esac