# doglover.yaml — SparkApplication manifest
# Origin: forked from AdrianBesleaga/kube-spark (63 lines, 2.36 KB)
---
# SparkApplication for the spark-on-k8s-operator (v1beta2 API).
# Runs the Scala job com.uprush.example.DogLover in cluster mode, reading the
# application jar from S3A and mounting a shared PVC for committer staging.
apiVersion: "sparkoperator.k8s.io/v1beta2"
kind: SparkApplication
metadata:
  name: doglover
  namespace: default
spec:
  type: Scala
  mode: cluster
  image: "uprush/apache-spark:2.4.5"
  imagePullPolicy: Always
  mainClass: com.uprush.example.DogLover
  mainApplicationFile: "s3a://deephub/user/yijiang/doglover/doglover_2.12-0.1.0-SNAPSHOT.jar"
  sparkVersion: "2.4.5"
  restartPolicy:
    type: Never
  # Shared staging volume, mounted into both driver and executors below; backed
  # by the pre-existing PVC "data-staging-share".
  volumes:
    - name: "staging-vol"
      persistentVolumeClaim:
        claimName: data-staging-share
  driver:
    cores: 1
    coreLimit: "1200m"
    memory: "512m"
    volumeMounts:
      - name: "staging-vol"
        mountPath: "/home/spark/tmp"
    labels:
      version: "2.4.5"
    serviceAccount: spark
  executor:
    cores: 1
    instances: 2
    memory: "512m"
    volumeMounts:
      - name: "staging-vol"
        mountPath: "/home/spark/tmp"
    labels:
      version: "2.4.5"
  # Workaround for issue #216
  sparkConf:
    "spark.hadoop.fs.s3a.endpoint": "192.168.170.12"
    # NOTE(review): these look like placeholders to be substituted at deploy
    # time — confirm; real credentials should come from a Secret, not be
    # committed here.
    "spark.hadoop.fs.s3a.access.key": "S3_ACCESS_KEY"
    "spark.hadoop.fs.s3a.secret.key": "S3_SECRET_KEY"
    "spark.hadoop.fs.s3a.connection.ssl.enabled": "false"
    "spark.hadoop.fs.s3a.fast.upload": "true"
    "spark.hadoop.mapreduce.fileoutputcommitter.algorithm.version": "2"
    "spark.hadoop.mapreduce.fileoutputcommitter.cleanup-failures.ignored": "true"
    # S3A "directory" staging committer; its tmp/buffer paths point at the
    # PVC mount (/home/spark/tmp) configured above.
    "spark.hadoop.mapreduce.outputcommitter.factory.scheme.s3a": "org.apache.hadoop.fs.s3a.commit.S3ACommitterFactory"
    "spark.hadoop.fs.s3a.committer.name": "directory"
    "spark.sql.sources.commitProtocolClass": "org.apache.spark.internal.io.cloud.PathOutputCommitProtocol"
    "spark.sql.parquet.output.committer.class": "org.apache.spark.internal.io.cloud.BindingParquetOutputCommitter"
    "spark.hadoop.fs.s3a.committer.tmp.path": "file:///home/spark/tmp/staging"
    "spark.hadoop.fs.s3a.buffer.dir": "/home/spark/tmp/buffer"
    "spark.eventLog.dir": "s3a://deephub/spark/spark-events/"
    "spark.eventLog.enabled": "true"
    "spark.sql.warehouse.dir": "s3a://deephub/spark/warehouse"
    "spark.hadoop.parquet.enable.summary-metadata": "false"
    "spark.sql.parquet.mergeSchema": "false"
    "spark.sql.parquet.filterPushdown": "true"
    "spark.sql.hive.metastorePartitionPruning": "true"
    "spark.sql.orc.filterPushdown": "true"
    "spark.sql.orc.splits.include.file.footer": "true"
    "spark.sql.orc.cache.stripe.details.size": "10000"