Skip to content

Commit

Permalink
HIVE-8836 - Enable automatic tests with remote spark client [Spark Br…
Browse files Browse the repository at this point in the history
…anch]

git-svn-id: https://svn.apache.org/repos/asf/hive/branches/spark@1642263 13f79535-47bb-0310-9956-ffa450edef68
  • Loading branch information
Brock Noland committed Nov 28, 2014
1 parent 6c8feff commit 8cabe8d
Show file tree
Hide file tree
Showing 90 changed files with 1,077 additions and 873 deletions.
2 changes: 1 addition & 1 deletion data/conf/hive-log4j.properties
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ log4j.appender.DRFA.layout=org.apache.log4j.PatternLayout
# Pattern format: Date LogLevel LoggerName LogMessage
#log4j.appender.DRFA.layout.ConversionPattern=%d{ISO8601} %p %c: %m%n
# Debugging Pattern format
log4j.appender.DRFA.layout.ConversionPattern=%d{ISO8601} %-5p %c{2} (%F:%M(%L)) - %m%n
log4j.appender.DRFA.layout.ConversionPattern=%d{ISO8601} %-5p [%t]: %c{2} (%F:%M(%L)) - %m%n


#
Expand Down
12 changes: 11 additions & 1 deletion data/conf/spark/hive-site.xml
Original file line number Diff line number Diff line change
Expand Up @@ -197,7 +197,17 @@

<property>
<name>spark.master</name>
<value>local[4]</value>
<value>local-cluster[2,2,1024]</value>
</property>

<property>
<name>spark.akka.logLifecycleEvents</name>
<value>true</value>
</property>

<property>
<name>spark.root.dir</name>
<value>${spark.home}</value>
</property>

</configuration>
30 changes: 30 additions & 0 deletions data/conf/spark/log4j.properties
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
# Log everything at INFO and above to the DRFA file appender (the console appender below is defined but not attached to the root logger)
log4j.rootCategory=INFO, DRFA
log4j.appender.console=org.apache.log4j.ConsoleAppender
log4j.appender.console.target=System.err
log4j.appender.console.layout=org.apache.log4j.PatternLayout
log4j.appender.console.layout.ConversionPattern=%d{yy/MM/dd HH:mm:ss} %p %c{1}: %m%n

# Settings to quiet third party logs that are too verbose
log4j.logger.org.eclipse.jetty=WARN
log4j.logger.org.eclipse.jetty.util.component.AbstractLifeCycle=ERROR
log4j.logger.org.apache.spark.repl.SparkIMain$exprTyper=INFO
log4j.logger.org.apache.spark.repl.SparkILoop$SparkILoopInterpreter=INFO



log4j.appender.DRFA=org.apache.log4j.DailyRollingFileAppender

log4j.appender.DRFA.File=${spark.root.dir}/logs/spark.log

# Rollover at midnight
log4j.appender.DRFA.DatePattern=.yyyy-MM-dd

# 30-day backup
#log4j.appender.DRFA.MaxBackupIndex=30
log4j.appender.DRFA.layout=org.apache.log4j.PatternLayout

# Pattern format: Date LogLevel LoggerName LogMessage
#log4j.appender.DRFA.layout.ConversionPattern=%d{ISO8601} %p %c: %m%n
# Debugging Pattern format
log4j.appender.DRFA.layout.ConversionPattern=%d{ISO8601} %-5p [%t]: %c{2} (%F:%M(%L)) - %m%n
1 change: 1 addition & 0 deletions itests/qtest-spark/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
thirdparty/
38 changes: 38 additions & 0 deletions itests/qtest-spark/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -338,6 +338,44 @@
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-antrun-plugin</artifactId>
<executions>
<execution>
<id>download-spark</id>
<phase>generate-sources</phase>
<goals>
<goal>run</goal>
</goals>
<configuration>
<target>
<echo file="target/download.sh">
# Print the environment so the build log records what the script ran with.
# NOTE(review): this prints every environment variable, including any credentials exported on the build host - confirm that is acceptable.
env
# Abort on the first command that fails.
set -e
# Trace each command before it is executed.
set -x
/bin/pwd
# All paths below are relative to the directory the script is launched from.
BASE_DIR=./target
# Three ".." segments from ./target, i.e. two directories above the launch directory.
HIVE_ROOT=$BASE_DIR/../../../
# Directory the download() function stores tarballs in and checks before fetching.
DOWNLOAD_DIR=./thirdparty
# download URL FINAL_NAME
# Fetches the tarball at URL into $DOWNLOAD_DIR (skipped when a copy is already
# there), unpacks it under $BASE_DIR and renames the unpacked directory to
# $BASE_DIR/FINAL_NAME. Relies on BASE_DIR and DOWNLOAD_DIR being set by the caller.
download() {
  local url=$1
  local finalName=$2
  local tarName
  # An empty FINAL_NAME would turn the rm -rf below into a wipe of $BASE_DIR itself.
  if [[ -z "$url" || -z "$finalName" ]]
  then
    echo "usage: download URL FINAL_NAME"
    return 1
  fi
  tarName=$(basename "$url")
  # Remove any previous unpack so the rename at the end starts from a clean slate.
  rm -rf "$BASE_DIR/$finalName"
  if [[ ! -f "$DOWNLOAD_DIR/$tarName" ]]
  then
    # -f turns HTTP errors into a non-zero exit instead of saving the error page
    # as the tarball. Downloading to a .part name and renaming afterwards keeps a
    # failed or interrupted transfer from being picked up as a cached download.
    curl -fSso "$DOWNLOAD_DIR/$tarName.part" "$url"
    mv "$DOWNLOAD_DIR/$tarName.part" "$DOWNLOAD_DIR/$tarName"
  fi
  tar -zxf "$DOWNLOAD_DIR/$tarName" -C "$BASE_DIR"
  # The glob stays outside the quotes so it still expands to the unpacked,
  # versioned directory name.
  mv "$BASE_DIR/$finalName"* "$BASE_DIR/$finalName"
}
# Make sure the tarball directory exists before the first download.
mkdir -p $DOWNLOAD_DIR
# Fetch and unpack the Spark build; download() leaves it at $BASE_DIR/spark.
download "http://ec2-50-18-79-139.us-west-1.compute.amazonaws.com/data/spark-1.2.1-SNAPSHOT-bin-hadoop2-without-hive.tgz" "spark"
# Copy Hive's Spark log4j.properties into the conf directory of the unpacked Spark build.
cp -f $HIVE_ROOT/data/conf/spark/log4j.properties $BASE_DIR/spark/conf/
</echo>
<exec executable="bash" dir="${basedir}" failonerror="true">
<arg line="target/download.sh"/>
</exec>
</target>
</configuration>
</execution>
<execution>
<id>generate-tests-sources</id>
<phase>generate-test-sources</phase>
Expand Down
2 changes: 2 additions & 0 deletions pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -171,6 +171,7 @@
<felix.version>2.4.0</felix.version>
<curator.version>2.6.0</curator.version>
<jsr305.version>3.0.0</jsr305.version>
<spark.home>${basedir}/${hive.path.to.root}/itests/qtest-spark/target/spark</spark.home>
</properties>

<repositories>
Expand Down Expand Up @@ -836,6 +837,7 @@
<log4j.debug>true</log4j.debug>
<!-- don't dirty up /tmp -->
<java.io.tmpdir>${test.tmp.dir}</java.io.tmpdir>
<spark.home>${spark.home}</spark.home>
<!-- Hadoop's minidfs class uses this -->
<test.build.data>${test.tmp.dir}</test.build.data>
<!-- required by QTestUtil -->
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -100,7 +100,7 @@ private static Map<String, String> initiateSparkConf(Configuration hiveConf) {
for (Map.Entry<String, String> entry : hiveConf) {
String propertyName = entry.getKey();
if (propertyName.startsWith("spark")) {
String value = entry.getValue();
String value = hiveConf.get(propertyName);
sparkConf.put(propertyName, value);
LOG.info(String.format(
"load spark configuration from hive configuration (%s -> %s).",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,9 @@ public int startMonitor() {
while (true) {
try {
JobExecutionStatus state = sparkJobStatus.getState();
if (LOG.isDebugEnabled()) {
console.printInfo("state = " + state);
}
if (state != null && state != JobExecutionStatus.UNKNOWN &&
(state != lastState || state == JobExecutionStatus.RUNNING)) {
lastState = state;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -28,14 +28,14 @@ FROM src
INSERT OVERWRITE TABLE dest_g2 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(DISTINCT substr(src.value, 5)), count(src.value) WHERE substr(src.key,1,1) >= 5 GROUP BY substr(src.key,1,1)
INSERT OVERWRITE TABLE dest_g3 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(DISTINCT substr(src.value, 5)), count(src.value) WHERE substr(src.key,1,1) < 5 GROUP BY substr(src.key,1,1)
INSERT OVERWRITE TABLE dest_g4 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(DISTINCT substr(src.value, 5)), count(src.value) GROUP BY substr(src.key,1,1)
INSERT OVERWRITE TABLE dest_h2 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(substr(src.value, 5)), count(src.value) GROUP BY substr(src.key,1,1), substr(src.key,2,1) LIMIT 10
INSERT OVERWRITE TABLE dest_h2 SELECT substr(src.key,1,1) as c1, count(DISTINCT substr(src.value,5)) as c2, concat(substr(src.key,1,1),sum(substr(src.value,5))) as c3, sum(substr(src.value, 5)) as c4, count(src.value) as c6 GROUP BY substr(src.key,1,1), substr(src.key,2,1) ORDER BY c1, c2 LIMIT 10
INSERT OVERWRITE TABLE dest_h3 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(substr(src.value, 5)), count(src.value) WHERE substr(src.key,1,1) >= 5 GROUP BY substr(src.key,1,1), substr(src.key,2,1);

FROM src
INSERT OVERWRITE TABLE dest_g2 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(DISTINCT substr(src.value, 5)), count(src.value) WHERE substr(src.key,1,1) >= 5 GROUP BY substr(src.key,1,1)
INSERT OVERWRITE TABLE dest_g3 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(DISTINCT substr(src.value, 5)), count(src.value) WHERE substr(src.key,1,1) < 5 GROUP BY substr(src.key,1,1)
INSERT OVERWRITE TABLE dest_g4 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(DISTINCT substr(src.value, 5)), count(src.value) GROUP BY substr(src.key,1,1)
INSERT OVERWRITE TABLE dest_h2 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(substr(src.value, 5)), count(src.value) GROUP BY substr(src.key,1,1), substr(src.key,2,1) LIMIT 10
INSERT OVERWRITE TABLE dest_h2 SELECT substr(src.key,1,1) as c1, count(DISTINCT substr(src.value,5)) as c2, concat(substr(src.key,1,1),sum(substr(src.value,5))) as c3, sum(substr(src.value, 5)) as c4, count(src.value) as c6 GROUP BY substr(src.key,1,1), substr(src.key,2,1) ORDER BY c1, c2 LIMIT 10
INSERT OVERWRITE TABLE dest_h3 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(substr(src.value, 5)), count(src.value) WHERE substr(src.key,1,1) >= 5 GROUP BY substr(src.key,1,1), substr(src.key,2,1);

SELECT * FROM dest_g2;
Expand Down
2 changes: 1 addition & 1 deletion ql/src/test/queries/clientpositive/smb_mapjoin_3.q
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@

-- SORT_QUERY_RESULTS



Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -265,15 +265,15 @@ FROM src
INSERT OVERWRITE TABLE dest_g2 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(DISTINCT substr(src.value, 5)), count(src.value) WHERE substr(src.key,1,1) >= 5 GROUP BY substr(src.key,1,1)
INSERT OVERWRITE TABLE dest_g3 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(DISTINCT substr(src.value, 5)), count(src.value) WHERE substr(src.key,1,1) < 5 GROUP BY substr(src.key,1,1)
INSERT OVERWRITE TABLE dest_g4 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(DISTINCT substr(src.value, 5)), count(src.value) GROUP BY substr(src.key,1,1)
INSERT OVERWRITE TABLE dest_h2 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(substr(src.value, 5)), count(src.value) GROUP BY substr(src.key,1,1), substr(src.key,2,1) LIMIT 10
INSERT OVERWRITE TABLE dest_h2 SELECT substr(src.key,1,1) as c1, count(DISTINCT substr(src.value,5)) as c2, concat(substr(src.key,1,1),sum(substr(src.value,5))) as c3, sum(substr(src.value, 5)) as c4, count(src.value) as c6 GROUP BY substr(src.key,1,1), substr(src.key,2,1) ORDER BY c1, c2 LIMIT 10
INSERT OVERWRITE TABLE dest_h3 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(substr(src.value, 5)), count(src.value) WHERE substr(src.key,1,1) >= 5 GROUP BY substr(src.key,1,1), substr(src.key,2,1)
PREHOOK: type: QUERY
POSTHOOK: query: EXPLAIN
FROM src
INSERT OVERWRITE TABLE dest_g2 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(DISTINCT substr(src.value, 5)), count(src.value) WHERE substr(src.key,1,1) >= 5 GROUP BY substr(src.key,1,1)
INSERT OVERWRITE TABLE dest_g3 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(DISTINCT substr(src.value, 5)), count(src.value) WHERE substr(src.key,1,1) < 5 GROUP BY substr(src.key,1,1)
INSERT OVERWRITE TABLE dest_g4 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(DISTINCT substr(src.value, 5)), count(src.value) GROUP BY substr(src.key,1,1)
INSERT OVERWRITE TABLE dest_h2 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(substr(src.value, 5)), count(src.value) GROUP BY substr(src.key,1,1), substr(src.key,2,1) LIMIT 10
INSERT OVERWRITE TABLE dest_h2 SELECT substr(src.key,1,1) as c1, count(DISTINCT substr(src.value,5)) as c2, concat(substr(src.key,1,1),sum(substr(src.value,5))) as c3, sum(substr(src.value, 5)) as c4, count(src.value) as c6 GROUP BY substr(src.key,1,1), substr(src.key,2,1) ORDER BY c1, c2 LIMIT 10
INSERT OVERWRITE TABLE dest_h3 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(substr(src.value, 5)), count(src.value) WHERE substr(src.key,1,1) >= 5 GROUP BY substr(src.key,1,1), substr(src.key,2,1)
POSTHOOK: type: QUERY
STAGE DEPENDENCIES:
Expand Down Expand Up @@ -444,15 +444,12 @@ STAGE PLANS:
expressions: _col0 (type: string), _col2 (type: bigint), concat(_col0, _col3) (type: string), _col3 (type: double), _col4 (type: bigint)
outputColumnNames: _col0, _col1, _col2, _col3, _col4
Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
Limit
Number of rows: 10
Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
File Output Operator
compressed: false
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
Filter Operator
predicate: (KEY._col0 >= 5) (type: boolean)
Statistics: Num rows: 166 Data size: 1763 Basic stats: COMPLETE Column stats: NONE
Expand Down Expand Up @@ -480,14 +477,15 @@ STAGE PLANS:
Map Operator Tree:
TableScan
Reduce Output Operator
sort order:
Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE
value expressions: _col0 (type: string), _col1 (type: bigint), _col2 (type: string), _col3 (type: double), _col4 (type: bigint)
key expressions: _col0 (type: string), _col1 (type: bigint)
sort order: ++
Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
value expressions: _col2 (type: string), _col3 (type: double), _col4 (type: bigint)
Reduce Operator Tree:
Select Operator
expressions: VALUE._col0 (type: string), VALUE._col1 (type: bigint), VALUE._col2 (type: string), VALUE._col3 (type: double), VALUE._col4 (type: bigint)
expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: bigint), VALUE._col0 (type: string), VALUE._col1 (type: double), VALUE._col2 (type: bigint)
outputColumnNames: _col0, _col1, _col2, _col3, _col4
Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE
Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE
Limit
Number of rows: 10
Statistics: Num rows: 10 Data size: 100 Basic stats: COMPLETE Column stats: NONE
Expand Down Expand Up @@ -534,7 +532,7 @@ PREHOOK: query: FROM src
INSERT OVERWRITE TABLE dest_g2 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(DISTINCT substr(src.value, 5)), count(src.value) WHERE substr(src.key,1,1) >= 5 GROUP BY substr(src.key,1,1)
INSERT OVERWRITE TABLE dest_g3 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(DISTINCT substr(src.value, 5)), count(src.value) WHERE substr(src.key,1,1) < 5 GROUP BY substr(src.key,1,1)
INSERT OVERWRITE TABLE dest_g4 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(DISTINCT substr(src.value, 5)), count(src.value) GROUP BY substr(src.key,1,1)
INSERT OVERWRITE TABLE dest_h2 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(substr(src.value, 5)), count(src.value) GROUP BY substr(src.key,1,1), substr(src.key,2,1) LIMIT 10
INSERT OVERWRITE TABLE dest_h2 SELECT substr(src.key,1,1) as c1, count(DISTINCT substr(src.value,5)) as c2, concat(substr(src.key,1,1),sum(substr(src.value,5))) as c3, sum(substr(src.value, 5)) as c4, count(src.value) as c6 GROUP BY substr(src.key,1,1), substr(src.key,2,1) ORDER BY c1, c2 LIMIT 10
INSERT OVERWRITE TABLE dest_h3 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(substr(src.value, 5)), count(src.value) WHERE substr(src.key,1,1) >= 5 GROUP BY substr(src.key,1,1), substr(src.key,2,1)
PREHOOK: type: QUERY
PREHOOK: Input: default@src
Expand All @@ -547,7 +545,7 @@ POSTHOOK: query: FROM src
INSERT OVERWRITE TABLE dest_g2 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(DISTINCT substr(src.value, 5)), count(src.value) WHERE substr(src.key,1,1) >= 5 GROUP BY substr(src.key,1,1)
INSERT OVERWRITE TABLE dest_g3 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(DISTINCT substr(src.value, 5)), count(src.value) WHERE substr(src.key,1,1) < 5 GROUP BY substr(src.key,1,1)
INSERT OVERWRITE TABLE dest_g4 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(DISTINCT substr(src.value, 5)), count(src.value) GROUP BY substr(src.key,1,1)
INSERT OVERWRITE TABLE dest_h2 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(substr(src.value, 5)), count(src.value) GROUP BY substr(src.key,1,1), substr(src.key,2,1) LIMIT 10
INSERT OVERWRITE TABLE dest_h2 SELECT substr(src.key,1,1) as c1, count(DISTINCT substr(src.value,5)) as c2, concat(substr(src.key,1,1),sum(substr(src.value,5))) as c3, sum(substr(src.value, 5)) as c4, count(src.value) as c6 GROUP BY substr(src.key,1,1), substr(src.key,2,1) ORDER BY c1, c2 LIMIT 10
INSERT OVERWRITE TABLE dest_h3 SELECT substr(src.key,1,1), count(DISTINCT substr(src.value,5)), concat(substr(src.key,1,1),sum(substr(src.value,5))), sum(substr(src.value, 5)), count(src.value) WHERE substr(src.key,1,1) >= 5 GROUP BY substr(src.key,1,1), substr(src.key,2,1)
POSTHOOK: type: QUERY
POSTHOOK: Input: default@src
Expand Down
Loading

0 comments on commit 8cabe8d

Please sign in to comment.