[HOTFIX] Always download spark binary package from archive
### What is this PR for?
* Do not download Spark for the first profile, which only runs the license check
* Always download Spark from the archive instead of a mirror
    - Otherwise we would need to keep track of which Spark versions are available on the mirrors and update [this line](https://github.com/apache/zeppelin/blob/master/testing/downloadSpark.sh#L79) accordingly, which is unsustainable
    - The mirror site sometimes has problems such as `Not Found` responses, as is happening in CI right now
* Remove the unused variable `SPARK_VER_RANGE` from `testing/downloadSpark.sh` (apache#1578 (comment))
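After this change the script always hits the Apache archive, whose URL follows a fixed pattern. A minimal sketch of that pattern (the version and Hadoop values here are examples, and the `SPARK_ARCHIVE` naming mirrors Spark's binary package names; the real script derives these from its arguments):

```shell
#!/bin/bash
# Example values; the real script takes these as arguments.
SPARK_VERSION="2.0.0"
HADOOP_VERSION="2.3"
# Spark binary packages are named spark-<version>-bin-hadoop<hadoop version>
SPARK_ARCHIVE="spark-${SPARK_VERSION}-bin-hadoop${HADOOP_VERSION}"
# The archive hosts every released version, unlike the mirrors.
echo "http://archive.apache.org/dist/spark/spark-${SPARK_VERSION}/${SPARK_ARCHIVE}.tgz"
```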

> Note: CI will still fail until apache#1595 is merged

### What type of PR is it?
Hot Fix

### Questions:
* Do the license files need to be updated? No
* Are there breaking changes for older versions? No
* Does this need documentation? No

Author: Mina Lee <[email protected]>

Closes apache#1599 from minahlee/downloadSparkFromArchive and squashes the following commits:

89a46ca [Mina Lee] Always download spark binary package from archive
minahlee committed Nov 5, 2016
1 parent c5ab10d commit b6cd479
Showing 2 changed files with 9 additions and 39 deletions.
2 changes: 1 addition & 1 deletion .travis.yml
@@ -36,7 +36,7 @@ matrix:
   include:
     # Test License compliance using RAT tool
     - jdk: "oraclejdk7"
-      env: SCALA_VER="2.11" SPARK_VER="2.0.0" HADOOP_VER="2.3" PROFILE="-Prat" BUILD_FLAG="clean" TEST_FLAG="org.apache.rat:apache-rat-plugin:check" TEST_PROJECTS=""
+      env: SCALA_VER="2.11" PROFILE="-Prat" BUILD_FLAG="clean" TEST_FLAG="org.apache.rat:apache-rat-plugin:check" TEST_PROJECTS=""
 
     # Test all modules with spark 2.0.0 and scala 2.11
     - jdk: "oraclejdk7"
46 changes: 8 additions & 38 deletions testing/downloadSpark.sh
@@ -16,28 +16,15 @@
 # limitations under the License.
 #
 
-
 if [[ "$#" -ne 2 ]]; then
   echo "usage) $0 [spark version] [hadoop version]"
   echo "  eg) $0 1.3.1 2.6"
-  exit 1
+  exit 0
 fi
 
 SPARK_VERSION="${1}"
 HADOOP_VERSION="${2}"
 
-echo "${SPARK_VERSION}" | grep "^1.[123].[0-9]" > /dev/null
-if [[ "$?" -eq 0 ]]; then
-  echo "${SPARK_VERSION}" | grep "^1.[12].[0-9]" > /dev/null
-  if [[ "$?" -eq 0 ]]; then
-    SPARK_VER_RANGE="<=1.2"
-  else
-    SPARK_VER_RANGE="<=1.3"
-  fi
-else
-  SPARK_VER_RANGE=">1.3"
-fi
-
 set -xe
 
 MAX_DOWNLOAD_TIME_SEC=590
@@ -75,30 +62,13 @@ if [[ ! -d "${SPARK_HOME}" ]]; then
   ls -la .
   echo "${SPARK_CACHE} does not have ${SPARK_ARCHIVE} downloading ..."
 
-  # download archive if not cached
-  if [[ "${SPARK_VERSION}" = "1.4.1" ]]; then
-    echo "${SPARK_VERSION} being downloaded from archives"
-    # spark old versions are only available only on the archives (prior to 1.5.2)
-    STARTTIME=`date +%s`
-    #timeout -s KILL "${MAX_DOWNLOAD_TIME_SEC}" wget "http://archive.apache.org/dist/spark/spark-${SPARK_VERSION}/${SPARK_ARCHIVE}.tgz"
-    download_with_retry "http://archive.apache.org/dist/spark/spark-${SPARK_VERSION}/${SPARK_ARCHIVE}.tgz"
-    ENDTIME=`date +%s`
-    DOWNLOADTIME="$((ENDTIME-STARTTIME))"
-  else
-    echo "${SPARK_VERSION} being downloaded from mirror"
-    # spark 1.5.2 and up and later can be downloaded from mirror
-    # get download address from mirror
-    MIRROR_INFO=$(curl -s "http://www.apache.org/dyn/closer.cgi/spark/spark-${SPARK_VERSION}/${SPARK_ARCHIVE}.tgz?asjson=1")
-
-    PREFFERED=$(echo "${MIRROR_INFO}" | grep preferred | sed 's/[^"]*.preferred.: .\([^"]*\).*/\1/g')
-    PATHINFO=$(echo "${MIRROR_INFO}" | grep path_info | sed 's/[^"]*.path_info.: .\([^"]*\).*/\1/g')
-
-    STARTTIME=`date +%s`
-    #timeout -s KILL "${MAX_DOWNLOAD_TIME_SEC}" wget -q "${PREFFERED}${PATHINFO}"
-    download_with_retry "${PREFFERED}${PATHINFO}"
-    ENDTIME=`date +%s`
-    DOWNLOADTIME="$((ENDTIME-STARTTIME))"
-  fi
+  # download spark from archive if not cached
+  echo "${SPARK_VERSION} being downloaded from archives"
+  STARTTIME=`date +%s`
+  #timeout -s KILL "${MAX_DOWNLOAD_TIME_SEC}" wget "http://archive.apache.org/dist/spark/spark-${SPARK_VERSION}/${SPARK_ARCHIVE}.tgz"
+  download_with_retry "http://archive.apache.org/dist/spark/spark-${SPARK_VERSION}/${SPARK_ARCHIVE}.tgz"
+  ENDTIME=`date +%s`
+  DOWNLOADTIME="$((ENDTIME-STARTTIME))"
 fi
 
 # extract archive in un-cached root, clean-up on failure
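The diff calls `download_with_retry`, which is defined elsewhere in `testing/downloadSpark.sh` and not shown here. A hypothetical sketch of such a helper, assuming it wraps the commented-out `timeout`/`wget` invocation in a bounded retry loop (the retry count and messages are assumptions, not the actual implementation):

```shell
#!/bin/bash
# Hypothetical retry wrapper; the real download_with_retry in
# testing/downloadSpark.sh may differ.
MAX_DOWNLOAD_TIME_SEC=590

download_with_retry() {
  local url="$1"
  local attempt
  for attempt in 1 2 3; do
    # kill wget if a single attempt exceeds the time budget
    if timeout -s KILL "${MAX_DOWNLOAD_TIME_SEC}" wget -q "${url}"; then
      return 0
    fi
    echo "download of ${url} failed (attempt ${attempt}), retrying" >&2
  done
  echo "giving up on ${url}" >&2
  return 1
}
```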
