
Merge latest trunk into cbo branch. (Gunther Hagleitner)
git-svn-id: https://svn.apache.org/repos/asf/hive/branches/cbo@1625176 13f79535-47bb-0310-9956-ffa450edef68
hagleitn committed Sep 15, 2014
2 parents 0a0cd46 + be5bdbe commit a329ec4
Showing 279 changed files with 21,096 additions and 3,838 deletions.
20 changes: 17 additions & 3 deletions common/src/java/org/apache/hadoop/hive/common/FileUtils.java
@@ -446,12 +446,26 @@ public static boolean isActionPermittedForFileHierarchy(FileSystem fs, FileStatus
public static boolean isLocalFile(HiveConf conf, String fileName) {
try {
// do best effort to determine if this is a local file
- FileSystem fsForFile = FileSystem.get(new URI(fileName), conf);
- return LocalFileSystem.class.isInstance(fsForFile);
+ return isLocalFile(conf, new URI(fileName));
} catch (URISyntaxException e) {
LOG.warn("Unable to create URI from " + fileName, e);
}
return false;
}

+ /**
+  * A best-effort attempt to determine if the file is a local file
+  * @param conf
+  * @param fileUri
+  * @return true if the file could successfully be determined to be local
+  */
+ public static boolean isLocalFile(HiveConf conf, URI fileUri) {
+   try {
+     // do best effort to determine if this is a local file
+     FileSystem fsForFile = FileSystem.get(fileUri, conf);
+     return LocalFileSystem.class.isInstance(fsForFile);
} catch (IOException e) {
LOG.warn("Unable to get FileSystem for " + fileName, e);
LOG.warn("Unable to get FileSystem for " + fileUri, e);
}
return false;
}
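
For reference, a minimal caller sketch of the refactored helper above. The demo class and file names are hypothetical; only FileUtils.isLocalFile and its two overloads come from this diff.

import java.net.URI;

import org.apache.hadoop.hive.common.FileUtils;
import org.apache.hadoop.hive.conf.HiveConf;

public class LocalFileCheckDemo {
  public static void main(String[] args) throws Exception {
    HiveConf conf = new HiveConf();
    // String overload: parses the name into a URI first and returns false
    // (after logging a warning) if the syntax is invalid.
    boolean fromString = FileUtils.isLocalFile(conf, "file:///tmp/data.txt");
    // New URI overload: skips the parse when the caller already holds a URI.
    boolean fromUri = FileUtils.isLocalFile(conf, new URI("hdfs://namenode:8020/tmp/data.txt"));
    System.out.println(fromString + " / " + fromUri); // expected: true / false
  }
}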
21 changes: 3 additions & 18 deletions common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
@@ -301,6 +301,9 @@ public static enum ConfVars {

HIVE_IN_TEST("hive.in.test", false, "internal usage only, true in test mode", true),

+ HIVE_IN_TEZ_TEST("hive.in.tez.test", false, "internal use only, true when in testing tez",
+     true),

LOCALMODEAUTO("hive.exec.mode.local.auto", false,
"Let Hive determine whether to run in local mode automatically"),
LOCALMODEMAXBYTES("hive.exec.mode.local.auto.inputbytes.max", 134217728L,
@@ -806,18 +809,11 @@ public static enum ConfVars {
"map-reduce job to merge the output files into bigger files. This is only done for map-only jobs \n" +
"if hive.merge.mapfiles is true, and for map-reduce jobs if hive.merge.mapredfiles is true."),
HIVEMERGERCFILEBLOCKLEVEL("hive.merge.rcfile.block.level", true, ""),
- HIVEMERGEINPUTFORMATBLOCKLEVEL("hive.merge.input.format.block.level",
-     "org.apache.hadoop.hive.ql.io.rcfile.merge.RCFileBlockMergeInputFormat", ""),
HIVEMERGEORCFILESTRIPELEVEL("hive.merge.orcfile.stripe.level", true,
"When hive.merge.mapfiles or hive.merge.mapredfiles is enabled while writing a\n" +
" table with ORC file format, enabling this config will do stripe level fast merge\n" +
" for small ORC files. Note that enabling this config will not honor padding tolerance\n" +
" config (hive.exec.orc.block.padding.tolerance)."),
- HIVEMERGEINPUTFORMATSTRIPELEVEL("hive.merge.input.format.stripe.level",
-     "org.apache.hadoop.hive.ql.io.orc.OrcFileStripeMergeInputFormat",
-     "Input file format to use for ORC stripe level merging (for internal use only)"),
- HIVEMERGECURRENTJOBHASDYNAMICPARTITIONS(
-     "hive.merge.current.job.has.dynamic.partitions", false, ""),

HIVEUSEEXPLICITRCFILEHEADER("hive.exec.rcfile.use.explicit.header", true,
"If this is set the header for RCFiles will simply be RCF. If this is not\n" +
@@ -1679,17 +1675,6 @@ public static enum ConfVars {
" it will now take 512 reducers, similarly if the max number of reducers is 511,\n" +
" and a job was going to use this many, it will now use 256 reducers."),

- /* The following section contains all configurations used for list bucketing feature.*/
- /* This is not for clients. but only for block merge task. */
- /* This is used by BlockMergeTask to send out flag to RCFileMergeMapper */
- /* about alter table...concatenate and list bucketing case. */
- HIVEMERGECURRENTJOBCONCATENATELISTBUCKETING(
-     "hive.merge.current.job.concatenate.list.bucketing", true, ""),
- /* This is not for clients. but only for block merge task. */
- /* This is used by BlockMergeTask to send out flag to RCFileMergeMapper */
- /* about depth of list bucketing. */
- HIVEMERGECURRENTJOBCONCATENATELISTBUCKETINGDEPTH(
-     "hive.merge.current.job.concatenate.list.bucketing.depth", 0, ""),
HIVEOPTLISTBUCKETING("hive.optimize.listbucketing", false,
"Enable list bucketing optimizer. Default value is false so that we disable it by default."),

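
A minimal sketch of toggling the stripe-level ORC merge described above from Java. The demo class is hypothetical; the ConfVars entry is the one retained in this diff.

import org.apache.hadoop.hive.conf.HiveConf;

public class OrcMergeConfDemo {
  public static void main(String[] args) {
    HiveConf conf = new HiveConf();
    // Enables stripe-level fast merge for small ORC files written by
    // hive.merge.mapfiles / hive.merge.mapredfiles jobs; as the description
    // notes, padding tolerance is not honored in this mode.
    conf.setBoolVar(HiveConf.ConfVars.HIVEMERGEORCFILESTRIPELEVEL, true);
    System.out.println(conf.getBoolVar(HiveConf.ConfVars.HIVEMERGEORCFILESTRIPELEVEL)); // true
  }
}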
@@ -34,14 +34,14 @@ STAGE PLANS:
Map Operator Tree:
TableScan
alias: src_thrift
- Statistics: Num rows: 11 Data size: 1606 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 11 Data size: 3070 Basic stats: COMPLETE Column stats: NONE
Select Operator
expressions: example_arraysum(lint) (type: double), example_mapconcat(mstringstring) (type: string), example_structprint(lintstring[0]) (type: string)
outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 11 Data size: 1606 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 11 Data size: 3070 Basic stats: COMPLETE Column stats: NONE
File Output Operator
compressed: false
- Statistics: Num rows: 11 Data size: 1606 Basic stats: COMPLETE Column stats: NONE
+ Statistics: Num rows: 11 Data size: 3070 Basic stats: COMPLETE Column stats: NONE
table:
input format: org.apache.hadoop.mapred.TextInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
15 changes: 15 additions & 0 deletions data/conf/tez/hive-site.xml
@@ -19,6 +19,12 @@

<configuration>

+ <property>
+   <name>hive.in.test</name>
+   <value>true</value>
+   <description>Internal marker for test. Used for masking env-dependent values</description>
+ </property>

<!-- Hive Configuration can either be stored in this file or in the hadoop configuration files -->
<!-- that are implied by Hadoop setup variables. -->
<!-- Aside from Hadoop setup variables - this file is provided as a convenience so that Hive -->
@@ -239,4 +245,13 @@
</description>
</property>

+ <property>
+   <name>hive.in.tez.test</name>
+   <value>true</value>
+   <description>
+     Indicates that we are in tez testing mode.
+   </description>
+ </property>


</configuration>
Binary file modified data/files/complex.seq
@@ -103,7 +103,7 @@ protected void setUp() {
db.dropTable(MetaStoreUtils.DEFAULT_DATABASE_NAME, src, true, true);
db.createTable(src, cols, null, TextInputFormat.class,
IgnoreKeyTextOutputFormat.class);
- db.loadTable(hadoopDataFile[i], src, false, false, false, false);
+ db.loadTable(hadoopDataFile[i], src, false, false, false, false, false);
i++;
}

@@ -147,10 +147,10 @@ public void testStatsAfterCompactionPartTbl() throws Exception {
execSelectAndDumpData("select * from " + tblNameStg, driver, "Dumping data for " +
tblNameStg + " after load:");
executeStatementOnDriver("FROM " + tblNameStg +
" INSERT OVERWRITE TABLE " + tblName + " PARTITION(bkt=0) " +
" INSERT INTO TABLE " + tblName + " PARTITION(bkt=0) " +
"SELECT a, b where a < 2", driver);
executeStatementOnDriver("FROM " + tblNameStg +
" INSERT OVERWRITE TABLE " + tblName + " PARTITION(bkt=1) " +
" INSERT INTO TABLE " + tblName + " PARTITION(bkt=1) " +
"SELECT a, b where a >= 2", driver);
execSelectAndDumpData("select * from " + tblName, driver, "Dumping data for " +
tblName + " after load:");
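
The fix above appends with INSERT INTO instead of replacing partition contents with INSERT OVERWRITE. A hedged JDBC sketch of the distinction; the connection URL and table names are hypothetical, and a running HiveServer2 is assumed.

import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.Statement;

public class InsertSemanticsDemo {
  public static void main(String[] args) throws Exception {
    try (Connection conn =
             DriverManager.getConnection("jdbc:hive2://localhost:10000/default");
         Statement stmt = conn.createStatement()) {
      // INSERT INTO appends rows, preserving whatever the partition holds.
      stmt.execute("INSERT INTO TABLE target PARTITION (bkt=0) "
          + "SELECT a, b FROM staging WHERE a < 2");
      // INSERT OVERWRITE would instead discard the partition's existing rows:
      // stmt.execute("INSERT OVERWRITE TABLE target PARTITION (bkt=0) "
      //     + "SELECT a, b FROM staging WHERE a < 2");
    }
  }
}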
31 changes: 31 additions & 0 deletions itests/src/test/resources/testconfiguration.properties
@@ -56,13 +56,22 @@ minitez.query.files.shared=alter_merge_2_orc.q,\
bucket3.q,\
bucket4.q,\
cbo_correctness.q,\
+ correlationoptimizer1.q,\
count.q,\
create_merge_compressed.q,\
cross_join.q,\
cross_product_check_1.q,\
cross_product_check_2.q,\
ctas.q,\
custom_input_output_format.q,\
+ delete_all_non_partitioned.q,\
+ delete_all_partitioned.q,\
+ delete_orig_table.q,\
+ delete_tmp_table.q,\
+ delete_where_no_match.q,\
+ delete_where_non_partitioned.q,\
+ delete_where_partitioned.q,\
+ delete_whole_partition.q,\
disable_merge_for_bucketing.q,\
dynpart_sort_opt_vectorization.q,\
dynpart_sort_optimization.q,\
@@ -76,6 +85,13 @@ minitez.query.files.shared=alter_merge_2_orc.q,\
insert1.q,\
insert_into1.q,\
insert_into2.q,\
+ insert_orig_table.q,\
+ insert_values_dynamic_partitioned.q,\
+ insert_values_non_partitioned.q,\
+ insert_values_orig_table.q,\
+ insert_values_partitioned.q,\
+ insert_values_tmp_table.q,\
+ insert_update_delete.q,\
join0.q,\
join1.q,\
leftsemijoin.q,\
@@ -96,6 +112,11 @@ minitez.query.files.shared=alter_merge_2_orc.q,\
orc_merge2.q,\
orc_merge3.q,\
orc_merge4.q,\
+ orc_merge5.q,\
+ orc_merge6.q,\
+ orc_merge7.q,\
+ orc_merge_incompat1.q,\
+ orc_merge_incompat2.q,\
parallel.q,\
ptf.q,\
sample1.q,\
@@ -122,6 +143,16 @@ minitez.query.files.shared=alter_merge_2_orc.q,\
union7.q,\
union8.q,\
union9.q,\
+ update_after_multiple_inserts.q,\
+ update_all_non_partitioned.q,\
+ update_all_partitioned.q,\
+ update_all_types.q,\
+ update_orig_table.q,\
+ update_tmp_table.q,\
+ update_where_no_match.q,\
+ update_where_non_partitioned.q,\
+ update_where_partitioned.q,\
+ update_two_cols.q,\
vector_cast_constant.q,\
vector_data_types.q,\
vector_decimal_aggregate.q,\
@@ -21,8 +21,6 @@
import static org.apache.commons.lang.StringUtils.join;
import static org.apache.commons.lang.StringUtils.repeat;

- import java.math.BigDecimal;
- import java.nio.ByteBuffer;
import java.sql.Connection;
import java.sql.SQLException;
import java.text.ParseException;
@@ -42,13 +40,11 @@
import org.apache.commons.lang.StringUtils;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
- import org.apache.commons.math3.stat.StatUtils;
import org.apache.hadoop.hive.metastore.api.AggrStats;
import org.apache.hadoop.hive.metastore.api.ColumnStatistics;
import org.apache.hadoop.hive.metastore.api.ColumnStatisticsData;
import org.apache.hadoop.hive.metastore.api.ColumnStatisticsDesc;
import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj;
- import org.apache.hadoop.hive.metastore.api.Decimal;
import org.apache.hadoop.hive.metastore.api.FieldSchema;
import org.apache.hadoop.hive.metastore.api.MetaException;
import org.apache.hadoop.hive.metastore.api.Order;
@@ -1081,7 +1081,8 @@ private enum LockAction {ACQUIRE, WAIT, KEEP_LOOKING}
private static Map<LockType, Map<LockType, Map<LockState, LockAction>>> jumpTable;

private void checkQFileTestHack() {
- boolean hackOn = HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_IN_TEST);
+ boolean hackOn = HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_IN_TEST) ||
+     HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_IN_TEZ_TEST);
if (hackOn) {
LOG.info("Hacking in canned values for transaction manager");
// Set up the transaction/locking db in the derby metastore
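
A compact, self-contained sketch of the guard introduced above. The demo class is hypothetical, while both ConfVars entries appear in this commit.

import org.apache.hadoop.hive.conf.HiveConf;

public class TestModeGuardDemo {
  // Either flag switches the transaction manager to its canned test values.
  static boolean qfileTestHackEnabled(HiveConf conf) {
    return HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_IN_TEST)
        || HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_IN_TEZ_TEST);
  }

  public static void main(String[] args) {
    HiveConf conf = new HiveConf();
    conf.setBoolVar(HiveConf.ConfVars.HIVE_IN_TEZ_TEST, true);
    System.out.println(qfileTestHackEnabled(conf)); // true
  }
}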
@@ -11,7 +11,6 @@
import org.apache.hadoop.hive.metastore.partition.spec.CompositePartitionSpecProxy;
import org.apache.hadoop.hive.metastore.partition.spec.PartitionSpecProxy;
import org.apache.hadoop.hive.serde2.columnar.LazyBinaryColumnarSerDe;
- import org.apache.hadoop.util.ExitUtil;
import org.junit.AfterClass;
import org.junit.Assert;
import org.junit.BeforeClass;
@@ -52,7 +51,7 @@ public void checkPermission(Permission perm, Object context) {
public void checkExit(int status) {

super.checkExit(status);
- throw new ExitUtil.ExitException(status, "System.exit() was called. Raising exception. ");
+ throw new RuntimeException("System.exit() was called. Raising exception. ");
}
}

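
The change above swaps Hadoop's ExitUtil.ExitException for a plain RuntimeException when trapping System.exit() in tests. A self-contained sketch of that pattern; the class name is hypothetical, and SecurityManager is deprecated in recent JDKs, so treat this as illustrative only.

import java.security.Permission;

public class NoExitSecurityManagerDemo extends SecurityManager {
  @Override
  public void checkPermission(Permission perm) {
    // Permit everything; we only care about intercepting exit.
  }

  @Override
  public void checkExit(int status) {
    super.checkExit(status);
    throw new RuntimeException("System.exit() was called. Raising exception. ");
  }

  public static void main(String[] args) {
    System.setSecurityManager(new NoExitSecurityManagerDemo());
    try {
      System.exit(1); // would normally terminate the JVM
    } catch (RuntimeException expected) {
      System.out.println("caught: " + expected.getMessage());
    } finally {
      System.setSecurityManager(null);
    }
  }
}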


14 changes: 14 additions & 0 deletions ql/src/java/org/apache/hadoop/hive/ql/Context.java
@@ -43,6 +43,7 @@
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.ql.exec.TaskRunner;
import org.apache.hadoop.hive.ql.hooks.WriteEntity;
+ import org.apache.hadoop.hive.ql.io.AcidUtils;
import org.apache.hadoop.hive.ql.lockmgr.HiveLock;
import org.apache.hadoop.hive.ql.lockmgr.HiveLockManager;
import org.apache.hadoop.hive.ql.lockmgr.HiveLockObj;
@@ -98,6 +99,11 @@ public class Context {
// Transaction manager for this query
protected HiveTxnManager hiveTxnManager;

+ // Used to track what type of acid operation (insert, update, or delete) we are doing. Useful
+ // since we want to change where bucket columns are accessed in some operators and
+ // optimizations when doing updates and deletes.
+ private AcidUtils.Operation acidOperation = AcidUtils.Operation.NOT_ACID;

private boolean needLockMgr;

// Keep track of the mapping from load table desc to the output and the lock
@@ -612,4 +618,12 @@ public int getTryCount() {
public void setTryCount(int tryCount) {
this.tryCount = tryCount;
}

+ public void setAcidOperation(AcidUtils.Operation op) {
+   acidOperation = op;
+ }
+
+ public AcidUtils.Operation getAcidOperation() {
+   return acidOperation;
+ }
}
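
A minimal sketch of how a compiler phase might drive the new accessors. The demo class is hypothetical and assumes Context's Configuration-based constructor; AcidUtils.Operation and the two methods come from this diff.

import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.ql.Context;
import org.apache.hadoop.hive.ql.io.AcidUtils;

public class AcidOperationDemo {
  public static void main(String[] args) throws Exception {
    Context ctx = new Context(new HiveConf());
    // Stays NOT_ACID until semantic analysis marks the statement type.
    System.out.println(ctx.getAcidOperation()); // NOT_ACID
    ctx.setAcidOperation(AcidUtils.Operation.UPDATE);
    System.out.println(ctx.getAcidOperation()); // UPDATE
  }
}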
