HIVE-8642 : Hive stack() UDTF Doesn't Support NULL Insert Values (Navis via Ashutosh Chauhan)

git-svn-id: https://svn.apache.org/repos/asf/hive/trunk@1640798 13f79535-47bb-0310-9956-ffa450edef68
ashutoshc committed Nov 20, 2014
1 parent a099ea0 commit a4ccee2
Showing 12 changed files with 94 additions and 80 deletions.
4 changes: 2 additions & 2 deletions ql/src/java/org/apache/hadoop/hive/ql/exec/UnionOperator.java
@@ -92,8 +92,8 @@ protected void initializeOp(Configuration hconf) throws HiveException {
         columns);
     for (int c = 0; c < columns; c++) {
       // can be null for void type
-      ObjectInspector oi = columnTypeResolvers[c].get();
-      outputFieldOIs.add(oi == null ? parentFields[0].get(c).getFieldObjectInspector() : oi);
+      ObjectInspector fieldOI = parentFields[0].get(c).getFieldObjectInspector();
+      outputFieldOIs.add(columnTypeResolvers[c].get(fieldOI));
     }
 
     // create output row ObjectInspector
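The UnionOperator hunk above establishes the calling convention used throughout this patch: instead of null-checking what the type resolver returns, the caller passes the fallback ObjectInspector it wants whenever every UNION branch contributed only void-typed NULL columns; here that fallback is the corresponding field OI of the first parent. The UDF hunks below follow the same pattern with a string default, and a runnable sketch of the resolver itself follows the GenericUDFUtils hunk.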
5 changes: 4 additions & 1 deletion ql/src/java/org/apache/hadoop/hive/ql/optimizer/SimpleFetchOptimizer.java
@@ -59,6 +59,7 @@
 import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc;
 import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
 import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc;
+import org.apache.hadoop.hive.ql.plan.ExprNodeNullDesc;
 import org.apache.hadoop.hive.ql.plan.FetchWork;
 import org.apache.hadoop.hive.ql.plan.ListSinkDesc;
 import org.apache.hadoop.hive.ql.plan.OperatorDesc;
@@ -244,7 +245,9 @@ private boolean checkExpressions(SelectOperator op) {
   }
 
   private boolean checkExpression(ExprNodeDesc expr) {
-    if (expr instanceof ExprNodeConstantDesc || expr instanceof ExprNodeColumnDesc) {
+    if (expr instanceof ExprNodeConstantDesc ||
+        expr instanceof ExprNodeNullDesc ||
+        expr instanceof ExprNodeColumnDesc) {
       return true;
     }
 
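Because NULL literals now reach the optimizer as ExprNodeNullDesc nodes (see the SemanticAnalyzer hunk below), the fetch-only whitelist has to admit them; otherwise the new stack() test query could no longer be served by a plain fetch task. A minimal sketch of the widened check, mirroring only the lines shown in this hunk; the FetchWhitelist class is hypothetical, and the real method goes on to handle further expression kinds:

import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeNullDesc;

// Hypothetical helper mirroring the patched whitelist: expressions cheap
// enough to evaluate in the fetch task without launching a job.
public final class FetchWhitelist {
  private FetchWhitelist() {
  }

  public static boolean isTrivial(ExprNodeDesc expr) {
    return expr instanceof ExprNodeConstantDesc
        || expr instanceof ExprNodeNullDesc // admitted by this commit
        || expr instanceof ExprNodeColumnDesc;
  }
}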
6 changes: 0 additions & 6 deletions ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
@@ -158,7 +158,6 @@
 import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
 import org.apache.hadoop.hive.ql.plan.ExprNodeDescUtils;
 import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc;
-import org.apache.hadoop.hive.ql.plan.ExprNodeNullDesc;
 import org.apache.hadoop.hive.ql.plan.ExtractDesc;
 import org.apache.hadoop.hive.ql.plan.FileSinkDesc;
 import org.apache.hadoop.hive.ql.plan.FilterDesc;
@@ -3702,11 +3701,6 @@ && isRegex(unescapeIdentifier(expr.getChild(1).getText()), conf)) {
     ArrayList<String> columnNames = new ArrayList<String>();
     Map<String, ExprNodeDesc> colExprMap = new HashMap<String, ExprNodeDesc>();
     for (int i = 0; i < col_list.size(); i++) {
-      // Replace NULL with CAST(NULL AS STRING)
-      if (col_list.get(i) instanceof ExprNodeNullDesc) {
-        col_list.set(i, new ExprNodeConstantDesc(
-            TypeInfoFactory.stringTypeInfo, null));
-      }
       String outputCol = getColumnInternalName(i);
       colExprMap.put(outputCol, col_list.get(i));
       columnNames.add(outputCol);
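With the eager rewrite above removed, a bare NULL keeps its void-typed descriptor through plan generation, which is why the .q.out diffs later in this commit flip from null (type: string) to null (type: void). A hedged illustration of the two representations; the NullLiteralDemo class is hypothetical, the descriptor classes and TypeInfoFactory are the real Hive plan and serde types, and ExprNodeNullDesc's no-argument constructor is assumed to default to void:

import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeNullDesc;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;

// Illustrative only: how a bare NULL literal is represented before and
// after this commit.
public final class NullLiteralDemo {
  private NullLiteralDemo() {
  }

  // Before: genSelectPlan eagerly coerced NULL to a string constant,
  // so EXPLAIN printed "null (type: string)".
  static ExprNodeDesc rewrittenNull() {
    return new ExprNodeConstantDesc(TypeInfoFactory.stringTypeInfo, null);
  }

  // After: the void-typed null descriptor flows through unchanged, and
  // the same plans print "null (type: void)".
  static ExprNodeDesc preservedNull() {
    return new ExprNodeNullDesc();
  }
}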
@@ -28,7 +28,6 @@
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters.Converter;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
-import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
 
 /**
@@ -59,11 +58,8 @@ public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumentException {

     converters = new Converter[arguments.length];
 
-    ObjectInspector returnOI = returnOIResolver.get();
-    if (returnOI == null) {
-      returnOI = PrimitiveObjectInspectorFactory
-          .getPrimitiveJavaObjectInspector(PrimitiveObjectInspector.PrimitiveCategory.STRING);
-    }
+    ObjectInspector returnOI =
+        returnOIResolver.get(PrimitiveObjectInspectorFactory.javaStringObjectInspector);
     for (int i = 0; i < arguments.length; i++) {
       converters[i] = ObjectInspectorConverters.getConverter(arguments[i],
           returnOI);
@@ -82,17 +82,10 @@ public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumentException {
       }
     }
 
-    ObjectInspector keyOI = keyOIResolver.get();
-    ObjectInspector valueOI = valueOIResolver.get();
-
-    if (keyOI == null) {
-      keyOI = PrimitiveObjectInspectorFactory
-          .getPrimitiveJavaObjectInspector(PrimitiveObjectInspector.PrimitiveCategory.STRING);
-    }
-    if (valueOI == null) {
-      valueOI = PrimitiveObjectInspectorFactory
-          .getPrimitiveJavaObjectInspector(PrimitiveObjectInspector.PrimitiveCategory.STRING);
-    }
+    ObjectInspector keyOI =
+        keyOIResolver.get(PrimitiveObjectInspectorFactory.javaStringObjectInspector);
+    ObjectInspector valueOI =
+        valueOIResolver.get(PrimitiveObjectInspectorFactory.javaStringObjectInspector);
 
     converters = new Converter[arguments.length];
 
@@ -82,10 +82,7 @@ public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumentException {
         ((ListObjectInspector)(arguments[0])).getListElementObjectInspector();
     argumentOIs = arguments;
     converters = new Converter[arguments.length];
-    ObjectInspector returnOI = returnOIResolver.get();
-    if (returnOI == null) {
-      returnOI = elementObjectInspector;
-    }
+    ObjectInspector returnOI = returnOIResolver.get(elementObjectInspector);
     converters[0] = ObjectInspectorConverters.getConverter(elementObjectInspector, returnOI);
 
     return ObjectInspectorFactory.getStandardListObjectInspector(returnOI);
8 changes: 6 additions & 2 deletions ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFUtils.java
@@ -22,7 +22,6 @@
 import java.lang.reflect.Method;
 import java.lang.reflect.ParameterizedType;
 import java.lang.reflect.Type;
-import java.nio.ByteBuffer;
 import java.util.HashMap;
 
 import org.apache.hadoop.hive.ql.exec.FunctionRegistry;
@@ -41,6 +40,7 @@
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils.ObjectInspectorCopyOption;
 import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
 import org.apache.hadoop.hive.serde2.objectinspector.primitive.VoidObjectInspector;
 import org.apache.hadoop.hive.serde2.typeinfo.BaseCharTypeInfo;
 import org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo;
@@ -191,7 +191,11 @@ private boolean update(ObjectInspector oi, boolean isUnionAll) throws UDFArgumentException {
    * Returns the ObjectInspector of the return value.
    */
   public ObjectInspector get() {
-    return returnObjectInspector;
+    return get(PrimitiveObjectInspectorFactory.javaVoidObjectInspector);
+  }
+
+  public ObjectInspector get(ObjectInspector defaultOI) {
+    return returnObjectInspector != null ? returnObjectInspector : defaultOI;
   }
 
   /**
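This hunk is the heart of the patch: get() used to return null whenever every ObjectInspector passed to update() was void-typed, forcing each caller to null-check and invent its own fallback. The new get(ObjectInspector defaultOI) overload centralizes that fallback, which is what lets the UnionOperator and UDF call sites above collapse to a single call. A self-contained sketch of the resulting behavior; DemoResolver is a hypothetical stand-in for GenericUDFUtils.ReturnObjectInspectorResolver, reduced to the two accessors, while the factory field is the real serde API:

import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;

// Hypothetical stand-in for GenericUDFUtils.ReturnObjectInspectorResolver,
// reduced to the two accessors this commit introduces.
public class DemoResolver {
  // Stays null when update() only ever saw void-typed (NULL) inputs.
  private ObjectInspector returnObjectInspector;

  public ObjectInspector get() {
    // No caller-supplied default: fall back to the void OI, so callers
    // no longer receive null here.
    return get(PrimitiveObjectInspectorFactory.javaVoidObjectInspector);
  }

  public ObjectInspector get(ObjectInspector defaultOI) {
    return returnObjectInspector != null ? returnObjectInspector : defaultOI;
  }
}

Callers that know a better default simply pass it in, as the string default in the UDF hunks above and the parent field OI in UnionOperator both do.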
4 changes: 4 additions & 0 deletions ql/src/test/queries/clientpositive/udtf_stack.q
@@ -5,3 +5,7 @@ EXPLAIN SELECT x, y FROM src LATERAL VIEW STACK(2, 'x', array(1), 'z', array(4))

 SELECT x, y FROM src LATERAL VIEW STACK(2, 'x', array(1), 'z') a AS x, y LIMIT 2;
 SELECT x, y FROM src LATERAL VIEW STACK(2, 'x', array(1), 'z', array(4)) a AS x, y LIMIT 2;
+
+EXPLAIN
+SELECT stack(1, "en", "dbpedia", NULL );
+SELECT stack(1, "en", "dbpedia", NULL );
32 changes: 10 additions & 22 deletions ql/src/test/results/clientpositive/annotate_stats_select.q.out
@@ -282,7 +282,7 @@
             alias: alltypes_orc
             Statistics: Num rows: 2 Data size: 1686 Basic stats: COMPLETE Column stats: COMPLETE
             Select Operator
-              expressions: null (type: string)
+              expressions: null (type: void)
               outputColumnNames: _col0
               Statistics: Num rows: 2 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE
               ListSink
@@ -497,33 +497,21 @@ POSTHOOK: query: -- numRows: 2 rawDataSize: 112
 explain select cast("1970-12-31 15:59:58.174" as DATE) from alltypes_orc
 POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
-  Stage-1 is a root stage
-  Stage-0 depends on stages: Stage-1
+  Stage-0 is a root stage
 
 STAGE PLANS:
-  Stage: Stage-1
-    Map Reduce
-      Map Operator Tree:
-          TableScan
-            alias: alltypes_orc
-            Statistics: Num rows: 2 Data size: 1686 Basic stats: COMPLETE Column stats: COMPLETE
-            Select Operator
-              expressions: null (type: void)
-              outputColumnNames: _col0
-              Statistics: Num rows: 2 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE
-              File Output Operator
-                compressed: false
-                Statistics: Num rows: 2 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE
-                table:
-                    input format: org.apache.hadoop.mapred.TextInputFormat
-                    output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-                    serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-
   Stage: Stage-0
     Fetch Operator
       limit: -1
       Processor Tree:
-        ListSink
+        TableScan
+          alias: alltypes_orc
+          Statistics: Num rows: 2 Data size: 1686 Basic stats: COMPLETE Column stats: COMPLETE
+          Select Operator
+            expressions: null (type: void)
+            outputColumnNames: _col0
+            Statistics: Num rows: 2 Data size: 0 Basic stats: PARTIAL Column stats: COMPLETE
+            ListSink
 
 PREHOOK: query: -- numRows: 2 rawDataSize: 224
 explain select cast("58.174" as DECIMAL) from alltypes_orc
2 changes: 1 addition & 1 deletion ql/src/test/results/clientpositive/input9.q.out
@@ -35,7 +35,7 @@
             predicate: (null = null) (type: boolean)
             Statistics: Num rows: 12 Data size: 91 Basic stats: COMPLETE Column stats: NONE
             Select Operator
-              expressions: null (type: string), UDFToInteger(key) (type: int)
+              expressions: null (type: void), UDFToInteger(key) (type: int)
               outputColumnNames: _col0, _col1
               Statistics: Num rows: 12 Data size: 91 Basic stats: COMPLETE Column stats: NONE
               File Output Operator
48 changes: 48 additions & 0 deletions ql/src/test/results/clientpositive/udtf_stack.q.out
@@ -157,3 +157,51 @@ POSTHOOK: Input: default@src
 #### A masked pattern was here ####
 x [1]
 z [4]
+PREHOOK: query: EXPLAIN
+SELECT stack(1, "en", "dbpedia", NULL )
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN
+SELECT stack(1, "en", "dbpedia", NULL )
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: _dummy_table
+            Row Limit Per Split: 1
+            Statistics: Num rows: 0 Data size: 1 Basic stats: PARTIAL Column stats: COMPLETE
+            Select Operator
+              expressions: 1 (type: int), 'en' (type: string), 'dbpedia' (type: string), null (type: void)
+              outputColumnNames: _col0, _col1, _col2, _col3
+              Statistics: Num rows: 0 Data size: 1 Basic stats: PARTIAL Column stats: COMPLETE
+              UDTF Operator
+                Statistics: Num rows: 0 Data size: 1 Basic stats: PARTIAL Column stats: COMPLETE
+                function name: stack
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 0 Data size: 1 Basic stats: PARTIAL Column stats: COMPLETE
+                  table:
+                      input format: org.apache.hadoop.mapred.TextInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: SELECT stack(1, "en", "dbpedia", NULL )
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT stack(1, "en", "dbpedia", NULL )
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+#### A masked pattern was here ####
+en dbpedia NULL
37 changes: 12 additions & 25 deletions ql/src/test/results/clientpositive/vector_elt.q.out
@@ -91,37 +91,24 @@ SELECT elt(2, 'abc', 'defg'),
        FROM alltypesorc LIMIT 1
 POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
-  Stage-1 is a root stage
-  Stage-0 depends on stages: Stage-1
+  Stage-0 is a root stage
 
 STAGE PLANS:
-  Stage: Stage-1
-    Map Reduce
-      Map Operator Tree:
-          TableScan
-            alias: alltypesorc
-            Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: COMPLETE
-            Select Operator
-              expressions: 'defg' (type: string), 'cc' (type: string), 'abc' (type: string), '2' (type: string), '12345' (type: string), '123456789012' (type: string), '1.25' (type: string), '16.0' (type: string), null (type: void), null (type: void)
-              outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9
-              Statistics: Num rows: 12288 Data size: 8687616 Basic stats: COMPLETE Column stats: COMPLETE
-              Limit
-                Number of rows: 1
-                Statistics: Num rows: 1 Data size: 707 Basic stats: COMPLETE Column stats: COMPLETE
-                File Output Operator
-                  compressed: false
-                  Statistics: Num rows: 1 Data size: 707 Basic stats: COMPLETE Column stats: COMPLETE
-                  table:
-                      input format: org.apache.hadoop.mapred.TextInputFormat
-                      output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-      Execution mode: vectorized
-
   Stage: Stage-0
     Fetch Operator
       limit: 1
       Processor Tree:
-        ListSink
+        TableScan
+          alias: alltypesorc
+          Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: COMPLETE
+          Select Operator
+            expressions: 'defg' (type: string), 'cc' (type: string), 'abc' (type: string), '2' (type: string), '12345' (type: string), '123456789012' (type: string), '1.25' (type: string), '16.0' (type: string), null (type: void), null (type: void)
+            outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9
+            Statistics: Num rows: 12288 Data size: 8687616 Basic stats: COMPLETE Column stats: COMPLETE
+            Limit
+              Number of rows: 1
+              Statistics: Num rows: 1 Data size: 707 Basic stats: COMPLETE Column stats: COMPLETE
+              ListSink
 
 PREHOOK: query: SELECT elt(2, 'abc', 'defg'),
        elt(3, 'aa', 'bb', 'cc', 'dd', 'ee', 'ff', 'gg'),
