Skip to content

Commit

Permalink
Skip CXformPushGbBelowSetOp when arity > threshold
Browse files Browse the repository at this point in the history
For queries containing a UNION over a large number of children,
CXformPushGbBelowSetOp pushes the Group By op to each child, creating
that many new groups in the memo. While not a problem by itself, this
exponentially increases the number of bindings for other xforms that use
CPatternTree to extract them. When extracting the bindings, each child of
the UNION can now be selected either with or without the group by, so a
UNION with n children yields up to 2^n combinations. The more such
children there are, the more possibilities there are, and the higher the
optimization time.

An example of an affected xform: CXformLeftSemiApply2LeftSemiJoin.

To prevent this exponential increase in binding extraction, skip
CXformPushGbBelowSetOp when the arity of the set op exceeds a
configurable threshold parameter.
  • Loading branch information
Shreedhar Hardikar authored and hardikar committed Jun 14, 2019
1 parent 9ca4827 commit 80b9608
Show file tree
Hide file tree
Showing 9 changed files with 1,095 additions and 9 deletions.
4 changes: 2 additions & 2 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,8 @@ project(gpopt LANGUAGES CXX C)

set(CMAKE_CXX_STANDARD 98)
set(GPORCA_VERSION_MAJOR 3)
set(GPORCA_VERSION_MINOR 48)
set(GPORCA_VERSION_PATCH 1)
set(GPORCA_VERSION_MINOR 49)
set(GPORCA_VERSION_PATCH 0)
set(GPORCA_VERSION_STRING "${GPORCA_VERSION_MAJOR}.${GPORCA_VERSION_MINOR}.${GPORCA_VERSION_PATCH}")

# Whenever an ABI-breaking change is made to GPORCA, this should be incremented.
Expand Down
1,061 changes: 1,061 additions & 0 deletions data/dxl/minidump/ManyTextUnionsInSubquery.mdp

Large diffs are not rendered by default.

19 changes: 16 additions & 3 deletions libgpopt/include/gpopt/engine/CHint.h
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@

#define JOIN_ORDER_DP_THRESHOLD ULONG(10)
#define BROADCAST_THRESHOLD ULONG(10000000)
#define PUSH_GROUP_BY_BELOW_SETOP_THRESHOLD ULONG(10)


namespace gpopt
{
Expand Down Expand Up @@ -47,6 +49,8 @@ namespace gpopt

BOOL m_fEnforceConstraintsOnDML;

ULONG m_ulPushGroupByBelowSetopThreshold;

// private copy ctor
CHint(const CHint &);

Expand All @@ -60,15 +64,17 @@ namespace gpopt
ULONG array_expansion_threshold,
ULONG ulJoinOrderDPLimit,
ULONG broadcast_threshold,
BOOL enforce_constraint_on_dml
BOOL enforce_constraint_on_dml,
ULONG push_group_by_below_setop_threshold
)
:
m_ulMinNumOfPartsToRequireSortOnInsert(min_num_of_parts_to_require_sort_on_insert),
m_ulJoinArityForAssociativityCommutativity(join_arity_for_associativity_commutativity),
m_ulArrayExpansionThreshold(array_expansion_threshold),
m_ulJoinOrderDPLimit(ulJoinOrderDPLimit),
m_ulBroadcastThreshold(broadcast_threshold),
m_fEnforceConstraintsOnDML(enforce_constraint_on_dml)
m_fEnforceConstraintsOnDML(enforce_constraint_on_dml),
m_ulPushGroupByBelowSetopThreshold(push_group_by_below_setop_threshold)
{
}

Expand Down Expand Up @@ -121,6 +127,12 @@ namespace gpopt
return m_fEnforceConstraintsOnDML;
}

// Upper bound on set-op arity for CXformPushGbBelowSetOp: the xform is
// skipped when the set op has more children than the value returned here
ULONG UlPushGroupByBelowSetopThreshold() const
{
	return this->m_ulPushGroupByBelowSetopThreshold;
}

// generate default hint configurations, which disables sort during insert on
// append only row-oriented partitioned tables by default
static
Expand All @@ -132,7 +144,8 @@ namespace gpopt
gpos::int_max, /* array_expansion_threshold */
JOIN_ORDER_DP_THRESHOLD, /*ulJoinOrderDPLimit*/
BROADCAST_THRESHOLD, /*broadcast_threshold*/
true /* enforce_constraint_on_dml */
true, /* enforce_constraint_on_dml */
PUSH_GROUP_BY_BELOW_SETOP_THRESHOLD /* push_group_by_below_setop_threshold */
);
}

Expand Down
9 changes: 8 additions & 1 deletion libgpopt/include/gpopt/xforms/CXformPushGbBelowSetOp.h
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
#include "gpos/base.h"
#include "gpopt/operators/ops.h"
#include "gpopt/xforms/CXformExploration.h"
#include "gpopt/optimizer/COptimizerConfig.h"

namespace gpopt
{
Expand Down Expand Up @@ -92,9 +93,15 @@ namespace gpopt
GPOS_ASSERT(FCheckPattern(pexpr));

CMemoryPool *mp = pxfctxt->Pmp();
COptimizerConfig *optconfig = COptCtxt::PoctxtFromTLS()->GetOptimizerConfig();

CExpression *pexprSetOp = (*pexpr)[0];
CExpression *pexprPrjList = (*pexpr)[1];
if (pexprSetOp->Arity() > optconfig->GetHint()->UlPushGroupByBelowSetopThreshold())
{
// bail-out if set op has many children
return;
}
CExpression *pexprPrjList = (*pexpr)[1];
if (0 < pexprPrjList->Arity())
{
// bail-out if group-by has any aggregate functions
Expand Down
1 change: 1 addition & 0 deletions libgpopt/src/optimizer/COptimizerConfig.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -172,6 +172,7 @@ COptimizerConfig::Serialize(CMemoryPool *mp, CXMLSerializer *xml_serializer, CBi
xml_serializer->AddAttribute(CDXLTokens::GetDXLTokenStr(EdxltokenJoinOrderDPThreshold), m_hint->UlJoinOrderDPLimit());
xml_serializer->AddAttribute(CDXLTokens::GetDXLTokenStr(EdxltokenBroadcastThreshold), m_hint->UlBroadcastThreshold());
xml_serializer->AddAttribute(CDXLTokens::GetDXLTokenStr(EdxltokenEnforceConstraintsOnDML), m_hint->FEnforceConstraintsOnDML());
xml_serializer->AddAttribute(CDXLTokens::GetDXLTokenStr(EdxltokenPushGroupByBelowSetopThreshold), m_hint->UlPushGroupByBelowSetopThreshold());
xml_serializer->CloseElement(CDXLTokens::GetDXLTokenStr(EdxltokenNamespacePrefix), CDXLTokens::GetDXLTokenStr(EdxltokenHint));

// Serialize traceflags represented in bitset into stream
Expand Down
1 change: 1 addition & 0 deletions libnaucrates/include/naucrates/dxl/xml/dxltokens.h
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,7 @@ namespace gpdxl
EdxltokenJoinOrderDPThreshold,
EdxltokenBroadcastThreshold,
EdxltokenEnforceConstraintsOnDML,
EdxltokenPushGroupByBelowSetopThreshold,
EdxltokenWindowOids,
EdxltokenOidRowNumber,
EdxltokenOidRank,
Expand Down
6 changes: 4 additions & 2 deletions libnaucrates/src/parser/CParseHandlerHint.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,8 @@ CParseHandlerHint::StartElement
ULONG array_expansion_threshold = CDXLOperatorFactory::ExtractConvertAttrValueToUlong(m_parse_handler_mgr->GetDXLMemoryManager(), attrs, EdxltokenArrayExpansionThreshold, EdxltokenHint, true, gpos::int_max);
ULONG join_order_dp_threshold = CDXLOperatorFactory::ExtractConvertAttrValueToUlong(m_parse_handler_mgr->GetDXLMemoryManager(), attrs, EdxltokenJoinOrderDPThreshold, EdxltokenHint, true, JOIN_ORDER_DP_THRESHOLD);
ULONG broadcast_threshold = CDXLOperatorFactory::ExtractConvertAttrValueToUlong(m_parse_handler_mgr->GetDXLMemoryManager(), attrs, EdxltokenBroadcastThreshold, EdxltokenHint, true, BROADCAST_THRESHOLD);
ULONG enforce_constraint_on_dml = CDXLOperatorFactory::ExtractConvertAttrValueToBool(m_parse_handler_mgr->GetDXLMemoryManager(), attrs, EdxltokenEnforceConstraintsOnDML, EdxltokenHint, true, true);
BOOL enforce_constraint_on_dml = CDXLOperatorFactory::ExtractConvertAttrValueToBool(m_parse_handler_mgr->GetDXLMemoryManager(), attrs, EdxltokenEnforceConstraintsOnDML, EdxltokenHint, true, true);
ULONG push_group_by_below_setop_threshold = CDXLOperatorFactory::ExtractConvertAttrValueToUlong(m_parse_handler_mgr->GetDXLMemoryManager(), attrs, EdxltokenPushGroupByBelowSetopThreshold, EdxltokenHint, true, PUSH_GROUP_BY_BELOW_SETOP_THRESHOLD);

m_hint = GPOS_NEW(m_mp) CHint
(
Expand All @@ -96,7 +97,8 @@ CParseHandlerHint::StartElement
array_expansion_threshold,
join_order_dp_threshold,
broadcast_threshold,
enforce_constraint_on_dml
enforce_constraint_on_dml,
push_group_by_below_setop_threshold
);
}

Expand Down
1 change: 1 addition & 0 deletions libnaucrates/src/xml/dxltokens.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,7 @@ CDXLTokens::Init
{EdxltokenJoinOrderDPThreshold, GPOS_WSZ_LIT("JoinOrderDynamicProgThreshold")},
{EdxltokenBroadcastThreshold, GPOS_WSZ_LIT("BroadcastThreshold")},
{EdxltokenEnforceConstraintsOnDML, GPOS_WSZ_LIT("EnforceConstraintsOnDML")},
{EdxltokenPushGroupByBelowSetopThreshold, GPOS_WSZ_LIT("PushGroupByBelowSetopThreshold")},
{EdxltokenWindowOids, GPOS_WSZ_LIT("WindowOids")},
{EdxltokenOidRowNumber, GPOS_WSZ_LIT("RowNumber")},
{EdxltokenOidRank, GPOS_WSZ_LIT("Rank")},
Expand Down
2 changes: 1 addition & 1 deletion server/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -188,7 +188,7 @@ PartTbl-WindowFuncNoDisjunctPredPushDown PartTbl-WindowFuncSinglePredPushDown;
CSetop1Test:
ValueScanWithDuplicateAndSelfComparison PushGbBelowNaryUnionAll
PushGbBelowNaryUnion-1 PushGbBelowNaryUnion-2 MS-UnionAll-1
MS-UnionAll-5 MS-UnionAll-6 MS-UnionAll-7;
MS-UnionAll-5 MS-UnionAll-6 MS-UnionAll-7 ManyTextUnionsInSubquery;
CSetop2Test:
Cascaded-UnionAll-Same-Cols-Order Cascaded-UnionAll-Differing-Cols
Expand Down

0 comments on commit 80b9608

Please sign in to comment.