Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions pkg/expression/grouping_sets.go
Original file line number Diff line number Diff line change
Expand Up @@ -242,8 +242,8 @@ func (gs GroupingSet) AllColIDs() *intset.FastIntSet {
}

// ExtractCols is used to extract basic columns from one grouping set.
func (gs GroupingSet) ExtractCols() []*Column {
cols := make([]*Column, 0, len(gs))
// The cols parameter lets the caller pass in a slice to reuse; the extracted columns are appended to it.
func (gs GroupingSet) ExtractCols(cols []*Column) []*Column {
for _, groupingExprs := range gs {
for _, one := range groupingExprs {
cols = append(cols, one.(*Column))
Expand Down
63 changes: 63 additions & 0 deletions pkg/expression/util.go
Original file line number Diff line number Diff line change
Expand Up @@ -153,7 +153,70 @@ func ExtractCorColumns(expr Expression) (cols []*CorrelatedColumn) {
// This avoids allocating for cols that are not needed.
func ExtractColumnsFromExpressions(result []*Column, exprs []Expression, filter func(*Column) bool) []*Column {
for _, expr := range exprs {
<<<<<<< HEAD
result = extractColumns(result, expr, filter)
=======
extractColumns(m, expr, filter)
}
result := slices.Collect(maps.Values(m))
// The keys in a map are unordered, so to ensure stability, we need to sort them here.
slices.SortFunc(result, func(a, b *Column) int {
return cmp.Compare(a.UniqueID, b.UniqueID)
})
return result
}

// ExtractColumnsMapFromExpressions is the same as ExtractColumnsFromExpressions, but returns a map.
// It returns nil when exprs is empty. Otherwise it allocates a single map, sized by the
// number of expressions, and passes it together with filter to extractColumns once per
// expression, in order.
// NOTE(review): the map is presumably keyed by Column.UniqueID; confirm against extractColumns.
func ExtractColumnsMapFromExpressions(filter func(*Column) bool, exprs ...Expression) map[int64]*Column {
	n := len(exprs)
	if n == 0 {
		return nil
	}
	colMap := make(map[int64]*Column, n)
	for i := 0; i < n; i++ {
		extractColumns(colMap, exprs[i], filter)
	}
	return colMap
}

// ExtractColumnsMapFromExpressionsWithReusedMap is the same as ExtractColumnsFromExpressions,
// but collects into the caller-supplied map m so that the map can be reused across calls.
// It does nothing when exprs is empty.
// NOTE(review): when m is nil a fresh map is allocated locally, but this function has no
// return value, so whatever is collected into that map never reaches the caller.
// Callers that want the result must pass a non-nil map.
func ExtractColumnsMapFromExpressionsWithReusedMap(m map[int64]*Column, filter func(*Column) bool, exprs ...Expression) {
	n := len(exprs)
	if n == 0 {
		return
	}
	target := m
	if target == nil {
		// Local fallback only; see the NOTE above.
		target = make(map[int64]*Column, n)
	}
	for i := 0; i < n; i++ {
		extractColumns(target, exprs[i], filter)
	}
}

// ExtractAllColumnsFromExpressionsInUsedSlices extracts columns from exprs into the
// caller-supplied slice reuse, so its backing memory can be reused across calls.
// It returns nil when exprs is empty. Otherwise every expression is passed, with filter,
// to extractColumnsSlices, which extends reuse; the slice is then sorted by UniqueID and
// adjacent entries sharing a UniqueID are collapsed into one.
// Anything already stored in reuse before the call takes part in the sort and the
// de-duplication, so callers normally pass a zero-length slice (for example cols[:0]).
func ExtractAllColumnsFromExpressionsInUsedSlices(reuse []*Column, filter func(*Column) bool, exprs ...Expression) []*Column {
	if len(exprs) == 0 {
		return nil
	}
	for i := range exprs {
		reuse = extractColumnsSlices(reuse, exprs[i], filter)
	}
	byUniqueID := func(l, r *Column) int {
		return cmp.Compare(l.UniqueID, r.UniqueID)
	}
	sameUniqueID := func(l, r *Column) bool {
		return l.UniqueID == r.UniqueID
	}
	// Sorting first makes duplicates adjacent, which CompactFunc relies on.
	slices.SortFunc(reuse, byUniqueID)
	return slices.CompactFunc(reuse, sameUniqueID)
}

// ExtractAllColumnsFromExpressions is the same as ExtractColumnsFromExpressions. But this will not remove duplicates.
func ExtractAllColumnsFromExpressions(exprs []Expression, filter func(*Column) bool) []*Column {
if len(exprs) == 0 {
return nil
}
result := make([]*Column, 0, 8)
for _, expr := range exprs {
result = extractColumnsSlices(result, expr, filter)
>>>>>>> 3a54eaa3ffb (planner: fix LogicalProjection.DeriveStats allocate too many memories (#63829))
}
return result
}
Expand Down
8 changes: 8 additions & 0 deletions pkg/planner/core/operator/logicalop/logical_projection.go
Original file line number Diff line number Diff line change
Expand Up @@ -333,9 +333,17 @@ func (p *LogicalProjection) DeriveStats(childStats []*property.StatsInfo, selfSc
RowCount: childProfile.RowCount,
ColNDVs: make(map[int64]float64, len(p.Exprs)),
})
cols := make([]*expression.Column, 0, 8)
for i, expr := range p.Exprs {
<<<<<<< HEAD
cols := expression.ExtractColumns(expr)
p.StatsInfo().ColNDVs[selfSchema.Columns[i].UniqueID], _ = cardinality.EstimateColsNDVWithMatchedLen(cols, childSchema[0], childProfile)
=======
cols = expression.ExtractAllColumnsFromExpressionsInUsedSlices(cols, nil, expr)
p.StatsInfo().ColNDVs[selfSchema.Columns[i].UniqueID], _ = cardinality.EstimateColsNDVWithMatchedLen(
p.SCtx(), cols, childSchema[0], childProfile)
cols = cols[:0]
>>>>>>> 3a54eaa3ffb (planner: fix LogicalProjection.DeriveStats allocate too many memories (#63829))
}
p.StatsInfo().GroupNDVs = p.getGroupNDVs(colGroups, childProfile, selfSchema)
return p.StatsInfo(), nil
Expand Down
8 changes: 7 additions & 1 deletion pkg/planner/core/task.go
Original file line number Diff line number Diff line change
Expand Up @@ -1941,11 +1941,17 @@ func (p *PhysicalHashAgg) scaleStats4GroupingSets(groupingSets expression.Groupi
}
}
sumNDV := float64(0)
groupingSetCols := make([]*expression.Column, 0, 4)
for _, groupingSet := range groupingSets {
// for every grouping set, pick its cols out, and combine with normal group cols to get the ndv.
groupingSetCols := groupingSet.ExtractCols()
groupingSetCols = groupingSet.ExtractCols(groupingSetCols)
groupingSetCols = append(groupingSetCols, normalGbyCols...)
<<<<<<< HEAD
ndv, _ := cardinality.EstimateColsNDVWithMatchedLen(groupingSetCols, childSchema, childStats)
=======
ndv, _ := cardinality.EstimateColsNDVWithMatchedLen(p.SCtx(), groupingSetCols, childSchema, childStats)
groupingSetCols = groupingSetCols[:0]
>>>>>>> 3a54eaa3ffb (planner: fix LogicalProjection.DeriveStats allocate too many memories (#63829))
sumNDV += ndv
}
// After group operator, all same rows are grouped into one row, that means all
Expand Down