pingcap · ti-chi-bot · Oct 9, 2025
diff --git a/pkg/expression/grouping_sets.go b/pkg/expression/grouping_sets.go
@@ -242,8 +242,8 @@ func (gs GroupingSet) AllColIDs() *intset.FastIntSet {
 }
 
 // ExtractCols is used to extract basic columns from one grouping set.
-func (gs GroupingSet) ExtractCols() []*Column {
-	cols := make([]*Column, 0, len(gs))
+// The cols parameter lets the caller pass in a slice to reuse; the extracted columns are appended to it.
+func (gs GroupingSet) ExtractCols(cols []*Column) []*Column {
 	for _, groupingExprs := range gs {
 		for _, one := range groupingExprs {
 			cols = append(cols, one.(*Column))

diff --git a/pkg/expression/util.go b/pkg/expression/util.go
@@ -153,7 +153,70 @@ func ExtractCorColumns(expr Expression) (cols []*CorrelatedColumn) {
 // To avoid allocation for cols that not need.
 func ExtractColumnsFromExpressions(result []*Column, exprs []Expression, filter func(*Column) bool) []*Column {
 	for _, expr := range exprs {
+<<<<<<< HEAD
 		result = extractColumns(result, expr, filter)
+=======
+		extractColumns(m, expr, filter)
+	}
+	result := slices.Collect(maps.Values(m))
+	// The keys in a map are unordered, so to ensure stability, we need to sort them here.
+	slices.SortFunc(result, func(a, b *Column) int {
+		return cmp.Compare(a.UniqueID, b.UniqueID)
+	})
+	return result
+}
+
+// ExtractColumnsMapFromExpressions is the same as ExtractColumnsFromExpressions, but returns the columns as a map (nil when exprs is empty).
+func ExtractColumnsMapFromExpressions(filter func(*Column) bool, exprs ...Expression) map[int64]*Column {
+	if len(exprs) == 0 {
+		return nil
+	}
+	m := make(map[int64]*Column, len(exprs))
+	for _, expr := range exprs {
+		extractColumns(m, expr, filter)
+	}
+	return m
+}
+
+// ExtractColumnsMapFromExpressionsWithReusedMap is the same as ExtractColumnsMapFromExpressions, but passes the caller-supplied map m to extractColumns so the map can be reused. NOTE(review): if m is nil, a new map is assigned only to the local parameter and nothing is returned, so the caller never sees the result — confirm callers always pass a non-nil map.
+func ExtractColumnsMapFromExpressionsWithReusedMap(m map[int64]*Column, filter func(*Column) bool, exprs ...Expression) {
+	if len(exprs) == 0 {
+		return
+	}
+	if m == nil {
+		m = make(map[int64]*Column, len(exprs))
+	}
+	for _, expr := range exprs {
+		extractColumns(m, expr, filter)
+	}
+}
+
+// ExtractAllColumnsFromExpressionsInUsedSlices is the same as ExtractColumns, but it passes the caller-supplied reuse slice to extractColumnsSlices, then sorts the result by UniqueID and removes adjacent entries with equal UniqueID; it returns nil when exprs is empty.
+func ExtractAllColumnsFromExpressionsInUsedSlices(reuse []*Column, filter func(*Column) bool, exprs ...Expression) []*Column {
+	if len(exprs) == 0 {
+		return nil
+	}
+	for _, expr := range exprs {
+		reuse = extractColumnsSlices(reuse, expr, filter)
+	}
+	slices.SortFunc(reuse, func(a, b *Column) int {
+		return cmp.Compare(a.UniqueID, b.UniqueID)
+	})
+	reuse = slices.CompactFunc(reuse, func(a, b *Column) bool {
+		return a.UniqueID == b.UniqueID
+	})
+	return reuse
+}
+
+// ExtractAllColumnsFromExpressions is the same as ExtractColumnsFromExpressions, but it does not remove duplicates.
+func ExtractAllColumnsFromExpressions(exprs []Expression, filter func(*Column) bool) []*Column {
+	if len(exprs) == 0 {
+		return nil
+	}
+	result := make([]*Column, 0, 8)
+	for _, expr := range exprs {
+		result = extractColumnsSlices(result, expr, filter)
+>>>>>>> 3a54eaa3ffb (planner: fix LogicalProjection.DeriveStats allocate too many memories (#63829))
 	}
 	return result
 }

diff --git a/pkg/planner/core/operator/logicalop/logical_projection.go b/pkg/planner/core/operator/logicalop/logical_projection.go
@@ -333,9 +333,17 @@ func (p *LogicalProjection) DeriveStats(childStats []*property.StatsInfo, selfSc
 		RowCount: childProfile.RowCount,
 		ColNDVs:  make(map[int64]float64, len(p.Exprs)),
 	})
+	cols := make([]*expression.Column, 0, 8)
 	for i, expr := range p.Exprs {
+<<<<<<< HEAD
 		cols := expression.ExtractColumns(expr)
 		p.StatsInfo().ColNDVs[selfSchema.Columns[i].UniqueID], _ = cardinality.EstimateColsNDVWithMatchedLen(cols, childSchema[0], childProfile)
+=======
+		cols = expression.ExtractAllColumnsFromExpressionsInUsedSlices(cols, nil, expr)
+		p.StatsInfo().ColNDVs[selfSchema.Columns[i].UniqueID], _ = cardinality.EstimateColsNDVWithMatchedLen(
+			p.SCtx(), cols, childSchema[0], childProfile)
+		cols = cols[:0]
+>>>>>>> 3a54eaa3ffb (planner: fix LogicalProjection.DeriveStats allocate too many memories (#63829))
 	}
 	p.StatsInfo().GroupNDVs = p.getGroupNDVs(colGroups, childProfile, selfSchema)
 	return p.StatsInfo(), nil

diff --git a/pkg/planner/core/task.go b/pkg/planner/core/task.go
@@ -1941,11 +1941,17 @@ func (p *PhysicalHashAgg) scaleStats4GroupingSets(groupingSets expression.Groupi
 		}
 	}
 	sumNDV := float64(0)
+	groupingSetCols := make([]*expression.Column, 0, 4)
 	for _, groupingSet := range groupingSets {
 		// for every grouping set, pick its cols out, and combine with normal group cols to get the ndv.
-		groupingSetCols := groupingSet.ExtractCols()
+		groupingSetCols = groupingSet.ExtractCols(groupingSetCols)
 		groupingSetCols = append(groupingSetCols, normalGbyCols...)
+<<<<<<< HEAD
 		ndv, _ := cardinality.EstimateColsNDVWithMatchedLen(groupingSetCols, childSchema, childStats)
+=======
+		ndv, _ := cardinality.EstimateColsNDVWithMatchedLen(p.SCtx(), groupingSetCols, childSchema, childStats)
+		groupingSetCols = groupingSetCols[:0]
+>>>>>>> 3a54eaa3ffb (planner: fix LogicalProjection.DeriveStats allocate too many memories (#63829))
 		sumNDV += ndv
 	}
 	// After group operator, all same rows are grouped into one row, that means all