kubelet: consider real CPU topology when checking whether a Core/Socket is free #124561

Open · wants to merge 1 commit into master
4 changes: 2 additions & 2 deletions pkg/kubelet/cm/cpumanager/cpu_assignment.go
@@ -251,13 +251,13 @@ func (a *cpuAccumulator) isNUMANodeFree(numaID int) bool {
// Returns true if the supplied socket is fully available in `a.details`.
// "fully available" means that all the CPUs in it are free.
func (a *cpuAccumulator) isSocketFree(socketID int) bool {
- return a.details.CPUsInSockets(socketID).Size() == a.topo.CPUsPerSocket()
+ return a.details.CPUsInSockets(socketID).Size() == a.topo.CPUDetails.CPUsInSockets(socketID).Size()
Contributor:
If CPUsPerSocket is no longer used (which should be the case), let's delete it.

Author:
Done.

}

// Returns true if the supplied core is fully available in `a.details`.
// "fully available" means that all the CPUs in it are free.
func (a *cpuAccumulator) isCoreFree(coreID int) bool {
- return a.details.CPUsInCores(coreID).Size() == a.topo.CPUsPerCore()
+ return a.details.CPUsInCores(coreID).Size() == a.topo.CPUDetails.CPUsInCores(coreID).Size()
Contributor:
Considering issue #124066 as an example: with this change, the offlined core (id=25) won't be used, right?

Author:
  1. Yes, CPU(id=25) would not be used by container allocation, because Core(id=25) had already given out CPU(id=25) during the earlier reserved-CPU allocation. The buggy part is in acc.takeFullCores, invoked by takeByTopologyNUMAPacked during container allocation: before this fix it considered Core(id=25) free and took CPU(id=25) and CPU(id=73) directly by referring to the raw Topo (see the sketch after this function).
  2. The core containing the offline CPU(id=95) is id=47, not id=25; that only affects isCoreFree. The offline CPU(id=95) is never used anyway, because it does not appear in the Topo discovered by cAdvisor.

Contributor:
ok, thanks

}
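
To make the bug concrete, below is a standalone sketch rather than the real kubelet package: the types are simplified stand-ins for pkg/kubelet/cm/cpumanager/topology, and the 2-core SMT-2 machine with one offline sibling is hypothetical. Upstream, CPUsPerCore() computes NumCPUs/NumCores, so a single offline CPU floors the average under integer division, and a core with an already-allocated thread can still pass the old "free" test; comparing against the CPUs the discovered topology actually places on that core cannot.

package main

import "fmt"

// Simplified stand-ins for the cpumanager topology types; names loosely
// mirror pkg/kubelet/cm/cpumanager/topology, but this is a sketch, not
// the real package, and the tiny topology below is hypothetical.
type cpuInfo struct{ coreID int }

type topology struct {
	numCPUs  int             // online CPUs only, as discovered via cAdvisor
	numCores int             // physical cores
	details  map[int]cpuInfo // CPU ID -> placement; offline CPUs never appear
}

// The old helper: average threads per core, floored by integer division.
// With one sibling offline (3 CPUs on 2 cores) this returns 1.
func (t topology) cpusPerCore() int { return t.numCPUs / t.numCores }

// How many CPUs the discovered topology actually places on a core.
func (t topology) sizeOfCore(coreID int) int {
	n := 0
	for _, info := range t.details {
		if info.coreID == coreID {
			n++
		}
	}
	return n
}

func main() {
	// SMT-2 machine with 2 cores; the sibling thread on core 1 is offline,
	// so only 3 CPUs are discovered.
	t := topology{
		numCPUs:  3,
		numCores: 2,
		details: map[int]cpuInfo{
			0: {coreID: 0}, 1: {coreID: 0}, // core 0: both threads online
			2: {coreID: 1}, // core 1: one thread, sibling offline
		},
	}

	// Pretend CPU 1 is already allocated, so core 0 has one free CPU left.
	freeOnCore0 := 1

	// Old check: 1 == 3/2 == 1, so the half-allocated core 0 looks free
	// and takeFullCores would hand out its already-allocated sibling.
	fmt.Println("old isCoreFree(0):", freeOnCore0 == t.cpusPerCore()) // true (wrong)

	// New check: compare against the core's real discovered CPU count.
	fmt.Println("new isCoreFree(0):", freeOnCore0 == t.sizeOfCore(0)) // false (correct)
}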

// Returns free NUMA Node IDs as a slice sorted by sortAvailableNUMANodes().
66 changes: 65 additions & 1 deletion pkg/kubelet/cm/cpumanager/cpu_assignment_test.go
@@ -110,6 +110,18 @@ func TestCPUAccumulatorFreeSockets(t *testing.T) {
mustParseCPUSet(t, "0-40,42-49,51-68,71-79"),
[]int{},
},
{
"multi numa, dual socket, HT, last CPU offline, socket-0 free",
topoDualSocketMultiNumaPerSocketHTLargeWithSingleCPUOffline,
mustParseCPUSet(t, "0-63,128-191"),
[]int{0},
},
{
"multi numa, dual socket, HT, last CPU offline, socket-1 free",
topoDualSocketMultiNumaPerSocketHTLargeWithSingleCPUOffline,
mustParseCPUSet(t, "64-127,192-254"),
[]int{1},
},
}

for _, tc := range testCases {
@@ -119,7 +131,6 @@ func TestCPUAccumulatorFreeSockets(t *testing.T) {
sort.Ints(result)
if !reflect.DeepEqual(result, tc.expect) {
t.Errorf("expected %v to equal %v", result, tc.expect)

}
})
}
@@ -210,6 +221,24 @@ func TestCPUAccumulatorFreeNUMANodes(t *testing.T) {
mustParseCPUSet(t, "0-9,11-59,61-79"),
[]int{},
},
{
"multi numa, dual socket, HT, last CPU offline, NUMA node-0 free",
topoDualSocketMultiNumaPerSocketHTLargeWithSingleCPUOffline,
mustParseCPUSet(t, "0-15,128-143"),
[]int{0},
},
{
"multi numa, dual socket, HT, last CPU offline, NUMA node-7 free",
topoDualSocketMultiNumaPerSocketHTLargeWithSingleCPUOffline,
mustParseCPUSet(t, "112-127,240-254"),
[]int{7},
},
{
"multi numa, dual socket, HT, last CPU offline, 0 NUMA nodes free(1 CPU consumed)",
topoDualSocketMultiNumaPerSocketHTLargeWithSingleCPUOffline,
mustParseCPUSet(t, "0-15,128-142"),
[]int{},
},
}

for _, tc := range testCases {
@@ -331,6 +360,18 @@ func TestCPUAccumulatorFreeCores(t *testing.T) {
cpuset.New(2, 3, 4, 5, 8, 9, 10, 11),
[]int{2, 4, 3, 5},
},
{
"multi numa, dual socket, HT, last CPU offline, 0 cores free (1 partially consumed)",
topoDualSocketMultiNumaPerSocketHTLargeWithSingleCPUOffline,
mustParseCPUSet(t, "0"),
[]int{},
},
{
"multi numa, dual socket, HT, last CPU offline, 1 cores free (1 online CPU, 1 offline CPU)",
topoDualSocketMultiNumaPerSocketHTLargeWithSingleCPUOffline,
mustParseCPUSet(t, "127"),
[]int{127},
},
}

for _, tc := range testCases {
@@ -854,6 +895,29 @@ func TestTakeByTopologyNUMADistributed(t *testing.T) {
"",
mustParseCPUSet(t, "43-47,75-79,96,101-105,171-174,203-206,229-232"),
},
// this case demonstrates that the policy does not guarantee full physical CPUs yet. Once the behavior changes, it can be deleted.
// the provided available NUMA nodes are:
// - 0,1: 5 aligned cores each, plus 2 single CPUs within 2 cores
// - 2,3: 6 aligned cores each
// the result chooses all 24 CPUs in NUMA nodes 0,1, but NUMA nodes 2,3 would be better
{
"allocate 10 full cores distributed across first 2 NUMA nodes and 4 CPUs spilling over to each of NUMA 0,1",
topoDualSocketMultiNumaPerSocketHTLarge,
mustParseCPUSet(t, "0-5,129-134,16-21,144-149,32-37,160-165,48-53,176-181"),
24,
2,
"",
mustParseCPUSet(t, "0-5,16-21,129-134,144-149"),
},
{
"allocate 10 full cores distributed across first 2 NUMA nodes and 4 CPUs spilling over to each of NUMA 0,1(CPU 255 offline)",
topoDualSocketMultiNumaPerSocketHTLargeWithSingleCPUOffline,
mustParseCPUSet(t, "0-5,129-134,16-21,144-149,32-37,160-165,48-53,176-181"),
24,
2,
"",
mustParseCPUSet(t, "0-5,16-21,129-134,144-149"),
},
}...)

for _, tc := range testCases {
4 changes: 2 additions & 2 deletions pkg/kubelet/cm/cpumanager/policy_static.go
@@ -310,7 +310,7 @@ func (p *staticPolicy) Allocate(s state.State, pod *v1.Pod, container *v1.Container
}()

if p.options.FullPhysicalCPUsOnly {
- CPUsPerCore := p.topology.CPUsPerCore()
+ CPUsPerCore := p.topology.MaxCPUsPerCore()
if (numCPUs % CPUsPerCore) != 0 {
// Since CPU Manager has been enabled requesting strict SMT alignment, it means a guaranteed pod can only be admitted
// if the CPU requested is a multiple of the number of virtual cpus per physical cores.
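
A note on the hunk above: the admission rule it implements is simple modular arithmetic, sketched below (the function and error text are illustrative; kubelet raises its own dedicated SMT alignment error). MaxCPUsPerCore is not defined in this diff; presumably it returns the largest thread count of any discovered core, so that one offline sibling no longer floors the averaged NumCPUs/NumCores ratio to 1, in which case numCPUs % 1 == 0 always holds and the strict-alignment check would silently admit requests that split a physical core.

package main

import "fmt"

// Illustrative only: this sketch just shows the arithmetic of the
// admission rule above, not kubelet's actual error handling.
func checkSMTAlignment(numCPUs, cpusPerCore int) error {
	if numCPUs%cpusPerCore != 0 {
		return fmt.Errorf("requested %d CPUs, not a multiple of %d threads per physical core", numCPUs, cpusPerCore)
	}
	return nil
}

func main() {
	fmt.Println(checkSMTAlignment(4, 2)) // <nil>: two full cores
	fmt.Println(checkSMTAlignment(3, 2)) // error: would split a physical core
	// With the averaged ratio floored to 1 by an offline CPU, every request
	// passes and the full-physical-CPU guarantee is silently lost.
	fmt.Println(checkSMTAlignment(3, 1)) // <nil>: misalignment goes undetected
}
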
@@ -486,7 +486,7 @@ func (p *staticPolicy) takeByTopology(availableCPUs cpuset.CPUSet, numCPUs int)
if p.options.DistributeCPUsAcrossNUMA {
cpuGroupSize := 1
if p.options.FullPhysicalCPUsOnly {
- cpuGroupSize = p.topology.CPUsPerCore()
+ cpuGroupSize = p.topology.MaxCPUsPerCore()
}
return takeByTopologyNUMADistributed(p.topology, availableCPUs, numCPUs, cpuGroupSize)
}