Skip to content

Commit

Permalink
Merge pull request #47775 from vvoland/v26.1-47771
Browse files Browse the repository at this point in the history
[26.1 backport] Option to avoid deleting the kernel_ll address from bridges.
  • Loading branch information
vvoland committed Apr 30, 2024
2 parents 9a2b531 + 2f5bbbe commit ac2de55
Show file tree
Hide file tree
Showing 3 changed files with 64 additions and 34 deletions.
63 changes: 39 additions & 24 deletions integration/networking/bridge_test.go
Expand Up @@ -469,7 +469,6 @@ func TestDefaultBridgeAddresses(t *testing.T) {
skip.If(t, testEnv.DaemonInfo.OSType == "windows")

ctx := setupTest(t)
d := daemon.New(t)

type testStep struct {
stepName string
Expand All @@ -487,21 +486,21 @@ func TestDefaultBridgeAddresses(t *testing.T) {
{
stepName: "Set up initial UL prefix",
fixedCIDRV6: "fd1c:f1a0:5d8d:aaaa::/64",
expAddrs: []string{"fd1c:f1a0:5d8d:aaaa::1/64", "fe80::1/64"},
expAddrs: []string{"fd1c:f1a0:5d8d:aaaa::1/64", "fe80::"},
},
{
// Modify that prefix, the default bridge's address must be deleted and re-added.
stepName: "Modify UL prefix - address change",
fixedCIDRV6: "fd1c:f1a0:5d8d:bbbb::/64",
expAddrs: []string{"fd1c:f1a0:5d8d:bbbb::1/64", "fe80::1/64"},
expAddrs: []string{"fd1c:f1a0:5d8d:bbbb::1/64", "fe80::"},
},
{
// Modify the prefix length, the default bridge's address should not change.
stepName: "Modify UL prefix - no address change",
fixedCIDRV6: "fd1c:f1a0:5d8d:bbbb::/80",
// The prefix length displayed by 'ip a' is not updated - it's informational, and
// can't be changed without unnecessarily deleting and re-adding the address.
expAddrs: []string{"fd1c:f1a0:5d8d:bbbb::1/64", "fe80::1/64"},
expAddrs: []string{"fd1c:f1a0:5d8d:bbbb::1/64", "fe80::"},
},
},
},
Expand All @@ -511,47 +510,63 @@ func TestDefaultBridgeAddresses(t *testing.T) {
{
stepName: "Standard LL subnet prefix",
fixedCIDRV6: "fe80::/64",
expAddrs: []string{"fe80::1/64"},
expAddrs: []string{"fe80::"},
},
{
// Modify that prefix, the default bridge's address must be deleted and re-added.
// The bridge must still have an address in the required (standard) LL subnet.
stepName: "Nonstandard LL prefix - address change",
fixedCIDRV6: "fe80:1234::/32",
expAddrs: []string{"fe80:1234::1/32", "fe80::1/64"},
expAddrs: []string{"fe80:1234::1/32", "fe80::"},
},
{
// Modify the prefix length, the addresses should not change.
stepName: "Modify LL prefix - no address change",
fixedCIDRV6: "fe80:1234::/64",
// The prefix length displayed by 'ip a' is not updated - it's informational, and
// can't be changed without unnecessarily deleting and re-adding the address.
expAddrs: []string{"fe80:1234::1/", "fe80::1/64"},
expAddrs: []string{"fe80:1234::1/", "fe80::"},
},
},
},
}

for _, tc := range testcases {
t.Run(tc.name, func(t *testing.T) {
for _, preserveKernelLL := range []bool{false, true} {
var dopts []daemon.Option
if preserveKernelLL {
dopts = append(dopts, daemon.WithEnvVars("DOCKER_BRIDGE_PRESERVE_KERNEL_LL=1"))
}
d := daemon.New(t, dopts...)
c := d.NewClientT(t)

for _, tc := range testcases {
for _, step := range tc.steps {
// Check that the daemon starts - regression test for:
// https://github.com/moby/moby/issues/46829
d.Start(t, "--experimental", "--ipv6", "--ip6tables", "--fixed-cidr-v6="+step.fixedCIDRV6)
d.Stop(t)

// Check that the expected addresses have been applied to the bridge. (Skip in
// rootless mode, because the bridge is in a different network namespace.)
if !testEnv.IsRootless() {
res := testutil.RunCommand(ctx, "ip", "-6", "addr", "show", "docker0")
assert.Equal(t, res.ExitCode, 0, step.stepName)
stdout := res.Stdout()
for _, expAddr := range step.expAddrs {
assert.Check(t, is.Contains(stdout, expAddr))
tcName := fmt.Sprintf("kernel_ll_%v/%s/%s", preserveKernelLL, tc.name, step.stepName)
t.Run(tcName, func(t *testing.T) {
ctx := testutil.StartSpan(ctx, t)
// Check that the daemon starts - regression test for:
// https://github.com/moby/moby/issues/46829
d.StartWithBusybox(ctx, t, "--experimental", "--ipv6", "--ip6tables", "--fixed-cidr-v6="+step.fixedCIDRV6)

// Start a container, so that the bridge is set "up" and gets a kernel_ll address.
cID := container.Run(ctx, t, c)
defer c.ContainerRemove(ctx, cID, containertypes.RemoveOptions{Force: true})

d.Stop(t)

// Check that the expected addresses have been applied to the bridge. (Skip in
// rootless mode, because the bridge is in a different network namespace.)
if !testEnv.IsRootless() {
res := testutil.RunCommand(ctx, "ip", "-6", "addr", "show", "docker0")
assert.Equal(t, res.ExitCode, 0, step.stepName)
stdout := res.Stdout()
for _, expAddr := range step.expAddrs {
assert.Check(t, is.Contains(stdout, expAddr))
}
}
}
})
}
})
}
}
}

Expand Down
31 changes: 21 additions & 10 deletions libnetwork/drivers/bridge/interface_linux.go
Expand Up @@ -5,6 +5,7 @@ import (
"fmt"
"net"
"net/netip"
"os"

"github.com/containerd/log"
"github.com/docker/docker/errdefs"
Expand Down Expand Up @@ -73,18 +74,20 @@ func (i *bridgeInterface) addresses(family int) ([]netlink.Addr, error) {
func getRequiredIPv6Addrs(config *networkConfiguration) (requiredAddrs map[netip.Addr]netip.Prefix, err error) {
requiredAddrs = make(map[netip.Addr]netip.Prefix)

// Always give the bridge 'fe80::1' - every interface is required to have an
// address in 'fe80::/64'. Linux may assign an address, but we'll replace it with
// 'fe80::1'. Then, if the configured prefix is 'fe80::/64', the IPAM pool
// assigned address will not be a second address in the LL subnet.
ra, ok := netiputil.ToPrefix(bridgeIPv6)
if !ok {
err = fmt.Errorf("Failed to convert Link-Local IPv6 address to netip.Prefix")
return nil, err
if os.Getenv("DOCKER_BRIDGE_PRESERVE_KERNEL_LL") != "1" {
// Always give the bridge 'fe80::1' - every interface is required to have an
// address in 'fe80::/64'. Linux may assign an address, but we'll replace it with
// 'fe80::1'. Then, if the configured prefix is 'fe80::/64', the IPAM pool
// assigned address will not be a second address in the LL subnet.
ra, ok := netiputil.ToPrefix(bridgeIPv6)
if !ok {
err = fmt.Errorf("Failed to convert Link-Local IPv6 address to netip.Prefix")
return nil, err
}
requiredAddrs[ra.Addr()] = ra
}
requiredAddrs[ra.Addr()] = ra

ra, ok = netiputil.ToPrefix(config.AddressIPv6)
ra, ok := netiputil.ToPrefix(config.AddressIPv6)
if !ok {
err = fmt.Errorf("failed to convert bridge IPv6 address '%s' to netip.Prefix", config.AddressIPv6.String())
return nil, err
Expand Down Expand Up @@ -116,6 +119,14 @@ func (i *bridgeInterface) programIPv6Addresses(config *networkConfiguration) err
if !ok {
return errdefs.System(fmt.Errorf("Failed to convert IPv6 address '%s' to netip.Addr", config.AddressIPv6))
}
// Optionally, avoid deleting the kernel-assigned link local address.
// (Don't delete fe80::1 either - if it was previously assigned to the bridge, and the
// kernel_ll address was deleted, the bridge won't get a new kernel_ll address.)
if os.Getenv("DOCKER_BRIDGE_PRESERVE_KERNEL_LL") == "1" {
if p, _ := ea.Prefix(64); p == linkLocalPrefix {
continue
}
}
// Ignore the prefix length when comparing addresses, it's informational
// (RFC-5942 section 4), and removing/re-adding an address that's still valid
// would disrupt traffic on live-restore.
Expand Down
4 changes: 4 additions & 0 deletions libnetwork/drivers/bridge/setup_ipv6_linux.go
Expand Up @@ -4,6 +4,7 @@ import (
"context"
"fmt"
"net"
"net/netip"
"os"

"github.com/containerd/log"
Expand All @@ -13,6 +14,9 @@ import (
// bridgeIPv6 is the default, link-local IPv6 address for the bridge (fe80::1/64)
var bridgeIPv6 = &net.IPNet{IP: net.ParseIP("fe80::1"), Mask: net.CIDRMask(64, 128)}

// Standard link local prefix
var linkLocalPrefix = netip.MustParsePrefix("fe80::/64")

const (
ipv6ForwardConfPerm = 0o644
ipv6ForwardConfDefault = "/proc/sys/net/ipv6/conf/default/forwarding"
Expand Down

0 comments on commit ac2de55

Please sign in to comment.