Commit d004d39

Add e2e downgrade automatic cancellation test
Verify that the downgrade can be cancelled automatically when the downgrade is completed (using `no inflight downgrade job` as the indicator).

Please see: #19365 (comment)
Reference: #17976

Signed-off-by: Chun-Hung Tseng <[email protected]>
1 parent 9f1709e commit d004d39
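The core of what this commit verifies is small. A minimal sketch of the idea, assuming the e2e framework helpers that appear in the diff below (`epc` is an `*e2e.EtcdProcessCluster` whose members have all finished downgrading; the helper name `verifyDowngradeAutoCancelled` is illustrative and not part of the commit):

// Minimal sketch, not the committed code verbatim.
package e2e_test

import (
	"context"
	"testing"

	"github.com/stretchr/testify/require"

	"go.etcd.io/etcd/tests/v3/framework/e2e"
)

// verifyDowngradeAutoCancelled (hypothetical name): once every member runs the
// target version, the leader cancels the downgrade job on its own, so an
// explicit "downgrade cancel" is expected to fail with
// "no inflight downgrade job".
func verifyDowngradeAutoCancelled(t *testing.T, epc *e2e.EtcdProcessCluster) {
	c := epc.Etcdctl()
	err := c.DowngradeCancel(context.TODO())
	require.ErrorContains(t, err, "no inflight downgrade job")
}

The committed helper, `DowngradeAutoCancelCheck` in `tests/framework/e2e/downgrade.go` below, performs the same check but wraps it in `testutils.ExecuteWithTimeout`.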

File tree

2 files changed: +110, -10 lines changed
  tests/e2e/cluster_downgrade_test.go
  tests/framework/e2e/downgrade.go

tests/e2e/cluster_downgrade_test.go

Lines changed: 85 additions & 8 deletions
@@ -127,7 +127,7 @@ func testDowngradeUpgrade(t *testing.T, numberOfMembersToDowngrade int, clusterS
 		})
 	}
 	cc := epc.Etcdctl()
-	t.Logf("Cluster created")
+	t.Log("Cluster created")
 	if len(epc.Procs) > 1 {
 		t.Log("Waiting health interval to required to make membership changes")
 		time.Sleep(etcdserver.HealthInterval)
@@ -140,7 +140,7 @@ func testDowngradeUpgrade(t *testing.T, numberOfMembersToDowngrade int, clusterS
 	resp, err := cc.MemberAddAsLearner(context.Background(), "fake1", []string{"http://127.0.0.1:1001"})
 	require.NoError(t, err)
 	if triggerSnapshot {
-		t.Logf("Generating snapshot")
+		t.Log("Generating snapshot")
 		generateSnapshot(t, snapshotCount, cc)
 		verifySnapshot(t, epc)
 	}
@@ -150,7 +150,7 @@ func testDowngradeUpgrade(t *testing.T, numberOfMembersToDowngrade int, clusterS
 	beforeMembers, beforeKV := getMembersAndKeys(t, cc)

 	if triggerCancellation == cancelRightBeforeEnable {
-		t.Logf("Cancelling downgrade before enabling")
+		t.Log("Cancelling downgrade before enabling")
 		e2e.DowngradeCancel(t, epc)
 		t.Log("Downgrade cancelled, validating if cluster is in the right state")
 		e2e.ValidateMemberVersions(t, epc, generateIdenticalVersions(clusterSize, currentVersion))
@@ -163,7 +163,7 @@ func testDowngradeUpgrade(t *testing.T, numberOfMembersToDowngrade int, clusterS
 	e2e.ValidateDowngradeInfo(t, epc, &pb.DowngradeInfo{Enabled: true, TargetVersion: lastClusterVersion.String()})

 	if triggerCancellation == cancelRightAfterEnable {
-		t.Logf("Cancelling downgrade right after enabling (no node is downgraded yet)")
+		t.Log("Cancelling downgrade right after enabling (no node is downgraded yet)")
 		e2e.DowngradeCancel(t, epc)
 		t.Log("Downgrade cancelled, validating if cluster is in the right state")
 		e2e.ValidateMemberVersions(t, epc, generateIdenticalVersions(clusterSize, currentVersion))
@@ -200,7 +200,7 @@ func testDowngradeUpgrade(t *testing.T, numberOfMembersToDowngrade int, clusterS
 	resp, err = cc.MemberAddAsLearner(context.Background(), "fake2", []string{"http://127.0.0.1:1002"})
 	require.NoError(t, err)
 	if triggerSnapshot {
-		t.Logf("Generating snapshot")
+		t.Log("Generating snapshot")
 		generateSnapshot(t, snapshotCount, cc)
 		verifySnapshot(t, epc)
 	}
@@ -228,6 +228,77 @@ func testDowngradeUpgrade(t *testing.T, numberOfMembersToDowngrade int, clusterS
 	assert.Equal(t, beforeMembers.Members, afterMembers.Members)
 }

+func TestDowngradeAutoCancelAfterCompletion(t *testing.T) {
+	clusterSize := 3
+
+	currentEtcdBinary := e2e.BinPath.Etcd
+	lastReleaseBinary := e2e.BinPath.EtcdLastRelease
+	if !fileutil.Exist(lastReleaseBinary) {
+		t.Skipf("%q does not exist", lastReleaseBinary)
+	}
+
+	currentVersion, err := e2e.GetVersionFromBinary(currentEtcdBinary)
+	require.NoError(t, err)
+	// wipe any pre-release suffix like -alpha.0 we see commonly in builds
+	currentVersion.PreRelease = ""
+
+	lastVersion, err := e2e.GetVersionFromBinary(lastReleaseBinary)
+	require.NoError(t, err)
+
+	require.Equalf(t, lastVersion.Minor, currentVersion.Minor-1, "unexpected minor version difference")
+	currentVersionStr := currentVersion.String()
+	lastVersionStr := lastVersion.String()
+
+	lastClusterVersion := semver.New(lastVersionStr)
+	lastClusterVersion.Patch = 0
+
+	e2e.BeforeTest(t)
+
+	t.Logf("Create cluster with version %s", currentVersionStr)
+	var snapshotCount uint64 = 10
+	epc := newCluster(t, clusterSize, snapshotCount)
+	for i := 0; i < len(epc.Procs); i++ {
+		e2e.ValidateVersion(t, epc.Cfg, epc.Procs[i], version.Versions{
+			Cluster: currentVersionStr,
+			Server:  version.Version,
+			Storage: currentVersionStr,
+		})
+	}
+	cc := epc.Etcdctl()
+	t.Log("Cluster created")
+	if len(epc.Procs) > 1 {
+		t.Log("Waiting health interval to required to make membership changes")
+		time.Sleep(etcdserver.HealthInterval)
+	}
+
+	t.Log("Adding member to test membership, but a learner avoid breaking quorum")
+	resp, err := cc.MemberAddAsLearner(context.Background(), "fake1", []string{"http://127.0.0.1:1001"})
+	require.NoError(t, err)
+	t.Log("Removing learner to test membership")
+	_, err = cc.MemberRemove(context.Background(), resp.Member.ID)
+	require.NoError(t, err)
+	beforeMembers, beforeKV := getMembersAndKeys(t, cc)
+
+	e2e.DowngradeEnable(t, epc, lastVersion)
+
+	t.Logf("Starting downgrade process for all nodes to %q", lastVersionStr)
+	err = e2e.DowngradeUpgradeMembersByID(t, nil, epc, []int{0, 1, 2}, true, currentVersion, lastClusterVersion)
+	require.NoError(t, err)
+
+	afterMembers, afterKV := getMembersAndKeys(t, cc)
+	assert.Equal(t, beforeKV.Kvs, afterKV.Kvs)
+	assert.Equal(t, beforeMembers.Members, afterMembers.Members)
+
+	if len(epc.Procs) > 1 {
+		t.Log("Waiting health interval to required to make membership changes")
+		time.Sleep(etcdserver.HealthInterval)
+	}
+
+	e2e.DowngradeAutoCancelCheck(t, epc)
+	t.Log("Downgrade cancellation is automatically cancelled since the cluster has been downgraded, validating if cluster is in the right state")
+	e2e.ValidateMemberVersions(t, epc, generateIdenticalVersions(clusterSize, lastClusterVersion))
+}
+
 func newCluster(t *testing.T, clusterSize int, snapshotCount uint64) *e2e.EtcdProcessCluster {
 	epc, err := e2e.NewEtcdProcessCluster(context.TODO(), t,
 		e2e.WithClusterSize(clusterSize),
@@ -250,7 +321,7 @@ func generateSnapshot(t *testing.T, snapshotCount uint64, cc *e2e.EtcdctlV3) {
 	defer cancel()

 	var i uint64
-	t.Logf("Adding keys")
+	t.Log("Adding keys")
 	for i = 0; i < snapshotCount*3; i++ {
 		err := cc.Put(ctx, fmt.Sprintf("%d", i), "1", config.PutOptions{})
 		assert.NoError(t, err)
@@ -264,7 +335,7 @@ func verifySnapshot(t *testing.T, epc *e2e.EtcdProcessCluster) {
 		_, err := ss.Load()
 		require.NoError(t, err)
 	}
-	t.Logf("All members have a valid snapshot")
+	t.Log("All members have a valid snapshot")
 }

 func verifySnapshotMembers(t *testing.T, epc *e2e.EtcdProcessCluster, expectedMembers *clientv3.MemberListResponse) {
@@ -301,11 +372,17 @@ func getMembersAndKeys(t *testing.T, cc *e2e.EtcdctlV3) (*clientv3.MemberListRes
 func generateIdenticalVersions(clusterSize int, ver *semver.Version) []*version.Versions {
 	ret := make([]*version.Versions, clusterSize)

+	// storage version string is non-empty starting from 3.6.0
+	storageStr := ver.String()
+	if ver.LessThan(version.V3_6) {
+		storageStr = ""
+	}
+
 	for i := range clusterSize {
 		ret[i] = &version.Versions{
 			Cluster: ver.String(),
 			Server:  ver.String(),
-			Storage: ver.String(),
+			Storage: storageStr,
 		}
 	}

tests/framework/e2e/downgrade.go

Lines changed: 25 additions & 2 deletions
@@ -29,6 +29,7 @@ import (

 	pb "go.etcd.io/etcd/api/v3/etcdserverpb"
 	"go.etcd.io/etcd/api/v3/version"
+	"go.etcd.io/etcd/pkg/v3/expect"
 	"go.etcd.io/etcd/server/v3/etcdserver"
 	"go.etcd.io/etcd/tests/v3/framework/testutils"
 )
@@ -59,7 +60,7 @@ func DowngradeCancel(t *testing.T, epc *EtcdProcessCluster) {
 	var err error
 	testutils.ExecuteWithTimeout(t, 1*time.Minute, func() {
 		for {
-			t.Logf("etcdctl downgrade cancel")
+			t.Log("etcdctl downgrade cancel")
 			err = c.DowngradeCancel(context.TODO())
 			if err != nil {
 				if strings.Contains(err.Error(), "no inflight downgrade job") {
@@ -73,7 +74,7 @@
 				continue
 			}

-			t.Logf("etcdctl downgrade cancel executed successfully")
+			t.Log("etcdctl downgrade cancel executed successfully")
 			break
 		}
 	})
@@ -128,6 +129,19 @@ func ValidateDowngradeInfo(t *testing.T, clus *EtcdProcessCluster, expected *pb.
 	}
 }

+func DowngradeAutoCancelCheck(t *testing.T, epc *EtcdProcessCluster) {
+	c := epc.Etcdctl()
+
+	var err error
+	testutils.ExecuteWithTimeout(t, 1*time.Minute, func() {
+		t.Log("etcdctl downgrade cancel")
+		err = c.DowngradeCancel(context.TODO())
+		require.ErrorContains(t, err, "no inflight downgrade job")
+	})
+
+	t.Log("Cluster downgrade is completed")
+}
+
 func DowngradeUpgradeMembers(t *testing.T, lg *zap.Logger, clus *EtcdProcessCluster, numberOfMembersToChange int, downgradeEnabled bool, currentVersion, targetVersion *semver.Version) error {
 	membersToChange := rand.Perm(len(clus.Procs))[:numberOfMembersToChange]
 	t.Logf("Elect members for operations on members: %v", membersToChange)
@@ -166,6 +180,15 @@ func DowngradeUpgradeMembersByID(t *testing.T, lg *zap.Logger, clus *EtcdProcess
 	t.Log("Waiting health interval to make sure the leader propagates version to new processes")
 	time.Sleep(etcdserver.HealthInterval)

+	if opString == "downgrading" && len(membersToChange) == len(clus.Procs) {
+		testutils.ExecuteWithTimeout(t, 30*time.Second, func() {
+			lg.Info("Waiting for downgrade completion log line")
+			leader := clus.WaitLeader(t)
+			_, err := clus.Procs[leader].Logs().ExpectWithContext(context.Background(), expect.ExpectedResponse{Value: "the cluster has been downgraded"})
+			require.NoError(t, err)
+		})
+	}
+
 	lg.Info("Validating versions")
 	clusterVersion := targetVersion
 	if !isDowngrade {

0 commit comments
