Commit 0f83514
ayushr2 authored and gvisor-bot committed

Handle restore failure more robustly.
- This change most notably adds a new loaderState "restoreFailed". As of writing, this is used to classify restore failures which occur after the last container's restore command is received. See the future work section.
- `runsc wait --restore` now returns the restore error, if one occurred.
- Added more explicit checks for all values of loaderState, and error out when an unexpected state is reached.
- `restorer` has been simplified to save a pointer to `cm` instead of using callback functions, which are harder to follow (and step into).

Future work: We still do not handle failures that occur when an intermediate container's restore command fails, or when we receive an unexpected "start" command instead of "restore". In such scenarios, it is unlikely that we will recover to a successful restore, as it suggests some issue with the higher-level tooling issuing these commands.

PiperOrigin-RevId: 831757593
1 parent 2d23faf commit 0f83514
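For orientation, here is a minimal sketch of the loaderState machine this commit extends. The state names match the diffs below; the transition table is inferred from this change alone and is illustrative, not the authoritative set the Loader enforces.

```go
package main

import "fmt"

type loaderState int

const (
	created loaderState = iota
	started
	restoringUnstarted
	restoringStarted
	restoreFailed // new in this commit: restore ended in an error
	restored
)

// next captures the transitions visible in this commit: a fresh sandbox goes
// created -> started; a restoring sandbox goes created -> restoringUnstarted,
// then restoringStarted once the kernel starts, and finally restored on
// success. Either restoring state may instead end in restoreFailed.
var next = map[loaderState][]loaderState{
	created:            {started, restoringUnstarted},
	restoringUnstarted: {restoringStarted, restoreFailed},
	restoringStarted:   {restored, restoreFailed},
}

func canTransition(from, to loaderState) bool {
	for _, s := range next[from] {
		if s == to {
			return true
		}
	}
	return false
}

func main() {
	fmt.Println(canTransition(restoringStarted, restoreFailed)) // true
	fmt.Println(canTransition(restoreFailed, started))          // false: terminal
}
```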

3 files changed: +92, -48 lines

runsc/boot/controller.go
Lines changed: 14 additions & 6 deletions

```diff
@@ -584,12 +584,11 @@ func (cm *containerManager) Restore(o *RestoreOpts, _ *struct{}) error {
     }

     cm.restorer = &restorer{
-        readyToStart:  cm.onStart,
-        onRestoreDone: cm.onRestoreDone,
-        stateFile:     reader,
-        background:    o.Background,
-        timer:         timer,
-        mainMF:        mf,
+        cm:         cm,
+        stateFile:  reader,
+        background: o.Background,
+        timer:      timer,
+        mainMF:     mf,
     }
     cm.l.restoreDone = sync.NewCond(&cm.l.mu)
     cm.l.state = restoringUnstarted
@@ -687,6 +686,15 @@ func getRestoreReadersForLocalCheckpointFiles(o *RestoreOpts) (io.ReadCloser, io
         nil
 }

+func (cm *containerManager) onRestoreFailed(err error) {
+    cm.l.mu.Lock()
+    cm.l.state = restoreFailed
+    cm.l.restoreErr = err
+    cm.l.mu.Unlock()
+    cm.l.restoreDone.Broadcast()
+    cm.restorer = nil
+}
+
 func (cm *containerManager) onRestoreDone() {
     cm.l.mu.Lock()
     cm.l.state = restored
```
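onRestoreFailed and onRestoreDone are the broadcaster half of a sync.Cond handshake; the waiter half is waitRestore and waitContainer in loader.go below. Here is a self-contained sketch of that pattern under illustrative names (the loader type, fail, and the error text are stand-ins, not gVisor code). Calling Broadcast after releasing the mutex, as the diff does, is permitted by sync.Cond; the wait loop below is the conventional guard against waking before a terminal state is reached.

```go
package main

import (
	"errors"
	"fmt"
	"sync"
)

type restoreState int

const (
	restoring restoreState = iota
	restored
	restoreFailed
)

type loader struct {
	mu          sync.Mutex
	state       restoreState
	restoreErr  error
	restoreDone *sync.Cond
}

func newLoader() *loader {
	l := &loader{state: restoring}
	l.restoreDone = sync.NewCond(&l.mu) // waiters block on the same mutex
	return l
}

// fail mirrors onRestoreFailed: record the terminal state and error under
// the lock, then wake *every* waiter so each can re-check the state.
func (l *loader) fail(err error) {
	l.mu.Lock()
	l.state = restoreFailed
	l.restoreErr = err
	l.mu.Unlock()
	l.restoreDone.Broadcast() // legal without holding the lock
}

// waitRestore mirrors Loader.waitRestore: block until restore reaches a
// terminal state, then surface whatever error (possibly nil) was recorded.
func (l *loader) waitRestore() error {
	l.mu.Lock()
	defer l.mu.Unlock()
	for l.state != restored && l.state != restoreFailed {
		l.restoreDone.Wait() // releases mu while blocked, reacquires on wake
	}
	return l.restoreErr
}

func main() {
	l := newLoader()
	go l.fail(errors.New("checkpoint image unreadable")) // hypothetical failure
	fmt.Println("waitRestore returned:", l.waitRestore())
}
```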

runsc/boot/loader.go
Lines changed: 63 additions & 29 deletions

```diff
@@ -167,14 +167,17 @@ type loaderState int
 const (
     // created indicates that the Loader has been created, but not started yet.
     created loaderState = iota
-    // started indicates that the Loader has been started.
+    // started indicates that the Loader has been started. This means that the
+    // root container is running. Subsequent containers may still be unstarted.
     started
     // restoringUnstarted indicates that the Loader has been created and is
     // restoring containers, but not started yet.
     restoringUnstarted
-    // restoringStarted indicates that the Loader has been created and started,
-    // while restore continues in the background.
+    // restoringStarted indicates that the Loader has been created and started
+    // along with all containers, while restore continues in the background.
     restoringStarted
+    // restoreFailed indicates that the Loader has failed to restore.
+    restoreFailed
     // restored indicates that the Loader has been fully restored.
     restored
 )
@@ -190,6 +193,8 @@ func (s loaderState) String() string {
         return "restoringUnstarted"
     case restoringStarted:
         return "restoringStarted"
+    case restoreFailed:
+        return "restoreFailed"
     case restored:
         return "restored"
     default:
@@ -283,6 +288,11 @@ type Loader struct {
     // during restore.
     saveRestoreNet bool

+    // restoreErr is the error that occurred during restore.
+    //
+    // +checklocks:mu
+    restoreErr error
+
     LoaderExtra
 }

@@ -1007,8 +1017,8 @@ func (l *Loader) run() error {
         return fmt.Errorf("trying to start deleted container %q", l.sandboxID)
     }

-    // If we are restoring, we do not want to create a process.
-    if l.state != restoringUnstarted {
+    switch l.state {
+    case created:
         if l.root.conf.ProfileEnable {
             pprof.Initialize()
         }
@@ -1049,6 +1059,10 @@
                 return c.ContainerStart(context.Background(), fields, &evt)
             })
         }
+    case restoringUnstarted:
+        // If we are restoring, we do not want to create a process.
+    default:
+        return fmt.Errorf("Loader.Run() called in unexpected state=%s", l.state)
     }

     ep.tg = l.k.GlobalInit()
@@ -1083,10 +1097,13 @@ func (l *Loader) run() error {
     if err := l.k.Start(); err != nil {
         return err
     }
-    if l.state == restoringUnstarted {
-        l.state = restoringStarted
-    } else {
+    switch l.state {
+    case created:
         l.state = started
+    case restoringUnstarted:
+        l.state = restoringStarted
+    default:
+        panic(fmt.Sprintf("state=%s in Loader.run() should be impossible", l.state))
     }
     return nil
 }
```
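Both rewrites above trade an if/else on one state for a switch that names every expected state. The idiom, sketched below with illustrative names: return an error where a caller could plausibly reach the unexpected state (a bad RPC sequence hitting run()), and panic where earlier checks make the state provably unreachable.

```go
package main

import "fmt"

type state int

const (
	created state = iota
	started
	restoringUnstarted
	restoringStarted
)

func run(s state) (state, error) {
	// Entry check: callers can reach this in a bad state, so error out.
	switch s {
	case created:
		// Fresh start: create the init process, profiling, and so on.
	case restoringUnstarted:
		// Restoring: the process image comes from the checkpoint,
		// so there is nothing to create here.
	default:
		return s, fmt.Errorf("run() called in unexpected state=%d", s)
	}
	// After the kernel starts, only the two states admitted above remain.
	switch s {
	case created:
		return started, nil
	case restoringUnstarted:
		return restoringStarted, nil
	default:
		// Filtered out by the switch above; reaching this is a bug.
		panic(fmt.Sprintf("state=%d should be impossible", s))
	}
}

func main() {
	fmt.Println(run(created))          // started, <nil>
	fmt.Println(run(restoringStarted)) // rejected with an error
}
```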
```diff
@@ -1478,29 +1495,43 @@ func (l *Loader) executeAsync(args *control.ExecArgs) (kernel.ThreadID, error) {

 // waitContainer waits for the init process of a container to exit.
 func (l *Loader) waitContainer(cid string, waitStatus *uint32) error {
-    // Don't defer unlock, as doing so would make it impossible for
-    // multiple clients to wait on the same container.
-    key := execID{cid: cid}
-    tg, err := l.threadGroupFromID(key)
-    if err != nil {
-        l.mu.Lock()
-        // Extra handling is needed if the restoring container has not started yet.
-        if l.state != restoringUnstarted {
-            l.mu.Unlock()
-            return err
-        }
-        // Container could be restoring, first check if container exists.
-        if _, err := l.findProcessLocked(key); err != nil {
-            l.mu.Unlock()
-            return err
-        }
+    l.mu.Lock()
+    state := l.state
+    if state == restoringUnstarted {
         log.Infof("Waiting for the container to restore, CID: %q", cid)
         l.restoreDone.Wait()
         l.mu.Unlock()
-
         log.Infof("Restore is completed, trying to wait for container %q again.", cid)
         return l.waitContainer(cid, waitStatus)
     }
+    tg, err := l.tryThreadGroupFromIDLocked(execID{cid: cid})
+    l.mu.Unlock()
+    if err != nil {
+        // The container does not exist.
+        return err
+    }
+    if tg == nil {
+        // The container has not been started.
+        switch state {
+        case created, started:
+            // Note that state=started means the root container has been started,
+            // but other containers may not have started yet.
+            return fmt.Errorf("container %q not started", cid)
+        case restoringStarted, restored:
+            // The container has restored, we *should* have found the init process...
+            return fmt.Errorf("could not find init process of restored container %q in state %q", cid, state)
+        case restoreFailed:
+            // If restore failed, we should return a non-zero exit status here to
+            // indicate that the container failed and transition to "stopped" state.
+            log.Warningf("Restore failed, returning from waitContainer with non-zero exit status")
+            *waitStatus = 1
+            return nil
+        case restoringUnstarted:
+            panic("impossible")
+        default:
+            panic(fmt.Sprintf("Invalid state: %s", state))
+        }
+    }

     // If the thread either has already exited or exits during waiting,
     // consider the container exited.
@@ -1520,15 +1551,15 @@ func (l *Loader) waitContainer(cid string, waitStatus *uint32) error {
 func (l *Loader) waitRestore() error {
     l.mu.Lock()
     defer l.mu.Unlock()
-    if l.state == restored {
-        return nil
+    if l.state == restored || l.state == restoreFailed {
+        return l.restoreErr
     }
     if l.state != restoringUnstarted && l.state != restoringStarted {
         return fmt.Errorf("sandbox is not being restored, cannot wait for restore: state=%s", l.state)
     }
     log.Infof("Waiting for the sandbox to restore")
     l.restoreDone.Wait()
-    return nil
+    return l.restoreErr
 }

 func (l *Loader) waitPID(tgid kernel.ThreadID, cid string, waitStatus *uint32) error {
@@ -2097,7 +2128,10 @@ func (l *Loader) containerRuntimeState(cid string) ContainerRuntimeState {
         return RuntimeStateStopped
     }
     if exec.tg == nil {
-        // Container has no thread group assigned, so it has started yet.
+        if l.state == restoreFailed {
+            return RuntimeStateStopped
+        }
+        // Container has no thread group assigned, so it has not started yet.
         return RuntimeStateCreating
     }
     if exec.tg.Leader().ExitState() == kernel.TaskExitNone {
```
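The containerRuntimeState change reads naturally as a decision table. A hedged sketch follows: the stopped/creating branch mirrors the diff, while the final exited/running tail is an assumption based on the surrounding context lines.

```go
package main

import "fmt"

type runtimeState string

const (
	stateStopped  runtimeState = "stopped"
	stateCreating runtimeState = "creating"
	stateRunning  runtimeState = "running"
)

// containerRuntimeState mirrors the branch order in the diff: a container
// with no thread group is normally still being created, but after a failed
// restore it will never receive one, so report it as stopped instead.
func containerRuntimeState(restoreFailed, tgAssigned, exited bool) runtimeState {
	if !tgAssigned {
		if restoreFailed {
			return stateStopped // new in this commit
		}
		return stateCreating
	}
	if exited {
		return stateStopped
	}
	return stateRunning
}

func main() {
	fmt.Println(containerRuntimeState(true, false, false))  // stopped
	fmt.Println(containerRuntimeState(false, false, false)) // creating
}
```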

runsc/boot/restore.go
Lines changed: 15 additions & 13 deletions

```diff
@@ -98,11 +98,8 @@ type restorer struct {
     // deviceFile is required to start the platform.
     deviceFile *fd.FD

-    // readyToStart is a callback triggered when the sandbox is ready to start.
-    readyToStart func() error
-
-    // onRestoreDone is a callback triggered when the restore is done.
-    onRestoreDone func()
+    // cm is the container manager that is used to restore the sandbox.
+    cm *containerManager

     // checkpointedSpecs contains the map of container specs used during
     // checkpoint.
@@ -160,13 +157,11 @@ func (r *restorer) restoreContainerInfo(l *Loader, info *containerInfo, containe
     // Non-container-specific restore work:

     if len(r.containers) == r.totalContainers {
-        if err := specutils.RestoreValidateSpec(r.checkpointedSpecs, l.GetContainerSpecs(), l.root.conf); err != nil {
-            return fmt.Errorf("failed to handle restore spec validation: %w", err)
-        }
-        r.timer.Reached("specs validated")
-
         // Trigger the restore if this is the last container.
-        return r.restore(l)
+        if err := r.restore(l); err != nil {
+            r.cm.onRestoreFailed(err)
+            return err
+        }
     }
     return nil
 }
@@ -183,6 +178,13 @@ func createNetworkStackForRestore(l *Loader) (*stack.Stack, inet.Stack) {
 func (r *restorer) restore(l *Loader) error {
     log.Infof("Starting to restore %d containers", len(r.containers))

+    // Validate the container specs to ensure they did not meaningfully change
+    // between checkpoint and restore.
+    if err := specutils.RestoreValidateSpec(r.checkpointedSpecs, l.GetContainerSpecs(), l.root.conf); err != nil {
+        return fmt.Errorf("failed to handle restore spec validation: %w", err)
+    }
+    r.timer.Reached("specs validated")
+
     // Create a new root network namespace with the network stack of the
     // old kernel to preserve the existing network configuration.
     oldStack, oldInetStack := createNetworkStackForRestore(l)
@@ -353,7 +355,7 @@ func (r *restorer) restore(l *Loader) error {
     cu.Clean()

     r.timer.Reached("Starting sandbox")
-    if err := r.readyToStart(); err != nil {
+    if err := r.cm.onStart(); err != nil {
         return fmt.Errorf("restorer.readyToStart callback failed: %w", err)
     }
@@ -386,7 +388,7 @@ func (r *restorer) restore(l *Loader) error {
         }
     }

-    r.onRestoreDone()
+    r.cm.onRestoreDone()
     postRestoreThread.Reached("kernel notified")

     log.Infof("Restore successful")
```
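Finally, the commit message's last point in miniature: a before/after sketch of why storing a typed pointer beats storing callbacks. The types here are toy stand-ins for restorer and containerManager, not the real ones.

```go
package main

import "fmt"

type manager struct{ name string }

func (m *manager) onStart() error { fmt.Println("start:", m.name); return nil }
func (m *manager) onDone()        { fmt.Println("done:", m.name) }

// Before: opaque function fields. A call site like r.readyToStart() gives
// the reader (and the debugger's step-into) no hint of where the code lives,
// and every new hook means another field to wire up.
type restorerWithCallbacks struct {
	readyToStart func() error
	onDone       func()
}

// After: a typed pointer. r.cm.onStart() names the receiver explicitly, and
// a new hook (onRestoreFailed in this commit) needs no extra plumbing.
type restorerWithPointer struct {
	cm *manager
}

func main() {
	m := &manager{name: "sandbox"}

	rc := &restorerWithCallbacks{readyToStart: m.onStart, onDone: m.onDone}
	_ = rc.readyToStart()
	rc.onDone()

	rp := &restorerWithPointer{cm: m}
	_ = rp.cm.onStart()
	rp.cm.onDone()
}
```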
