Skip to content

Commit ed08c26

Browse files
nixprime authored and gvisor-bot committed
Internal change.
PiperOrigin-RevId: 817500534
1 parent efdb4ce commit ed08c26

File tree

6 files changed

+130
-79
lines changed

6 files changed

+130
-79
lines changed

runsc/boot/controller.go

Lines changed: 62 additions & 39 deletions
Original file line number | Diff line number | Diff line change
@@ -17,6 +17,7 @@ package boot
1717
import (
1818
"errors"
1919
"fmt"
20+
"io"
2021
"path"
2122
"strconv"
2223
"sync"
@@ -489,7 +490,7 @@ func (cm *containerManager) PortForward(opts *PortForwardOpts, _ *struct{}) erro
489490

490491
// RestoreOpts contains options related to restoring a container's file system.
491492
type RestoreOpts struct {
492-
// FilePayload contains the state file to be restored, followed in order by:
493+
// FilePayload contains, in order:
493494
// 1. checkpoint state file.
494495
// 2. optional checkpoint pages metadata file.
495496
// 3. optional checkpoint pages file.
@@ -498,6 +499,8 @@ type RestoreOpts struct {
498499
HavePagesFile bool
499500
HaveDeviceFile bool
500501
Background bool
502+
503+
RestoreOptsExtra
501504
}
502505

503506
// Restore loads a container from a statefile.
@@ -521,27 +524,32 @@ func (cm *containerManager) Restore(o *RestoreOpts, _ *struct{}) error {
521524
return fmt.Errorf("at least one file must be passed to Restore")
522525
}
523526

524-
stateFile, err := o.ReleaseFD(0)
527+
stateFile, pagesMetadata, pagesFile, err := getRestoreReadersImpl(o)
525528
if err != nil {
526529
return err
527530
}
531+
defer func() {
532+
if stateFile != nil {
533+
stateFile.Close()
534+
}
535+
if pagesMetadata != nil {
536+
pagesMetadata.Close()
537+
}
538+
if pagesFile != nil {
539+
pagesFile.Close()
540+
}
541+
}()
528542

529-
var stat unix.Stat_t
530-
if err := unix.Fstat(stateFile.FD(), &stat); err != nil {
531-
return err
532-
}
533-
if stat.Size == 0 {
534-
return fmt.Errorf("statefile cannot be empty")
535-
}
536-
537-
reader, metadata, err := state.NewStatefileReader(stateFile, nil)
543+
reader, metadata, err := state.NewStatefileReader(stateFile /* transfers ownership on success */, nil)
538544
if err != nil {
539545
return fmt.Errorf("creating statefile reader: %w", err)
540546
}
547+
stateFile = nil
541548

542549
// Create the main MemoryFile.
543550
mf, err := createMemoryFile(cm.l.root.conf.AppHugePages, cm.l.hostTHP)
544551
if err != nil {
552+
reader.Close()
545553
return fmt.Errorf("creating memory file: %v", err)
546554
}
547555

@@ -559,42 +567,18 @@ func (cm *containerManager) Restore(o *RestoreOpts, _ *struct{}) error {
559567
// Release `cm.l.mu`.
560568
cu.Clean()
561569

562-
fileIdx := 1
563570
if o.HavePagesFile {
564-
pagesMetadataFD, err := o.ReleaseFD(fileIdx)
565-
if err != nil {
566-
return err
567-
}
568-
fileIdx++
569-
570-
pagesFileFD, err := o.ReleaseFD(fileIdx)
571-
if err != nil {
572-
return err
573-
}
574-
fileIdx++
575-
576-
// //pkg/state/wire reads one byte at a time; buffer these reads to
577-
// avoid making one syscall per read. For the state file, this
578-
// buffering is handled by statefile.NewReader() => compressio.Reader
579-
// or compressio.NewSimpleReader().
580-
pagesMetadata := stateio.NewBufioReadCloser(pagesMetadataFD)
581-
// TODO: Allow `runsc restore` to override I/O parameters.
582-
pagesFile := stateio.NewPagesFileFDReaderDefault(int32(pagesFileFD.Release()))
583-
584571
// This immediately starts loading the main MemoryFile asynchronously.
585-
cm.restorer.asyncMFLoader = kernel.NewAsyncMFLoader(pagesMetadata, pagesFile, cm.restorer.mainMF, timer.Fork("PagesFileLoader"))
572+
cm.restorer.asyncMFLoader = kernel.NewAsyncMFLoader(pagesMetadata, pagesFile, cm.restorer.mainMF, timer.Fork("PagesFileLoader")) // transfers ownership
573+
pagesMetadata = nil
574+
pagesFile = nil
586575
}
587576

588577
if o.HaveDeviceFile {
589-
cm.restorer.deviceFile, err = o.ReleaseFD(fileIdx)
578+
cm.restorer.deviceFile, err = o.ReleaseFD(len(o.Files) - 1)
590579
if err != nil {
591580
return err
592581
}
593-
fileIdx++
594-
}
595-
596-
if fileIdx < len(o.Files) {
597-
return fmt.Errorf("more files passed to Restore than expected")
598582
}
599583
timer.Reached("restorer ok")
600584

@@ -634,6 +618,45 @@ func (cm *containerManager) Restore(o *RestoreOpts, _ *struct{}) error {
634618
return cm.restorer.restoreContainerInfo(cm.l, &cm.l.root, timer.Fork("cont:root"))
635619
}
636620

621+
func getRestoreReadersForLocalCheckpointFiles(o *RestoreOpts) (io.ReadCloser, io.ReadCloser, stateio.AsyncReader, error) {
622+
stateFile, err := o.ReleaseFD(0)
623+
if err != nil {
624+
return nil, nil, nil, err
625+
}
626+
cu := cleanup.Make(func() { stateFile.Close() })
627+
defer cu.Clean()
628+
var stat unix.Stat_t
629+
if err := unix.Fstat(stateFile.FD(), &stat); err != nil {
630+
return nil, nil, nil, err
631+
}
632+
if stat.Size == 0 {
633+
return nil, nil, nil, fmt.Errorf("statefile cannot be empty")
634+
}
635+
636+
if !o.HavePagesFile {
637+
cu.Release()
638+
return stateFile, nil, nil, nil
639+
}
640+
pagesMetadataFile, err := o.ReleaseFD(1)
641+
if err != nil {
642+
return nil, nil, nil, err
643+
}
644+
cu.Add(func() { pagesMetadataFile.Close() })
645+
pagesFile, err := o.ReleaseFD(2)
646+
if err != nil {
647+
return nil, nil, nil, err
648+
}
649+
cu.Release()
650+
// //pkg/state/wire reads one byte at a time; buffer reads from
651+
// pagesMetadataFile to avoid making one syscall per read. For the state
652+
// file, this buffering is handled by statefile.NewReader() =>
653+
// compressio.Reader or compressio.NewSimpleReader().
654+
return stateFile,
655+
stateio.NewBufioReadCloser(pagesMetadataFile),
656+
stateio.NewPagesFileFDReaderDefault(int32(pagesFile.Release())),
657+
nil
658+
}
659+
637660
func (cm *containerManager) onRestoreDone() {
638661
cm.l.mu.Lock()
639662
cm.l.state = restored

runsc/boot/restore_impl.go

Lines changed: 9 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -18,8 +18,11 @@
1818
package boot
1919

2020
import (
21+
"io"
22+
2123
specs "github.com/opencontainers/runtime-spec/specs-go"
2224
"gvisor.dev/gvisor/pkg/sentry/fsimpl/proc"
25+
"gvisor.dev/gvisor/pkg/sentry/state/stateio"
2326
"gvisor.dev/gvisor/runsc/config"
2427
)
2528

@@ -30,3 +33,9 @@ func newProcInternalData(conf *config.Config, _ *specs.Spec) *proc.InternalData
3033
}
3134

3235
func (l *Loader) kernelInitExtra() {}
36+
37+
type RestoreOptsExtra struct{}
38+
39+
func getRestoreReadersImpl(o *RestoreOpts) (io.ReadCloser, io.ReadCloser, stateio.AsyncReader, error) {
40+
return getRestoreReadersForLocalCheckpointFiles(o)
41+
}

runsc/cmd/util/BUILD

Lines changed: 1 addition & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -12,8 +12,7 @@ go_library(
1212
"util.go",
1313
],
1414
visibility = [
15-
"//runsc/cli:__subpackages__",
16-
"//runsc/cmd:__subpackages__",
15+
"//runsc:__subpackages__",
1716
"//tools:__subpackages__",
1817
],
1918
deps = [

runsc/sandbox/sandbox.go

Lines changed: 39 additions & 34 deletions
Original file line number | Diff line number | Diff line change
@@ -502,44 +502,16 @@ func (s *Sandbox) Restore(conf *config.Config, spec *specs.Spec, cid string, ima
502502

503503
log.Debugf("Restore sandbox %q from path %q", s.ID, imagePath)
504504

505-
stateFileName := path.Join(imagePath, checkpointfiles.StateFileName)
506-
sf, err := os.Open(stateFileName)
507-
if err != nil {
508-
return fmt.Errorf("opening state file %q failed: %v", stateFileName, err)
509-
}
510-
defer sf.Close()
511-
512505
opt := boot.RestoreOpts{
513-
FilePayload: urpc.FilePayload{
514-
Files: []*os.File{sf},
515-
},
516506
Background: background,
517507
}
518-
519-
// If the pages file exists, we must pass it in.
520-
pagesFileName := path.Join(imagePath, checkpointfiles.PagesFileName)
521-
pagesReadFlags := os.O_RDONLY
522-
if direct {
523-
// The contents are page-aligned, so it can be opened with O_DIRECT.
524-
pagesReadFlags |= syscall.O_DIRECT
525-
}
526-
if pf, err := os.OpenFile(pagesFileName, pagesReadFlags, 0); err == nil {
527-
defer pf.Close()
528-
pagesMetadataFileName := path.Join(imagePath, checkpointfiles.PagesMetadataFileName)
529-
pmf, err := os.Open(pagesMetadataFileName)
530-
if err != nil {
531-
return fmt.Errorf("opening restore image file %q failed: %v", pagesMetadataFileName, err)
508+
defer func() {
509+
for _, f := range opt.FilePayload.Files {
510+
_ = f.Close()
532511
}
533-
defer pmf.Close()
534-
535-
opt.HavePagesFile = true
536-
opt.FilePayload.Files = append(opt.FilePayload.Files, pmf, pf)
537-
log.Infof("Found page files for sandbox %q. Page metadata: %q, pages: %q", s.ID, pagesMetadataFileName, pagesFileName)
538-
539-
} else if !os.IsNotExist(err) {
540-
return fmt.Errorf("opening restore pages file %q failed: %v", pagesFileName, err)
541-
} else {
542-
log.Infof("Using single checkpoint file for sandbox %q", s.ID)
512+
}()
513+
if err := s.setRestoreOptsImpl(conf, imagePath, direct, &opt); err != nil {
514+
return err
543515
}
544516

545517
// If the platform needs a device FD we must pass it in.
@@ -575,6 +547,39 @@ func (s *Sandbox) Restore(conf *config.Config, spec *specs.Spec, cid string, ima
575547
return nil
576548
}
577549

550+
func (s *Sandbox) setRestoreOptsForLocalCheckpointFiles(conf *config.Config, imagePath string, direct bool, opt *boot.RestoreOpts) error {
551+
stateFileName := path.Join(imagePath, checkpointfiles.StateFileName)
552+
sf, err := os.Open(stateFileName)
553+
if err != nil {
554+
return fmt.Errorf("opening state file %q failed: %w", stateFileName, err)
555+
}
556+
opt.FilePayload.Files = append(opt.FilePayload.Files, sf)
557+
558+
// If either the pages metadata file or pages file exist, both must exist,
559+
// and we must pass them in.
560+
pagesMetadataFileName := path.Join(imagePath, checkpointfiles.PagesMetadataFileName)
561+
if pmf, err := os.Open(pagesMetadataFileName); err == nil {
562+
opt.FilePayload.Files = append(opt.FilePayload.Files, pmf)
563+
pagesFileName := path.Join(imagePath, checkpointfiles.PagesFileName)
564+
pagesReadFlags := os.O_RDONLY
565+
if direct {
566+
// The contents are page-aligned, so it can be opened with O_DIRECT.
567+
pagesReadFlags |= syscall.O_DIRECT
568+
}
569+
pf, err := os.OpenFile(pagesFileName, pagesReadFlags, 0)
570+
if err != nil {
571+
return fmt.Errorf("opening pages file %q failed: %w", pagesFileName, err)
572+
}
573+
opt.FilePayload.Files = append(opt.FilePayload.Files, pf)
574+
opt.HavePagesFile = true
575+
} else if !os.IsNotExist(err) {
576+
return fmt.Errorf("opening pages metadata file %q failed: %w", pagesMetadataFileName, err)
577+
} else {
578+
log.Infof("Using single checkpoint file for sandbox %q", s.ID)
579+
}
580+
return nil
581+
}
582+
578583
// RestoreSubcontainer sends the restore call for a sub-container in the sandbox.
579584
func (s *Sandbox) RestoreSubcontainer(spec *specs.Spec, conf *config.Config, cid string, stdios, goferFiles, goferFilestoreFiles []*os.File, devIOFile *os.File, goferMountConf []boot.GoferMountConf) error {
580585
log.Debugf("Restore sub-container %q in sandbox %q, PID: %d", cid, s.ID, s.Pid.Load())

runsc/sandbox/sandbox_impl.go

Lines changed: 5 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -18,10 +18,15 @@
1818
package sandbox
1919

2020
import (
21+
"gvisor.dev/gvisor/runsc/boot"
2122
"gvisor.dev/gvisor/runsc/config"
2223
"gvisor.dev/gvisor/runsc/donation"
2324
)
2425

2526
func createSandboxProcessExtra(conf *config.Config, args *Args, donations *donation.Agency) error {
2627
return nil
2728
}
29+
30+
func (s *Sandbox) setRestoreOptsImpl(conf *config.Config, imagePath string, direct bool, opt *boot.RestoreOpts) error {
31+
return s.setRestoreOptsForLocalCheckpointFiles(conf, imagePath, direct, opt)
32+
}

tools/embeddedbinary/embeddedbinary_template.go

Lines changed: 14 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -38,6 +38,11 @@ var compressedBinary []byte
3838

3939
// Options is the set of options to execute the embedded binary.
4040
type Options struct {
41+
// If TempDir is non-empty, the embedded binary will be extracted to a
42+
// subdirectory of TempDir. Otherwise, the embedded binary will be
43+
// extracted to a subdirectory of os.TempDir().
44+
TempDir string
45+
4146
// Argv is the set of arguments to exec with.
4247
// `Argv[0]` is the name of the binary as invoked.
4348
// If Argv is empty, it will default to a single-element slice, with
@@ -50,6 +55,10 @@ type Options struct {
5055
// Files is the set of file descriptors to pass to forked processes.
5156
// Only used when forking, not pure exec'ing.
5257
Files []uintptr
58+
59+
// SysProcAttr provides OS-specific options to the executed process.
60+
// Only used when forking, not pure exec'ing.
61+
SysProcAttr *unix.SysProcAttr
5362
}
5463

5564
// Bogus import to satisfy the compiler that we are using the flate import,
@@ -60,7 +69,7 @@ const _ = flate.NoCompression
6069
// If fork is true, the binary runs in a separate process, and its PID is
6170
// returned.
6271
// Otherwise, the binary is exec'd, so the current process stops executing.
63-
func run(options Options, fork bool) (int, error) {
72+
func run(options *Options, fork bool) (int, error) {
6473
if len(options.Argv) == 0 {
6574
options.Argv = []string{BinaryName}
6675
}
@@ -71,7 +80,7 @@ func run(options Options, fork bool) (int, error) {
7180
defer runtime.UnlockOSThread()
7281
oldMask := unix.Umask(0077)
7382
defer unix.Umask(oldMask)
74-
tmpDir, err := os.MkdirTemp("", "gvisor.*.tmp")
83+
tmpDir, err := os.MkdirTemp(options.TempDir, "gvisor.*.tmp")
7584
if err != nil {
7685
return 0, fmt.Errorf("cannot create temp directory: %w", err)
7786
}
@@ -112,6 +121,7 @@ func run(options Options, fork bool) (int, error) {
112121
return syscall.ForkExec(fdPath, options.Argv, &syscall.ProcAttr{
113122
Env: options.Envv,
114123
Files: options.Files,
124+
Sys: options.SysProcAttr,
115125
})
116126
}
117127
if err := unix.Exec(fdPath, options.Argv, options.Envv); err != nil {
@@ -123,12 +133,12 @@ func run(options Options, fork bool) (int, error) {
123133
// Exec execs the embedded binary. The current process is replaced.
124134
// This function only returns if unsuccessful.
125135
func Exec(options Options) error {
126-
_, err := run(options, false)
136+
_, err := run(&options, false)
127137
return err
128138
}
129139

130140
// ForkExec runs the embedded binary in a separate process.
131141
// Returns the PID of the child process.
132142
func ForkExec(options Options) (int, error) {
133-
return run(options, true)
143+
return run(&options, true)
134144
}

0 commit comments

Comments
 (0)