Skip to content

Commit ae21d52

Browse files
committed
added vgpu-combined
1 parent 82b9164 commit ae21d52

File tree

2 files changed

+140
-108
lines changed

2 files changed

+140
-108
lines changed
Lines changed: 126 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,126 @@
1+
package vgpu_combined
2+
3+
import (
4+
"fmt"
5+
6+
"github.com/NVIDIA/go-nvlib/pkg/nvmdev"
7+
"github.com/NVIDIA/go-nvlib/pkg/nvpci"
8+
"github.com/NVIDIA/vgpu-device-manager/internal/nvlib"
9+
"github.com/NVIDIA/vgpu-device-manager/internal/vfio"
10+
)
11+
12+
type VGPUCombinedManager struct {
13+
isVFIOMode bool
14+
vfio *vfio.VFIOManager
15+
nvlib nvlib.Interface
16+
}
17+
18+
func NewVGPUCombinedManager() (*VGPUCombinedManager, error) {
19+
nvlibInstance := nvlib.New()
20+
vfioManager := vfio.NewVFIOManager(nvlibInstance)
21+
22+
// Determine mode once at initialization
23+
isVFIOMode, err := vfioManager.IsVFIOEnabled(0)
24+
if err != nil {
25+
return nil, fmt.Errorf("error checking if VFIO is enabled: %v", err)
26+
}
27+
28+
return &VGPUCombinedManager{
29+
isVFIOMode: isVFIOMode,
30+
vfio: vfioManager,
31+
nvlib: nvlibInstance,
32+
}, nil
33+
}
34+
35+
// ParentDeviceInterface represents a common interface for both VFIO and MDEV parent devices
36+
type ParentDeviceInterface interface {
37+
GetPhysicalFunction() *nvpci.NvidiaPCIDevice
38+
IsVGPUTypeAvailable(string) (bool, error)
39+
CreateVGPUDevice(string, string) error
40+
GetAvailableVGPUInstances(string) (int, error)
41+
}
42+
43+
// DeviceInterface represents a common interface for both VFIO and MDEV vGPU device instances
44+
type DeviceInterface interface {
45+
GetPhysicalFunction() *nvpci.NvidiaPCIDevice
46+
Delete() error
47+
}
48+
49+
type mdevParentAdapter struct {
50+
*nvmdev.ParentDevice
51+
}
52+
53+
func (a *mdevParentAdapter) IsVGPUTypeAvailable(mdevType string) (bool, error) {
54+
return a.ParentDevice.IsMDEVTypeAvailable(mdevType)
55+
}
56+
57+
func (a *mdevParentAdapter) CreateVGPUDevice(mdevType string, id string) error {
58+
return a.ParentDevice.CreateMDEVDevice(mdevType, id)
59+
}
60+
61+
func (a *mdevParentAdapter) GetAvailableVGPUInstances(mdevType string) (int, error) {
62+
return a.ParentDevice.GetAvailableMDEVInstances(mdevType)
63+
}
64+
65+
// IsVFIOMode returns true if the manager is running in VFIO mode, false for MDEV mode
66+
func (m *VGPUCombinedManager) IsVFIOMode() bool {
67+
return m.isVFIOMode
68+
}
69+
70+
// GetNvpci returns the nvpci interface for GPU enumeration
71+
func (m *VGPUCombinedManager) GetNvpci() nvpci.Interface {
72+
return m.nvlib.Nvpci
73+
}
74+
75+
// GetNvmdev returns the nvmdev interface for MDEV operations
76+
func (m *VGPUCombinedManager) GetNvmdev() nvmdev.Interface {
77+
return m.nvlib.Nvmdev
78+
}
79+
80+
// GetAllParentDevices returns all parent devices as a common interface type
81+
func (m *VGPUCombinedManager) GetAllParentDevices() ([]ParentDeviceInterface, error) {
82+
if m.isVFIOMode {
83+
vfioDevices, err := m.vfio.GetAllParentDevices()
84+
if err != nil {
85+
return nil, err
86+
}
87+
result := make([]ParentDeviceInterface, len(vfioDevices))
88+
for i, d := range vfioDevices {
89+
result[i] = d
90+
}
91+
return result, nil
92+
}
93+
mdevDevices, err := m.nvlib.Nvmdev.GetAllParentDevices()
94+
if err != nil {
95+
return nil, err
96+
}
97+
result := make([]ParentDeviceInterface, len(mdevDevices))
98+
for i, d := range mdevDevices {
99+
result[i] = &mdevParentAdapter{ParentDevice: d}
100+
}
101+
return result, nil
102+
}
103+
104+
// GetAllDevices returns all vGPU device instances as a common interface type
105+
func (m *VGPUCombinedManager) GetAllDevices() ([]DeviceInterface, error) {
106+
if m.isVFIOMode {
107+
vfioDevices, err := m.vfio.GetAllDevices()
108+
if err != nil {
109+
return nil, err
110+
}
111+
result := make([]DeviceInterface, len(vfioDevices))
112+
for i, d := range vfioDevices {
113+
result[i] = d
114+
}
115+
return result, nil
116+
}
117+
mdevDevices, err := m.nvlib.Nvmdev.GetAllDevices()
118+
if err != nil {
119+
return nil, err
120+
}
121+
result := make([]DeviceInterface, len(mdevDevices))
122+
for i, d := range mdevDevices {
123+
result[i] = d
124+
}
125+
return result, nil
126+
}

pkg/vgpu/config.go

Lines changed: 14 additions & 108 deletions
Original file line numberDiff line numberDiff line change
@@ -21,49 +21,16 @@ import (
2121
"strconv"
2222
"strings"
2323

24-
"github.com/NVIDIA/go-nvlib/pkg/nvmdev"
25-
"github.com/NVIDIA/go-nvlib/pkg/nvpci"
2624
"github.com/google/uuid"
2725

28-
"github.com/NVIDIA/vgpu-device-manager/internal/nvlib"
29-
"github.com/NVIDIA/vgpu-device-manager/internal/vfio"
26+
vgpu_combined "github.com/NVIDIA/vgpu-device-manager/internal/vgpu-combined"
3027
"github.com/NVIDIA/vgpu-device-manager/pkg/types"
3128
)
3229

3330
const (
3431
HostPCIDevicesRoot = "/host/sys/bus/pci/devices"
3532
)
3633

37-
// ParentDeviceInterface represents a common interface for both VFIO and MDEV parent devices
38-
type ParentDeviceInterface interface {
39-
GetPhysicalFunction() *nvpci.NvidiaPCIDevice
40-
IsVGPUTypeAvailable(string) (bool, error)
41-
CreateVGPUDevice(string, string) error
42-
GetAvailableVGPUInstances(string) (int, error)
43-
}
44-
45-
// DeviceInterface represents a common interface for both VFIO and MDEV vGPU device instances
46-
type DeviceInterface interface {
47-
GetPhysicalFunction() *nvpci.NvidiaPCIDevice
48-
Delete() error
49-
}
50-
51-
type mdevParentAdapter struct {
52-
*nvmdev.ParentDevice
53-
}
54-
55-
func (a *mdevParentAdapter) IsVGPUTypeAvailable(mdevType string) (bool, error) {
56-
return a.ParentDevice.IsMDEVTypeAvailable(mdevType)
57-
}
58-
59-
func (a *mdevParentAdapter) CreateVGPUDevice(mdevType string, id string) error {
60-
return a.ParentDevice.CreateMDEVDevice(mdevType, id)
61-
}
62-
63-
func (a *mdevParentAdapter) GetAvailableVGPUInstances(mdevType string) (int, error) {
64-
return a.ParentDevice.GetAvailableMDEVInstances(mdevType)
65-
}
66-
6734
// Manager represents a set of functions for managing vGPU configurations on a node
6835
type Manager interface {
6936
GetVGPUConfig(gpu int) (types.VGPUConfig, error)
@@ -72,42 +39,34 @@ type Manager interface {
7239
}
7340

7441
type nvlibVGPUConfigManager struct {
75-
nvlib nvlib.Interface
76-
vfio *vfio.VFIOManager
77-
isVFIOMode bool
42+
combined *vgpu_combined.VGPUCombinedManager
7843
}
7944

8045
var _ Manager = (*nvlibVGPUConfigManager)(nil)
8146

8247
// NewNvlibVGPUConfigManager returns a new vGPU Config Manager which uses go-nvlib when creating / deleting vGPU devices
8348
func NewNvlibVGPUConfigManager() (Manager, error) {
84-
nvlibInstance := nvlib.New()
85-
vfioManager := vfio.NewVFIOManager(nvlibInstance)
86-
87-
// Determine mode once at initialization
88-
isVFIOMode, err := vfioManager.IsVFIOEnabled(0)
49+
combined, err := vgpu_combined.NewVGPUCombinedManager()
8950
if err != nil {
90-
return nil, fmt.Errorf("error checking if VFIO is enabled: %v", err)
51+
return nil, err
9152
}
9253

9354
return &nvlibVGPUConfigManager{
94-
nvlib: nvlibInstance,
95-
vfio: vfioManager,
96-
isVFIOMode: isVFIOMode,
55+
combined: combined,
9756
}, nil
9857
}
9958

10059
// GetVGPUConfig gets the 'VGPUConfig' currently applied to a GPU at a particular index
10160
func (m *nvlibVGPUConfigManager) GetVGPUConfig(gpu int) (types.VGPUConfig, error) {
102-
if m.isVFIOMode {
61+
if m.combined.IsVFIOMode() {
10362
return types.VGPUConfig{}, nil
10463
}
105-
device, err := m.nvlib.Nvpci.GetGPUByIndex(gpu)
64+
device, err := m.combined.GetNvpci().GetGPUByIndex(gpu)
10665
if err != nil {
10766
return nil, fmt.Errorf("error getting device at index '%d': %v", gpu, err)
10867
}
10968

110-
vgpuDevs, err := m.nvlib.Nvmdev.GetAllDevices()
69+
vgpuDevs, err := m.combined.GetNvmdev().GetAllDevices()
11170
if err != nil {
11271
return nil, fmt.Errorf("error getting all vGPU devices: %v", err)
11372
}
@@ -125,18 +84,18 @@ func (m *nvlibVGPUConfigManager) GetVGPUConfig(gpu int) (types.VGPUConfig, error
12584

12685
// SetVGPUConfig applies the selected `VGPUConfig` to a GPU at a particular index if it is not already applied
12786
func (m *nvlibVGPUConfigManager) SetVGPUConfig(gpu int, config types.VGPUConfig) error {
128-
device, err := m.nvlib.Nvpci.GetGPUByIndex(gpu)
87+
device, err := m.combined.GetNvpci().GetGPUByIndex(gpu)
12988
if err != nil {
13089
return fmt.Errorf("error getting device at index '%d': %v", gpu, err)
13190
}
13291

133-
allParents, err := m.GetAllParentDevices()
92+
allParents, err := m.combined.GetAllParentDevices()
13493
if err != nil {
13594
return fmt.Errorf("error getting all parent devices: %v", err)
13695
}
13796

13897
// Filter for 'parent' devices that are backed by the physical function
139-
parents := []ParentDeviceInterface{}
98+
parents := []vgpu_combined.ParentDeviceInterface{}
14099
for _, p := range allParents {
141100
pf := p.GetPhysicalFunction()
142101
if pf.Address == device.Address {
@@ -200,7 +159,7 @@ func (m *nvlibVGPUConfigManager) SetVGPUConfig(gpu int, config types.VGPUConfig)
200159

201160
numToCreate := min(remainingToCreate, available)
202161
for i := 0; i < numToCreate; i++ {
203-
if m.isVFIOMode {
162+
if m.combined.IsVFIOMode() {
204163
err = parent.CreateVGPUDevice(key, strconv.Itoa(i))
205164
if err != nil {
206165
return fmt.Errorf("unable to create %s vGPU device on parent device %s: %v", key, parent.GetPhysicalFunction().Address, err)
@@ -224,12 +183,12 @@ func (m *nvlibVGPUConfigManager) SetVGPUConfig(gpu int, config types.VGPUConfig)
224183

225184
// ClearVGPUConfig clears the 'VGPUConfig' for a GPU at a particular index by deleting all vGPU devices associated with it
226185
func (m *nvlibVGPUConfigManager) ClearVGPUConfig(gpu int) error {
227-
device, err := m.nvlib.Nvpci.GetGPUByIndex(gpu)
186+
device, err := m.combined.GetNvpci().GetGPUByIndex(gpu)
228187
if err != nil {
229188
return fmt.Errorf("error getting device at index '%d': %v", gpu, err)
230189
}
231190

232-
vgpuDevs, err := m.GetAllDevices()
191+
vgpuDevs, err := m.combined.GetAllDevices()
233192
if err != nil {
234193
return fmt.Errorf("error getting all vGPU devices: %v", err)
235194
}
@@ -270,56 +229,3 @@ func stripVGPUConfigSuffix(configType string) string {
270229
}
271230
return configType
272231
}
273-
274-
// IsVFIOMode returns true if the manager is running in VFIO mode, false for MDEV mode
275-
func (m *nvlibVGPUConfigManager) IsVFIOMode() bool {
276-
return m.isVFIOMode
277-
}
278-
279-
// GetAllParentDevices returns all parent devices as a common interface type
280-
func (m *nvlibVGPUConfigManager) GetAllParentDevices() ([]ParentDeviceInterface, error) {
281-
if m.isVFIOMode {
282-
vfioDevices, err := m.vfio.GetAllParentDevices()
283-
if err != nil {
284-
return nil, err
285-
}
286-
result := make([]ParentDeviceInterface, len(vfioDevices))
287-
for i, d := range vfioDevices {
288-
result[i] = d
289-
}
290-
return result, nil
291-
}
292-
mdevDevices, err := m.nvlib.Nvmdev.GetAllParentDevices()
293-
if err != nil {
294-
return nil, err
295-
}
296-
result := make([]ParentDeviceInterface, len(mdevDevices))
297-
for i, d := range mdevDevices {
298-
result[i] = &mdevParentAdapter{ParentDevice: d}
299-
}
300-
return result, nil
301-
}
302-
303-
// GetAllDevices returns all vGPU device instances as a common interface type
304-
func (m *nvlibVGPUConfigManager) GetAllDevices() ([]DeviceInterface, error) {
305-
if m.isVFIOMode {
306-
vfioDevices, err := m.vfio.GetAllDevices()
307-
if err != nil {
308-
return nil, err
309-
}
310-
result := make([]DeviceInterface, len(vfioDevices))
311-
for i, d := range vfioDevices {
312-
result[i] = d
313-
}
314-
return result, nil
315-
}
316-
mdevDevices, err := m.nvlib.Nvmdev.GetAllDevices()
317-
if err != nil {
318-
return nil, err
319-
}
320-
result := make([]DeviceInterface, len(mdevDevices))
321-
for i, d := range mdevDevices {
322-
result[i] = d
323-
}
324-
return result, nil
325-
}

0 commit comments

Comments
 (0)