Centralize GPU configuration vars #4264

Open · wants to merge 1 commit into base: main
9 changes: 5 additions & 4 deletions gpu/amd_linux.go
@@ -13,6 +13,7 @@ import (
"strings"

"github.com/ollama/ollama/format"
"github.com/ollama/ollama/server/envconfig"
)

// Discovery logic for AMD/ROCm GPUs
@@ -51,9 +52,9 @@ func AMDGetGPUInfo() []GpuInfo {

// Determine if the user has already pre-selected which GPUs to look at, then ignore the others
var visibleDevices []string
- hipVD := os.Getenv("HIP_VISIBLE_DEVICES") // zero based index only
- rocrVD := os.Getenv("ROCR_VISIBLE_DEVICES") // zero based index or UUID, but consumer cards seem to not support UUID
- gpuDO := os.Getenv("GPU_DEVICE_ORDINAL") // zero based index
+ hipVD := envconfig.HipVisibleDevices // zero based index only
+ rocrVD := envconfig.RocrVisibleDevices // zero based index or UUID, but consumer cards seem to not support UUID
+ gpuDO := envconfig.GpuDeviceOrdinal // zero based index
switch {
// TODO is this priority order right?
case hipVD != "":
@@ -66,7 +67,7 @@ func AMDGetGPUInfo() []GpuInfo {
visibleDevices = strings.Split(gpuDO, ",")
}

- gfxOverride := os.Getenv("HSA_OVERRIDE_GFX_VERSION")
+ gfxOverride := envconfig.HsaOverrideGfxVersion
var supported []string
libDir := ""

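The hunk above shows only the first and last arms of the selection switch. As a hedged illustration, here is a condensed sketch of the precedence it implies (HIP_VISIBLE_DEVICES, then ROCR_VISIBLE_DEVICES, then GPU_DEVICE_ORDINAL), reading the centralized envconfig values instead of calling os.Getenv; the switch arms elided from the diff are assumptions, not the repository's exact code.

// Condensed sketch, not the repository's exact code: the device pre-selection
// implied by the hunk above, reading the centralized envconfig values. The
// switch arms elided from the diff are assumptions.
package gpu

import (
	"strings"

	"github.com/ollama/ollama/server/envconfig"
)

func visibleAMDDevices() []string {
	hipVD := envconfig.HipVisibleDevices   // zero based index only
	rocrVD := envconfig.RocrVisibleDevices // zero based index or UUID
	gpuDO := envconfig.GpuDeviceOrdinal    // zero based index

	switch {
	case hipVD != "":
		return strings.Split(hipVD, ",")
	case rocrVD != "":
		return strings.Split(rocrVD, ",")
	case gpuDO != "":
		return strings.Split(gpuDO, ",")
	}
	return nil // no pre-selection: consider every discovered GPU
}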
3 changes: 2 additions & 1 deletion gpu/amd_windows.go
@@ -11,6 +11,7 @@ import (
"strings"

"github.com/ollama/ollama/format"
"github.com/ollama/ollama/server/envconfig"
)

const (
@@ -54,7 +55,7 @@ func AMDGetGPUInfo() []GpuInfo {
}

var supported []string
- gfxOverride := os.Getenv("HSA_OVERRIDE_GFX_VERSION")
+ gfxOverride := envconfig.HsaOverrideGfxVersion
if gfxOverride == "" {
supported, err = GetSupportedGFX(libDir)
if err != nil {
30 changes: 28 additions & 2 deletions server/envconfig/config.go
@@ -31,10 +31,21 @@ var (
RunnersDir string
// Set via OLLAMA_TMPDIR in the environment
TmpDir string

+ // Set via CUDA_VISIBLE_DEVICES in the environment
+ CudaVisibleDevices string
+ // Set via HIP_VISIBLE_DEVICES in the environment
+ HipVisibleDevices string
+ // Set via ROCR_VISIBLE_DEVICES in the environment
+ RocrVisibleDevices string
+ // Set via GPU_DEVICE_ORDINAL in the environment
+ GpuDeviceOrdinal string
+ // Set via HSA_OVERRIDE_GFX_VERSION in the environment
+ HsaOverrideGfxVersion string
)

func AsMap() map[string]string {
- return map[string]string{
+ ret := map[string]string{
"OLLAMA_ORIGINS": fmt.Sprintf("%v", AllowOrigins),
"OLLAMA_DEBUG": fmt.Sprintf("%v", Debug),
"OLLAMA_LLM_LIBRARY": fmt.Sprintf("%v", LLMLibrary),
@@ -46,6 +57,14 @@ func AsMap() map[string]string {
"OLLAMA_RUNNERS_DIR": fmt.Sprintf("%v", RunnersDir),
"OLLAMA_TMPDIR": fmt.Sprintf("%v", TmpDir),
}
if runtime.GOOS != "darwin" {
ret["CUDA_VISIBLE_DEVICES"] = fmt.Sprintf("%v", CudaVisibleDevices)
ret["HIP_VISIBLE_DEVICES"] = fmt.Sprintf("%v", HipVisibleDevices)
ret["ROCR_VISIBLE_DEVICES"] = fmt.Sprintf("%v", RocrVisibleDevices)
ret["GPU_DEVICE_ORDINAL"] = fmt.Sprintf("%v", GpuDeviceOrdinal)
ret["HSA_OVERRIDE_GFX_VERSION"] = fmt.Sprintf("%v", HsaOverrideGfxVersion)
}
return ret
}

var defaultAllowOrigins = []string{
@@ -163,12 +182,19 @@ func LoadConfig() {
}
}

- if onp := os.Getenv("OLLAMA_MAX_QUEUE"); onp != "" {
+ if onp := clean("OLLAMA_MAX_QUEUE"); onp != "" {
p, err := strconv.Atoi(onp)
if err != nil || p <= 0 {
slog.Error("invalid setting", "OLLAMA_MAX_QUEUE", onp, "error", err)
} else {
MaxQueuedRequests = p
}
}

+ CudaVisibleDevices = clean("CUDA_VISIBLE_DEVICES")
Review comment on this line:
@dhiltgen is this public variable CudaVisibleDevices meant to be used in cudaGetVisibleDevicesEnv?

func cudaGetVisibleDevicesEnv(gpuInfo []GpuInfo) (string, string) {

Collaborator Author replied:

Good question.

For NVIDIA GPUs, we use their C libraries to discover the GPUs. We've recently switched to using the Driver API as our primary source, and fall back to the CUDA runtime library if that fails (most likely we'll remove the CUDA runtime code in a few releases, as long as the Driver API looks reliable). Those libraries already implement filtering based on this environment variable, so the calls here and here will only return a subset of GPUs based on that variable if the user has set it.

In our scheduler, we pick which GPU (of the GPUs exposed) to run a model on, and when we run the subprocess for inference, we wire up the environment variable to ensure that subprocess uses exactly the set of GPUs we want it to use.
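As a hedged sketch of that last step (not the actual ollama scheduler code), wiring a chosen GPU set into the inference subprocess environment could look like this; runOnGPUs and its parameters are illustrative names:

// Hypothetical sketch: pin an inference subprocess to a chosen set of GPUs by
// overriding the vendor visibility variable in the child's environment.
package sched

import (
	"fmt"
	"os"
	"os/exec"
	"strings"
)

func runOnGPUs(binary string, args []string, gpuIDs []string) error {
	cmd := exec.Command(binary, args...)
	// Inherit the parent environment, then force CUDA_VISIBLE_DEVICES to the
	// scheduler's selection so the child only sees those devices.
	cmd.Env = append(os.Environ(),
		fmt.Sprintf("CUDA_VISIBLE_DEVICES=%s", strings.Join(gpuIDs, ",")))
	cmd.Stdout = os.Stdout
	cmd.Stderr = os.Stderr
	return cmd.Run()
}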

+ HipVisibleDevices = clean("HIP_VISIBLE_DEVICES")
+ RocrVisibleDevices = clean("ROCR_VISIBLE_DEVICES")
+ GpuDeviceOrdinal = clean("GPU_DEVICE_ORDINAL")
+ HsaOverrideGfxVersion = clean("HSA_OVERRIDE_GFX_VERSION")

}
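LoadConfig reads these values through a clean(key string) helper that the diff references but does not include. A plausible sketch, assuming it simply normalizes the raw environment value (the real implementation may differ):

// Assumed sketch of the clean helper used above; the actual implementation in
// server/envconfig is not shown in this diff.
package envconfig

import (
	"os"
	"strings"
)

// clean reads an environment variable and strips surrounding whitespace and
// stray quote characters that users sometimes include when exporting values.
func clean(key string) string {
	return strings.Trim(os.Getenv(key), "\"' ")
}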