launch: use vram bytes for model recommendations (#15885)

Parth Sareen 2026-04-29 18:40:14 -07:00 committed by GitHub
parent bad32c7244
commit b6447caebc
6 changed files with 36 additions and 20 deletions

@@ -813,7 +813,7 @@ type ModelRecommendation struct {
     Description     string `json:"description"`
     ContextLength   int    `json:"context_length,omitempty"`
     MaxOutputTokens int    `json:"max_output_tokens,omitempty"`
-    VRAM            string `json:"vram,omitempty"`
+    VRAMBytes       int64  `json:"vram_bytes,omitempty"`
 }

 // ProcessResponse is the response from [Client.Process].
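On the wire, the recommendation now carries an integer byte count instead of a pre-rendered string. A minimal sketch of the resulting JSON, using a local copy of the struct; the Model field and its "model" tag are assumptions for this sketch, while the other fields and tags are taken from the hunk above:

package main

import (
	"encoding/json"
	"fmt"
)

// ModelRecommendation mirrors the fields visible in the hunk above.
// The Model field and its "model" tag are an assumption for this sketch.
type ModelRecommendation struct {
	Model           string `json:"model"`
	Description     string `json:"description"`
	ContextLength   int    `json:"context_length,omitempty"`
	MaxOutputTokens int    `json:"max_output_tokens,omitempty"`
	VRAMBytes       int64  `json:"vram_bytes,omitempty"`
}

func main() {
	rec := ModelRecommendation{
		Model:       "qwen3.5",
		Description: "Reasoning, coding, and visual understanding locally",
		VRAMBytes:   14_000_000_000, // 14 * format.GigaByte, assuming decimal gigabytes
	}
	out, _ := json.Marshal(rec)
	fmt.Println(string(out))
	// {"model":"qwen3.5","description":"Reasoning, coding, and visual understanding locally","vram_bytes":14000000000}
}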

@@ -1659,7 +1659,7 @@ func TestBuildModelList_Descriptions(t *testing.T) {
     for _, item := range items {
         if item.Name == "qwen3.5" {
-            if !strings.Contains(item.Description, "~11GB") {
+            if !strings.Contains(item.Description, "~14GB") {
                 t.Errorf("not-installed qwen3.5 should show VRAM hint, got %q", item.Description)
             }
             return
@@ -1676,7 +1676,7 @@ func TestBuildModelList_Descriptions(t *testing.T) {
     for _, item := range items {
         if item.Name == "qwen3.5" {
-            if strings.Contains(item.Description, "~11GB") {
+            if strings.Contains(item.Description, "~14GB") {
                 t.Errorf("installed qwen3.5 should not show VRAM hint, got %q", item.Description)
             }
             return

@@ -186,7 +186,7 @@ type ModelItem struct {
     Name            string
     Description     string
     Recommended     bool
-    VRAM            string
+    VRAMBytes       int64
     ContextLength   int
     MaxOutputTokens int
 }
@@ -783,7 +783,7 @@ func (c *launcherClient) requestRecommendations(ctx context.Context) ([]ModelItem
             Name:            name,
             Description:     description,
             Recommended:     true,
-            VRAM:            strings.TrimSpace(rec.VRAM),
+            VRAMBytes:       rec.VRAMBytes,
             ContextLength:   rec.ContextLength,
             MaxOutputTokens: rec.MaxOutputTokens,
         })

@@ -4,6 +4,7 @@ import (
     "context"
     "errors"
     "fmt"
+    "math"
     "net/http"
     "os"
     "os/exec"
@@ -16,6 +17,7 @@ import (
     "github.com/ollama/ollama/api"
     "github.com/ollama/ollama/cmd/config"
     "github.com/ollama/ollama/cmd/internal/fileutil"
+    "github.com/ollama/ollama/format"
     internalcloud "github.com/ollama/ollama/internal/cloud"
     "github.com/ollama/ollama/internal/modelref"
     "github.com/ollama/ollama/progress"
@@ -26,8 +28,19 @@ var recommendedModels = []ModelItem{
     {Name: "qwen3.5:cloud", Description: "Reasoning, coding, and agentic tool use with vision", Recommended: true, ContextLength: 262_144, MaxOutputTokens: 32_768},
     {Name: "glm-5.1:cloud", Description: "Reasoning and code generation", Recommended: true, ContextLength: 202_752, MaxOutputTokens: 131_072},
     {Name: "minimax-m2.7:cloud", Description: "Fast, efficient coding and real-world productivity", Recommended: true, ContextLength: 204_800, MaxOutputTokens: 128_000},
-    {Name: "gemma4", Description: "Reasoning and code generation locally", Recommended: true, VRAM: "~16GB"},
-    {Name: "qwen3.5", Description: "Reasoning, coding, and visual understanding locally", Recommended: true, VRAM: "~11GB"},
+    {Name: "gemma4", Description: "Reasoning and code generation locally", Recommended: true, VRAMBytes: 12 * format.GigaByte},
+    {Name: "qwen3.5", Description: "Reasoning, coding, and visual understanding locally", Recommended: true, VRAMBytes: 14 * format.GigaByte},
 }
+
+func displayVRAM(vramBytes int64) string {
+    if vramBytes <= 0 {
+        return ""
+    }
+    gb := float64(vramBytes) / format.GigaByte
+    if gb == math.Trunc(gb) {
+        return fmt.Sprintf("~%.0fGB", gb)
+    }
+    return fmt.Sprintf("~%.1fGB", gb)
+}

 // cloudModelLimit holds context and output token limits for a cloud model.
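As a quick illustration of the new helper: displayVRAM turns a raw byte count into the "~NGB" hint, dropping the decimal for whole gigabytes and keeping one digit otherwise. A minimal standalone sketch, assuming format.GigaByte is the decimal 10^9 used by the ollama/format package:

package main

import (
	"fmt"
	"math"
)

// gigaByte stands in for format.GigaByte; assumed here to be the decimal 10^9.
const gigaByte = 1_000_000_000

// displayVRAM mirrors the helper added in the diff above.
func displayVRAM(vramBytes int64) string {
	if vramBytes <= 0 {
		return "" // no hint for unknown or zero VRAM
	}
	gb := float64(vramBytes) / gigaByte
	if gb == math.Trunc(gb) {
		return fmt.Sprintf("~%.0fGB", gb) // whole number: "~12GB"
	}
	return fmt.Sprintf("~%.1fGB", gb) // fractional: "~14.5GB"
}

func main() {
	fmt.Println(displayVRAM(12 * gigaByte))  // ~12GB
	fmt.Println(displayVRAM(14_500_000_000)) // ~14.5GB
	fmt.Println(displayVRAM(0))              // prints an empty line
}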
@@ -403,8 +416,8 @@ func buildModelListWithRecommendations(existing []modelInfo, recommendations []ModelItem
         if items[i].Description != "" {
             parts = append(parts, items[i].Description)
         }
-        if items[i].VRAM != "" {
-            parts = append(parts, items[i].VRAM)
+        if vram := displayVRAM(items[i].VRAMBytes); vram != "" {
+            parts = append(parts, vram)
         }
         parts = append(parts, "(not downloaded)")
         items[i].Description = strings.Join(parts, ", ")
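The assembled description for a not-yet-downloaded local model then reads, for example, as below. A sketch with illustrative values; only the parts order and the strings.Join call come from the hunk above:

package main

import (
	"fmt"
	"strings"
)

func main() {
	// Illustrative values for a local model that is not downloaded yet.
	description := "Reasoning, coding, and visual understanding locally"
	vram := "~14GB" // what displayVRAM(14 * format.GigaByte) would produce

	var parts []string
	if description != "" {
		parts = append(parts, description)
	}
	if vram != "" {
		parts = append(parts, vram)
	}
	parts = append(parts, "(not downloaded)")

	fmt.Println(strings.Join(parts, ", "))
	// Output: Reasoning, coding, and visual understanding locally, ~14GB, (not downloaded)
}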

@@ -17,9 +17,12 @@ import (
     "github.com/ollama/ollama/api"
     "github.com/ollama/ollama/envconfig"
+    "github.com/ollama/ollama/format"
 )

-const modelRecommendationsURL = "https://ollama.com/api/experimental/model-recommendations"
+const (
+    modelRecommendationsURL = "https://ollama.com/api/experimental/model-recommendations"
+)

 var (
     modelRecommendationsRefreshInterval = 4 * time.Hour
@@ -320,7 +323,6 @@ func validateModelRecommendations(recs []api.ModelRecommendation) ([]api.ModelRecommendation, error) {
     for _, rec := range recs {
         rec.Model = strings.TrimSpace(rec.Model)
         rec.Description = strings.TrimSpace(rec.Description)
-        rec.VRAM = strings.TrimSpace(rec.VRAM)
         if rec.Model == "" {
             return nil, errors.New("recommendation missing model")
@@ -391,11 +393,11 @@ var defaultModelRecommendations = []api.ModelRecommendation{
     {
         Model:       "gemma4",
         Description: "Reasoning and code generation locally",
-        VRAM:        "~16GB",
+        VRAMBytes:   12 * format.GigaByte,
     },
     {
         Model:       "qwen3.5",
         Description: "Reasoning, coding, and visual understanding locally",
-        VRAM:        "~11GB",
+        VRAMBytes:   14 * format.GigaByte,
     },
 }

@@ -19,6 +19,7 @@ import (
     "github.com/ollama/ollama/api"
     "github.com/ollama/ollama/envconfig"
+    "github.com/ollama/ollama/format"
 )

 func TestModelRecommendationsDefaultOrder(t *testing.T) {
@@ -41,11 +42,11 @@ func TestModelRecommendationsCacheRefreshAppliesServerSideChanges(t *testing.T) {
     first := []api.ModelRecommendation{
         {Model: " first-cloud:cloud ", Description: " first ", ContextLength: 2048, MaxOutputTokens: 512},
-        {Model: " first-local ", Description: " first local ", VRAM: " ~3GB "},
+        {Model: " first-local ", Description: " first local ", VRAMBytes: 3 * format.GigaByte},
     }
     second := []api.ModelRecommendation{
         {Model: "second-cloud:cloud", Description: "second", ContextLength: 4096, MaxOutputTokens: 1024},
-        {Model: "second-local", Description: "second local", VRAM: "~6GB"},
+        {Model: "second-local", Description: "second local", VRAMBytes: 6 * format.GigaByte},
     }
     calls := 0
@@ -76,7 +77,7 @@ func TestModelRecommendationsCacheRefreshAppliesServerSideChanges(t *testing.T) {
     }
     if got, want := cache.Get(), []api.ModelRecommendation{
         {Model: "first-cloud:cloud", Description: "first", ContextLength: 2048, MaxOutputTokens: 512},
-        {Model: "first-local", Description: "first local", VRAM: "~3GB"},
+        {Model: "first-local", Description: "first local", VRAMBytes: 3 * format.GigaByte},
     }; !slices.Equal(got, want) {
         t.Fatalf("after first refresh recommendations = %#v, want %#v", got, want)
     }
@@ -160,7 +161,7 @@ func TestModelRecommendationsCacheRefreshErrorCasesPreserveCurrentData(t *testing.T) {
             setupModelRecommendationsTestEnv(t, "")
             cache := newModelRecommendationsCache()
-            stable := []api.ModelRecommendation{{Model: "stable-local", Description: "stable desc", VRAM: "~2GB"}}
+            stable := []api.ModelRecommendation{{Model: "stable-local", Description: "stable desc", VRAMBytes: 2 * format.GigaByte}}
             cache.set(stable)
             cache.client = &http.Client{Transport: tc.transport}
@@ -211,7 +212,7 @@ func TestModelRecommendationsSnapshotPersistAndLoad(t *testing.T) {
     want := []api.ModelRecommendation{
         {Model: "persist-cloud:cloud", Description: "persisted", ContextLength: 8192, MaxOutputTokens: 2048},
-        {Model: "persist-local", Description: "persisted local", VRAM: "~5GB"},
+        {Model: "persist-local", Description: "persisted local", VRAMBytes: 5 * format.GigaByte},
     }

     writer := newModelRecommendationsCache()
@@ -256,7 +257,7 @@ func TestValidateModelRecommendationsTrimsAndDropsInvalidCloudEntries(t *testing.T) {
     input := []api.ModelRecommendation{
         {Model: " good-cloud:cloud ", Description: " good cloud ", ContextLength: 1024, MaxOutputTokens: 256},
         {Model: "bad-cloud:cloud", Description: "missing limits"},
-        {Model: " good-local ", Description: " good local ", VRAM: " ~2GB "},
+        {Model: " good-local ", Description: " good local ", VRAMBytes: 2 * format.GigaByte},
     }

     got, err := validateModelRecommendations(input)
@@ -266,7 +267,7 @@ func TestValidateModelRecommendationsTrimsAndDropsInvalidCloudEntries(t *testing.T) {
     want := []api.ModelRecommendation{
         {Model: "good-cloud:cloud", Description: "good cloud", ContextLength: 1024, MaxOutputTokens: 256},
-        {Model: "good-local", Description: "good local", VRAM: "~2GB"},
+        {Model: "good-local", Description: "good local", VRAMBytes: 2 * format.GigaByte},
     }
     if !slices.Equal(got, want) {
         t.Fatalf("validated recommendations = %#v, want %#v", got, want)