Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

split binaries into metadata and data layers #4110

Draft
wants to merge 9 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
3 changes: 2 additions & 1 deletion convert/convert.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ import (
"encoding/binary"
"encoding/json"
"fmt"
"io"
"log/slog"
"os"
"path/filepath"
Expand Down Expand Up @@ -47,7 +48,7 @@ type ByteOrder interface {
type ModelArch interface {
GetTensors() error
LoadVocab() error
WriteGGUF() (string, error)
WriteGGUF(io.WriteSeeker) error
}

type ModelFormat interface {
Expand Down
15 changes: 2 additions & 13 deletions convert/gemma.go
Original file line number Diff line number Diff line change
Expand Up @@ -94,7 +94,7 @@ func (m *GemmaModel) LoadVocab() error {
return nil
}

func (m *GemmaModel) WriteGGUF() (string, error) {
func (m *GemmaModel) WriteGGUF(ws io.WriteSeeker) error {
kv := llm.KV{
"general.architecture": "gemma",
"general.name": m.Name,
Expand Down Expand Up @@ -122,16 +122,5 @@ func (m *GemmaModel) WriteGGUF() (string, error) {
"tokenizer.ggml.add_eos_token": false,
}

f, err := os.CreateTemp("", "ollama-gguf")
if err != nil {
return "", err
}
defer f.Close()

mod := llm.NewGGUFV3(m.Params.ByteOrder)
if err := mod.Encode(f, kv, m.Tensors); err != nil {
return "", err
}

return f.Name(), nil
return llm.NewGGUFV3(m.Params.ByteOrder).Encode(ws, kv, m.Tensors)
}
13 changes: 3 additions & 10 deletions convert/llama.go
Original file line number Diff line number Diff line change
Expand Up @@ -132,7 +132,7 @@ func (m *LlamaModel) LoadVocab() error {
return nil
}

func (m *LlamaModel) WriteGGUF() (string, error) {
func (m *LlamaModel) WriteGGUF(ws io.WriteSeeker) error {
kv := llm.KV{
"general.architecture": "llama",
"general.name": m.Name,
Expand Down Expand Up @@ -161,16 +161,9 @@ func (m *LlamaModel) WriteGGUF() (string, error) {

f, err := os.CreateTemp("", "ollama-gguf")
if err != nil {
return "", err
return err
}
defer f.Close()

mod := llm.NewGGUFV3(m.Params.ByteOrder)
if err := mod.Encode(f, kv, m.Tensors); err != nil {
return "", err
}

slog.Debug(fmt.Sprintf("gguf file = %s", f.Name()))

return f.Name(), nil
return llm.NewGGUFV3(m.Params.ByteOrder).Encode(f, kv, m.Tensors)
}
15 changes: 2 additions & 13 deletions convert/mistral.go
Original file line number Diff line number Diff line change
Expand Up @@ -132,7 +132,7 @@ func (m *MistralModel) LoadVocab() error {
return nil
}

func (m *MistralModel) WriteGGUF() (string, error) {
func (m *MistralModel) WriteGGUF(ws io.WriteSeeker) error {
kv := llm.KV{
"general.architecture": "llama",
"general.name": m.Name,
Expand All @@ -158,16 +158,5 @@ func (m *MistralModel) WriteGGUF() (string, error) {
"tokenizer.ggml.unknown_token_id": uint32(0),
}

f, err := os.CreateTemp("", "ollama-gguf")
if err != nil {
return "", err
}
defer f.Close()

mod := llm.NewGGUFV3(m.Params.ByteOrder)
if err := mod.Encode(f, kv, m.Tensors); err != nil {
return "", err
}

return f.Name(), nil
return llm.NewGGUFV3(m.Params.ByteOrder).Encode(ws, kv, m.Tensors)
}
17 changes: 3 additions & 14 deletions convert/mixtral.go
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
package convert

import (
"os"
"io"
"regexp"

"github.com/ollama/ollama/llm"
Expand Down Expand Up @@ -47,7 +47,7 @@ func (m *MixtralModel) LoadVocab() error {
return nil
}

func (m *MixtralModel) WriteGGUF() (string, error) {
func (m *MixtralModel) WriteGGUF(ws io.WriteSeeker) error {
kv := llm.KV{
"general.architecture": "llama",
"general.name": m.Name,
Expand Down Expand Up @@ -81,16 +81,5 @@ func (m *MixtralModel) WriteGGUF() (string, error) {
"tokenizer.ggml.add_eos_token": false,
}

f, err := os.CreateTemp("", "ollama-gguf")
if err != nil {
return "", err
}
defer f.Close()

mod := llm.NewGGUFV3(m.Params.ByteOrder)
if err := mod.Encode(f, kv, m.Tensors); err != nil {
return "", err
}

return f.Name(), nil
return llm.NewGGUFV3(m.Params.ByteOrder).Encode(ws, kv, m.Tensors)
}
2 changes: 1 addition & 1 deletion integration/utils_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -107,7 +107,7 @@ func startServer(ctx context.Context, ollamaHost string) error {

if tmp := os.Getenv("OLLAMA_HOST"); tmp != ollamaHost {
slog.Info("setting env", "OLLAMA_HOST", ollamaHost)
os.Setenv("OLLAMA_HOST", ollamaHost)
t.Setenv("OLLAMA_HOST", ollamaHost)
}

slog.Info("starting server", "url", ollamaHost)
Expand Down
140 changes: 140 additions & 0 deletions llm/filetype.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,140 @@
package llm

import "fmt"

type fileType uint32

const (
fileTypeF32 fileType = iota
fileTypeF16
fileTypeQ4_0
fileTypeQ4_1
fileTypeQ4_1_F16
fileTypeQ4_2 // unused
fileTypeQ4_3 // unused
fileTypeQ8_0
fileTypeQ5_0
fileTypeQ5_1
fileTypeQ2_K
fileTypeQ3_K_S
fileTypeQ3_K_M
fileTypeQ3_K_L
fileTypeQ4_K_S
fileTypeQ4_K_M
fileTypeQ5_K_S
fileTypeQ5_K_M
fileTypeQ6_K
fileTypeIQ2_XXS
fileTypeIQ2_XS
fileTypeQ2_K_S
fileTypeQ3_K_XS
fileTypeIQ3_XXS

fileTypeUnknown
)

func ParseFileType(s string) (fileType, error) {
switch s {
case "F32":
return fileTypeF32, nil
case "F16":
return fileTypeF16, nil
case "Q4_0":
return fileTypeQ4_0, nil
case "Q4_1":
return fileTypeQ4_1, nil
case "Q4_1_F16":
return fileTypeQ4_1_F16, nil
case "Q8_0":
return fileTypeQ8_0, nil
case "Q5_0":
return fileTypeQ5_0, nil
case "Q5_1":
return fileTypeQ5_1, nil
case "Q2_K":
return fileTypeQ2_K, nil
case "Q3_K_S":
return fileTypeQ3_K_S, nil
case "Q3_K_M":
return fileTypeQ3_K_M, nil
case "Q3_K_L":
return fileTypeQ3_K_L, nil
case "Q4_K_S":
return fileTypeQ4_K_S, nil
case "Q4_K_M":
return fileTypeQ4_K_M, nil
case "Q5_K_S":
return fileTypeQ5_K_S, nil
case "Q5_K_M":
return fileTypeQ5_K_M, nil
case "Q6_K":
return fileTypeQ6_K, nil
case "IQ2_XXS":
return fileTypeIQ2_XXS, nil
case "IQ2_XS":
return fileTypeIQ2_XS, nil
case "Q2_K_S":
return fileTypeQ2_K_S, nil
case "Q3_K_XS":
return fileTypeQ3_K_XS, nil
case "IQ3_XXS":
return fileTypeIQ3_XXS, nil
default:
return fileTypeUnknown, fmt.Errorf("unknown fileType: %s", s)
}
}

func (t fileType) String() string {
switch t {
case fileTypeF32:
return "F32"
case fileTypeF16:
return "F16"
case fileTypeQ4_0:
return "Q4_0"
case fileTypeQ4_1:
return "Q4_1"
case fileTypeQ4_1_F16:
return "Q4_1_F16"
case fileTypeQ8_0:
return "Q8_0"
case fileTypeQ5_0:
return "Q5_0"
case fileTypeQ5_1:
return "Q5_1"
case fileTypeQ2_K:
return "Q2_K"
case fileTypeQ3_K_S:
return "Q3_K_S"
case fileTypeQ3_K_M:
return "Q3_K_M"
case fileTypeQ3_K_L:
return "Q3_K_L"
case fileTypeQ4_K_S:
return "Q4_K_S"
case fileTypeQ4_K_M:
return "Q4_K_M"
case fileTypeQ5_K_S:
return "Q5_K_S"
case fileTypeQ5_K_M:
return "Q5_K_M"
case fileTypeQ6_K:
return "Q6_K"
case fileTypeIQ2_XXS:
return "IQ2_XXS"
case fileTypeIQ2_XS:
return "IQ2_XS"
case fileTypeQ2_K_S:
return "Q2_K_S"
case fileTypeQ3_K_XS:
return "Q3_K_XS"
case fileTypeIQ3_XXS:
return "IQ3_XXS"
default:
return "unknown"
}
}

func (t fileType) Value() uint32 {
return uint32(t)
}
12 changes: 12 additions & 0 deletions llm/ggla.go
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ func (c *containerGGLA) Decode(rs io.ReadSeeker) (model, error) {

type ggla struct {
*containerGGLA
offset int64

kv KV
tensors []*Tensor
Expand All @@ -53,6 +54,10 @@ func (llm *ggla) Tensors() Tensors {
return llm.tensors
}

func (llm *ggla) Offset() int64 {
return llm.offset
}

func (llm *ggla) decode(rs io.ReadSeeker) error {
var r uint32
if err := binary.Read(rs, binary.LittleEndian, &r); err != nil {
Expand All @@ -66,6 +71,13 @@ func (llm *ggla) decode(rs io.ReadSeeker) error {
}
llm.kv["alpha"] = alpha

offset, err := rs.Seek(0, io.SeekCurrent)
if err != nil {
return err
}

llm.offset = offset

for {
var dims uint32
if err := binary.Read(rs, binary.LittleEndian, &dims); err != nil {
Expand Down