api

package
v0.0.0-...-2e2e86e Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Jan 11, 2026 License: MIT Imports: 21 Imported by: 0

Documentation

Index

Constants

This section is empty.

Variables

This section is empty.

Functions

func FormatParams

func FormatParams(params map[string][]string) (map[string]any, error)

FormatParams converts specified parameter options to their correct types

Types

type AuthorizationError

type AuthorizationError struct {
	StatusCode int
	Status     string
	SigninURL  string `json:"signin_url"`
}

func (AuthorizationError) Error

func (e AuthorizationError) Error() string

type ChatRequest

type ChatRequest struct {
	// Model is the model name, as in [GenerateRequest].
	Model string `json:"model"`

	// Messages is the messages of the chat - can be used to keep a chat memory.
	Messages []Message `json:"messages"`

	// Stream enables streaming of returned responses; true by default.
	Stream *bool `json:"stream,omitempty"`

	// Format is the format to return the response in (e.g. "json").
	Format json.RawMessage `json:"format,omitempty"`

	// KeepAlive controls how long the model will stay loaded into memory
	// following the request.
	KeepAlive *Duration `json:"keep_alive,omitempty"`

	// Tools is an optional list of tools the model has access to.
	Tools `json:"tools,omitempty"`

	// Options lists model-specific options.
	Options map[string]any `json:"options"`

	// Think controls whether thinking/reasoning models will think before
	// responding. Can be a boolean (true/false) or a string ("high", "medium", "low")
	// for supported models.
	Think *ThinkValue `json:"think,omitempty"`

	// DebugRenderOnly is a debug option that, when set to true, returns the rendered
	// template instead of calling the model.
	DebugRenderOnly bool `json:"_debug_render_only,omitempty"`
}

ChatRequest describes a request sent by Client.Chat.

type ChatResponse

type ChatResponse struct {
	// Model is the model name that generated the response.
	Model string `json:"model"`

	// RemoteModel is the name of the upstream model that generated the response.
	RemoteModel string `json:"remote_model,omitempty"`

	// RemoteHost is the URL of the upstream llama.go host that generated the response.
	RemoteHost string `json:"remote_host,omitempty"`

	// CreatedAt is the timestamp of the response.
	CreatedAt time.Time `json:"created_at"`

	// Message contains the message or part of a message from the model.
	Message Message `json:"message"`

	// Done specifies if the response is complete.
	Done bool `json:"done"`

	// DoneReason is the reason the model stopped generating text.
	DoneReason string `json:"done_reason,omitempty"`

	DebugInfo *DebugInfo `json:"_debug_info,omitempty"`

	Metrics
}

ChatResponse is the response returned by Client.Chat. Its fields are similar to GenerateResponse.

type ChatResponseFunc

type ChatResponseFunc func(ChatResponse) error

ChatResponseFunc is a function that Client.Chat invokes every time a response is received from the service. If this function returns an error, Client.Chat will stop generating and return this error.

type Choice

type Choice struct {
	Text         string      `json:"text"`
	Index        int         `json:"index"`
	Logprobs     interface{} `json:"logprobs"`
	FinishReason string      `json:"finish_reason"`
}

type Client

type Client struct {
	// contains filtered or unexported fields
}

Client encapsulates client state for interacting with the llama.go service. Use [ClientFromEnvironment] to create new Clients.

func DefaultClient

func DefaultClient() *Client

func NewClient

func NewClient(base *url.URL, http *http.Client) *Client

func (*Client) Chat

func (c *Client) Chat(ctx context.Context, req *ChatRequest, fn ChatResponseFunc) error

Chat generates the next message in a chat. ChatRequest may contain a sequence of messages which can be used to maintain chat history with a model. fn is called for each response (there may be multiple responses, e.g. in case streaming is enabled).

func (*Client) Embed

func (c *Client) Embed(ctx context.Context, req *EmbedRequest) (*EmbedResponse, error)

Embed generates embeddings from a model.

func (*Client) Embeddings

func (c *Client) Embeddings(ctx context.Context, req *EmbeddingRequest) (*EmbeddingResponse, error)

Embeddings generates an embedding from a model.

func (*Client) Generate

func (c *Client) Generate(ctx context.Context, req *GenerateRequest, fn GenerateResponseFunc) error

Generate generates a response for a given prompt. The req parameter should be populated with prompt details. fn is called for each response (there may be multiple responses, e.g. in case streaming is enabled).

func (*Client) Heartbeat

func (c *Client) Heartbeat(ctx context.Context) error

Heartbeat checks if the server has started and is responsive; if yes, it returns nil, otherwise an error.

func (*Client) List

func (c *Client) List(ctx context.Context) (*ListResponse, error)

List lists models that are available locally.

func (*Client) ListRunning

func (c *Client) ListRunning(ctx context.Context) (*ProcessResponse, error)

ListRunning lists running models.

func (*Client) Pull

func (c *Client) Pull(ctx context.Context, req *PullRequest, fn PullProgressFunc) error

Pull downloads a model from the llama.go library. fn is called each time progress is made on the request and can be used to display a progress bar, etc.

func (*Client) Show

func (c *Client) Show(ctx context.Context, req *ShowRequest) (*ShowResponse, error)

Show obtains model information, including details, modelfile, license etc.

func (*Client) Version

func (c *Client) Version(ctx context.Context) (string, error)

Version returns the llama.go server version as a string.

type DebugInfo

type DebugInfo struct {
	RenderedTemplate string `json:"rendered_template"`
	ImageCount       int    `json:"image_count,omitempty"`
}

DebugInfo contains debug information for template rendering

type Duration

type Duration struct {
	time.Duration
}

func (Duration) MarshalJSON

func (d Duration) MarshalJSON() ([]byte, error)

func (*Duration) UnmarshalJSON

func (d *Duration) UnmarshalJSON(b []byte) (err error)

type EmbedRequest

type EmbedRequest struct {
	// Model is the model name.
	Model string `json:"model"`

	// Input is the input to embed.
	Input any `json:"input"`

	// KeepAlive controls how long the model will stay loaded in memory following
	// this request.
	KeepAlive *Duration `json:"keep_alive,omitempty"`

	// Truncate truncates the input to fit the model's max sequence length.
	Truncate *bool `json:"truncate,omitempty"`

	// Dimensions truncates the output embedding to the specified dimension.
	Dimensions int `json:"dimensions,omitempty"`

	// Options lists model-specific options.
	Options map[string]any `json:"options"`
}

EmbedRequest is the request passed to Client.Embed.

type EmbedResponse

type EmbedResponse struct {
	Model      string      `json:"model"`
	Embeddings [][]float32 `json:"embeddings"`

	TotalDuration   time.Duration `json:"total_duration,omitempty"`
	LoadDuration    time.Duration `json:"load_duration,omitempty"`
	PromptEvalCount int           `json:"prompt_eval_count,omitempty"`
}

EmbedResponse is the response from Client.Embed.

type EmbeddingRequest

type EmbeddingRequest struct {
	// Model is the model name.
	Model string `json:"model"`

	// Prompt is the textual prompt to embed.
	Prompt string `json:"prompt"`

	// KeepAlive controls how long the model will stay loaded in memory following
	// this request.
	KeepAlive *Duration `json:"keep_alive,omitempty"`

	// Options lists model-specific options.
	Options map[string]any `json:"options"`
}

EmbeddingRequest is the request passed to Client.Embeddings.

type EmbeddingResponse

type EmbeddingResponse struct {
	Embedding []float64 `json:"embedding"`
}

EmbeddingResponse is the response from Client.Embeddings.

type GenerateRequest

type GenerateRequest struct {
	// Model is the model name
	Model string `json:"model"`

	// Prompt is the textual prompt to send to the model.
	Prompt string `json:"prompt"`

	// Suffix is the text that comes after the inserted text.
	Suffix string `json:"suffix"`

	// System overrides the model's default system message/prompt.
	System string `json:"system"`

	// Template overrides the model's default prompt template.
	Template string `json:"template"`

	// Context is the context parameter returned from a previous call to
	// [Client.Generate]. It can be used to keep a short conversational memory.
	Context []int `json:"context,omitempty"`

	// Stream specifies whether the response is streaming; it is true by default.
	Stream *bool `json:"stream,omitempty"`

	// Raw set to true means that no formatting will be applied to the prompt.
	Raw bool `json:"raw,omitempty"`

	// Format specifies the format to return a response in.
	Format json.RawMessage `json:"format,omitempty"`

	// KeepAlive controls how long the model will stay loaded in memory following
	// this request.
	KeepAlive *Duration `json:"keep_alive,omitempty"`

	// Images is an optional list of raw image bytes accompanying this
	// request, for multimodal models.
	Images []ImageData `json:"images,omitempty"`

	// Options lists model-specific options. For example, temperature can be
	// set through this field, if the model supports it.
	Options map[string]any `json:"options"`

	// Think controls whether thinking/reasoning models will think before
	// responding. Can be a boolean (true/false) or a string ("high", "medium", "low")
	// for supported models. Needs to be a pointer so we can distinguish between false
	// (request that thinking _not_ be used) and unset (use the old behavior
	// before this option was introduced)
	Think *ThinkValue `json:"think,omitempty"`

	// DebugRenderOnly is a debug option that, when set to true, returns the rendered
	// template instead of calling the model.
	DebugRenderOnly bool `json:"_debug_render_only,omitempty"`
}

GenerateRequest describes a request sent by Client.Generate. While you have to specify the Model and Prompt fields, all the other fields have reasonable defaults for basic uses.

type GenerateResponse

type GenerateResponse struct {
	// Model is the model name that generated the response.
	Model string `json:"model"`

	// RemoteModel is the name of the upstream model that generated the response.
	RemoteModel string `json:"remote_model,omitempty"`

	// RemoteHost is the URL of the upstream llama.go host that generated the response.
	RemoteHost string `json:"remote_host,omitempty"`

	// CreatedAt is the timestamp of the response.
	CreatedAt int `json:"created"`

	// Choices contains the response choices generated by the model.
	Choices []Choice `json:"choices"`

	// Thinking contains the text that was inside thinking tags in the
	// original model output when ChatRequest.Think is enabled.
	Thinking string `json:"thinking,omitempty"`

	// Context is an encoding of the conversation used in this response; this
	// can be sent in the next request to keep a conversational memory.
	Context []int `json:"context,omitempty"`

	Metrics

	ToolCalls []ToolCall `json:"tool_calls,omitempty"`

	DebugInfo *DebugInfo `json:"_debug_info,omitempty"`

	SystemFingerprint string `json:"system_fingerprint,omitempty"`
	ID                string `json:"id,omitempty"`
	Object            string `json:"object,omitempty"`
}

GenerateResponse is the response passed into GenerateResponseFunc.

func (*GenerateResponse) Content

func (gr *GenerateResponse) Content() string

func (*GenerateResponse) Done

func (gr *GenerateResponse) Done() bool

type GenerateResponseFunc

type GenerateResponseFunc func(GenerateResponse) error

GenerateResponseFunc is a function that Client.Generate invokes every time a response is received from the service. If this function returns an error, Client.Generate will stop generating and return this error.

type ImageData

type ImageData []byte

ImageData represents the raw binary data of an image file.

type ListModelResponse

type ListModelResponse struct {
	Name        string       `json:"name"`
	Model       string       `json:"model"`
	RemoteModel string       `json:"remote_model,omitempty"`
	RemoteHost  string       `json:"remote_host,omitempty"`
	ModifiedAt  time.Time    `json:"modified_at"`
	Size        int64        `json:"size"`
	Digest      string       `json:"digest"`
	Details     ModelDetails `json:"details,omitempty"`
}

ListModelResponse is a single model description in ListResponse.

type ListResponse

type ListResponse struct {
	Models []ListModelResponse `json:"models"`
}

ListResponse is the response from Client.List.

type Message

type Message struct {
	Role    string `json:"role"`
	Content string `json:"content"`
	// Thinking contains the text that was inside thinking tags in the
	// original model output when ChatRequest.Think is enabled.
	Thinking  string      `json:"thinking,omitempty"`
	Images    []ImageData `json:"images,omitempty"`
	ToolCalls []ToolCall  `json:"tool_calls,omitempty"`
	ToolName  string      `json:"tool_name,omitempty"`
}

Message is a single message in a chat sequence. The message contains the role ("system", "user", or "assistant"), the content and an optional list of images.

func (*Message) UnmarshalJSON

func (m *Message) UnmarshalJSON(b []byte) error

type Metrics

type Metrics struct {
	TotalDuration      time.Duration `json:"total_duration,omitempty"`
	LoadDuration       time.Duration `json:"load_duration,omitempty"`
	PromptEvalCount    int           `json:"prompt_eval_count,omitempty"`
	PromptEvalDuration time.Duration `json:"prompt_eval_duration,omitempty"`
	EvalCount          int           `json:"eval_count,omitempty"`
	EvalDuration       time.Duration `json:"eval_duration,omitempty"`
}

func (*Metrics) Summary

func (m *Metrics) Summary()

type Modalities

type Modalities struct {
	Vision bool `json:"vision"`
	Audio  bool `json:"audio"`
}

type ModelDetails

type ModelDetails struct {
	ParentModel       string   `json:"parent_model"`
	Format            string   `json:"format"`
	Family            string   `json:"family"`
	Families          []string `json:"families"`
	ParameterSize     string   `json:"parameter_size"`
	QuantizationLevel string   `json:"quantization_level"`
}

ModelDetails provides details about a model.

type Options

type Options struct {
	Runner

	// Predict options used at runtime
	NumKeep          int      `json:"num_keep,omitempty"`
	Seed             int      `json:"seed,omitempty"`
	NumPredict       int      `json:"num_predict,omitempty"`
	TopK             int      `json:"top_k,omitempty"`
	TopP             float32  `json:"top_p,omitempty"`
	MinP             float32  `json:"min_p,omitempty"`
	TypicalP         float32  `json:"typical_p,omitempty"`
	RepeatLastN      int      `json:"repeat_last_n,omitempty"`
	Temperature      float32  `json:"temperature,omitempty"`
	RepeatPenalty    float32  `json:"repeat_penalty,omitempty"`
	PresencePenalty  float32  `json:"presence_penalty,omitempty"`
	FrequencyPenalty float32  `json:"frequency_penalty,omitempty"`
	Stop             []string `json:"stop,omitempty"`
}

Options specified in GenerateRequest. If you add a new option here, also add it to the API docs.

func DefaultOptions

func DefaultOptions() Options

DefaultOptions is the default set of options for GenerateRequest; these values are used unless the user specifies other values explicitly.

func (*Options) FromMap

func (opts *Options) FromMap(m map[string]any) error

type ProcessModelResponse

type ProcessModelResponse struct {
	Name          string       `json:"name"`
	Model         string       `json:"model"`
	Size          int64        `json:"size"`
	Digest        string       `json:"digest"`
	Details       ModelDetails `json:"details,omitempty"`
	ExpiresAt     time.Time    `json:"expires_at"`
	SizeVRAM      int64        `json:"size_vram"`
	ContextLength int          `json:"context_length"`
}

ProcessModelResponse is a single model description in ProcessResponse.

type ProcessResponse

type ProcessResponse struct {
	Models []ProcessModelResponse `json:"models"`
}

ProcessResponse is the response from [Client.ListRunning].

type ProgressResponse

type ProgressResponse struct {
	Status    string `json:"status"`
	Digest    string `json:"digest,omitempty"`
	Total     int64  `json:"total,omitempty"`
	Completed int64  `json:"completed,omitempty"`
}

ProgressResponse is the response passed to progress functions like [PullProgressFunc] and [PushProgressFunc].

type PropertyType

type PropertyType []string

PropertyType can be either a string or an array of strings

func (PropertyType) MarshalJSON

func (pt PropertyType) MarshalJSON() ([]byte, error)

MarshalJSON implements the json.Marshaler interface

func (PropertyType) String

func (pt PropertyType) String() string

String returns a string representation of the PropertyType

func (*PropertyType) UnmarshalJSON

func (pt *PropertyType) UnmarshalJSON(data []byte) error

UnmarshalJSON implements the json.Unmarshaler interface

type PropsResponse

type PropsResponse struct {
	BuildInfo  string     `json:"build_info"`
	ModelPath  string     `json:"model_path"`
	NCtx       int64      `json:"n_ctx"`
	Modalities Modalities `json:"modalities"`
}

type PullProgressFunc

type PullProgressFunc func(ProgressResponse) error

PullProgressFunc is a function that Client.Pull invokes every time there is progress with a "pull" request sent to the service. If this function returns an error, Client.Pull will stop the process and return this error.

type PullRequest

type PullRequest struct {
	Model  string `json:"model"`
	Stream *bool  `json:"stream,omitempty"`
}

PullRequest is the request passed to Client.Pull.

type Runner

type Runner struct {
	NumCtx    int   `json:"num_ctx,omitempty"`
	NumBatch  int   `json:"num_batch,omitempty"`
	NumGPU    int   `json:"num_gpu,omitempty"`
	MainGPU   int   `json:"main_gpu,omitempty"`
	UseMMap   *bool `json:"use_mmap,omitempty"`
	NumThread int   `json:"num_thread,omitempty"`
}

Runner options which must be set when the model is loaded into memory

type ShowRequest

type ShowRequest struct {
	Model  string `json:"model,omitempty"`
	System string `json:"system"`

	// Template is deprecated
	Template string `json:"template"`
	Verbose  bool   `json:"verbose"`

	Options map[string]any `json:"options"`
}

ShowRequest is the request passed to Client.Show.

type ShowResponse

type ShowResponse struct {
	License       string             `json:"license,omitempty"`
	Modelfile     string             `json:"modelfile,omitempty"`
	Parameters    string             `json:"parameters,omitempty"`
	Template      string             `json:"template,omitempty"`
	System        string             `json:"system,omitempty"`
	Renderer      string             `json:"renderer,omitempty"`
	Parser        string             `json:"parser,omitempty"`
	Details       ModelDetails       `json:"details,omitempty"`
	Messages      []Message          `json:"messages,omitempty"`
	RemoteModel   string             `json:"remote_model,omitempty"`
	RemoteHost    string             `json:"remote_host,omitempty"`
	ModelInfo     map[string]any     `json:"model_info,omitempty"`
	ProjectorInfo map[string]any     `json:"projector_info,omitempty"`
	Tensors       []Tensor           `json:"tensors,omitempty"`
	Capabilities  []model.Capability `json:"capabilities,omitempty"`
	ModifiedAt    time.Time          `json:"modified_at,omitempty"`
}

ShowResponse is the response returned from Client.Show.

type StatusError

type StatusError struct {
	StatusCode   int
	Status       string
	ErrorMessage string `json:"error"`
}

StatusError is an error with an HTTP status code and message.

func (StatusError) Error

func (e StatusError) Error() string

type Tensor

type Tensor struct {
	Name  string   `json:"name"`
	Type  string   `json:"type"`
	Shape []uint64 `json:"shape"`
}

Tensor describes the metadata for a given tensor.

type ThinkValue

type ThinkValue struct {
	// Value can be a bool or string
	Value interface{}
}

ThinkValue represents a value that can be a boolean or a string ("high", "medium", "low")

func (*ThinkValue) Bool

func (t *ThinkValue) Bool() bool

Bool returns the value as a bool (true if enabled in any way)

func (*ThinkValue) IsBool

func (t *ThinkValue) IsBool() bool

IsBool returns true if the value is a boolean

func (*ThinkValue) IsString

func (t *ThinkValue) IsString() bool

IsString returns true if the value is a string

func (*ThinkValue) IsValid

func (t *ThinkValue) IsValid() bool

IsValid checks if the ThinkValue is valid

func (*ThinkValue) MarshalJSON

func (t *ThinkValue) MarshalJSON() ([]byte, error)

MarshalJSON implements json.Marshaler

func (*ThinkValue) String

func (t *ThinkValue) String() string

String returns the value as a string

func (*ThinkValue) UnmarshalJSON

func (t *ThinkValue) UnmarshalJSON(data []byte) error

UnmarshalJSON implements json.Unmarshaler

type TokenResponse

type TokenResponse struct {
	Token string `json:"token"`
}

type Tool

type Tool struct {
	Type     string       `json:"type"`
	Items    any          `json:"items,omitempty"`
	Function ToolFunction `json:"function"`
}

func (Tool) String

func (t Tool) String() string

type ToolCall

type ToolCall struct {
	Function ToolCallFunction `json:"function"`
}

type ToolCallFunction

type ToolCallFunction struct {
	Index     int                       `json:"index,omitempty"`
	Name      string                    `json:"name"`
	Arguments ToolCallFunctionArguments `json:"arguments"`
}

type ToolCallFunctionArguments

type ToolCallFunctionArguments map[string]any

func (*ToolCallFunctionArguments) String

func (t *ToolCallFunctionArguments) String() string

type ToolFunction

type ToolFunction struct {
	Name        string                 `json:"name"`
	Description string                 `json:"description"`
	Parameters  ToolFunctionParameters `json:"parameters"`
}

func (*ToolFunction) String

func (t *ToolFunction) String() string

type ToolFunctionParameters

type ToolFunctionParameters struct {
	Type       string                  `json:"type"`
	Defs       any                     `json:"$defs,omitempty"`
	Items      any                     `json:"items,omitempty"`
	Required   []string                `json:"required"`
	Properties map[string]ToolProperty `json:"properties"`
}

func (*ToolFunctionParameters) String

func (t *ToolFunctionParameters) String() string

type ToolProperty

type ToolProperty struct {
	AnyOf       []ToolProperty `json:"anyOf,omitempty"`
	Type        PropertyType   `json:"type"`
	Items       any            `json:"items,omitempty"`
	Description string         `json:"description"`
	Enum        []any          `json:"enum,omitempty"`
}

func (ToolProperty) ToTypeScriptType

func (tp ToolProperty) ToTypeScriptType() string

ToTypeScriptType converts a ToolProperty to a TypeScript type string

type Tools

type Tools []Tool

func (Tools) String

func (t Tools) String() string

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL