Documentation
¶
Index ¶
- func FormatParams(params map[string][]string) (map[string]any, error)
- type AuthorizationError
- type ChatRequest
- type ChatResponse
- type ChatResponseFunc
- type Choice
- type Client
- func (c *Client) Chat(ctx context.Context, req *ChatRequest, fn ChatResponseFunc) error
- func (c *Client) Embed(ctx context.Context, req *EmbedRequest) (*EmbedResponse, error)
- func (c *Client) Embeddings(ctx context.Context, req *EmbeddingRequest) (*EmbeddingResponse, error)
- func (c *Client) Generate(ctx context.Context, req *GenerateRequest, fn GenerateResponseFunc) error
- func (c *Client) Heartbeat(ctx context.Context) error
- func (c *Client) List(ctx context.Context) (*ListResponse, error)
- func (c *Client) ListRunning(ctx context.Context) (*ProcessResponse, error)
- func (c *Client) Pull(ctx context.Context, req *PullRequest, fn PullProgressFunc) error
- func (c *Client) Show(ctx context.Context, req *ShowRequest) (*ShowResponse, error)
- func (c *Client) Version(ctx context.Context) (string, error)
- type DebugInfo
- type Duration
- type EmbedRequest
- type EmbedResponse
- type EmbeddingRequest
- type EmbeddingResponse
- type GenerateRequest
- type GenerateResponse
- type GenerateResponseFunc
- type ImageData
- type ListModelResponse
- type ListResponse
- type Message
- type Metrics
- type Modalities
- type ModelDetails
- type Options
- type ProcessModelResponse
- type ProcessResponse
- type ProgressResponse
- type PropertyType
- type PropsResponse
- type PullProgressFunc
- type PullRequest
- type Runner
- type ShowRequest
- type ShowResponse
- type StatusError
- type Tensor
- type ThinkValue
- type TokenResponse
- type Tool
- type ToolCall
- type ToolCallFunction
- type ToolCallFunctionArguments
- type ToolFunction
- type ToolFunctionParameters
- type ToolProperty
- type Tools
Constants ¶
This section is empty.
Variables ¶
This section is empty.
Functions ¶
Types ¶
type AuthorizationError ¶
type AuthorizationError struct {
StatusCode int
Status string
SigninURL string `json:"signin_url"`
}
func (AuthorizationError) Error ¶
func (e AuthorizationError) Error() string
type ChatRequest ¶
type ChatRequest struct {
// Model is the model name, as in [GenerateRequest].
Model string `json:"model"`
// Messages is the messages of the chat - can be used to keep a chat memory.
Messages []Message `json:"messages"`
// Stream enables streaming of returned responses; true by default.
Stream *bool `json:"stream,omitempty"`
// Format is the format to return the response in (e.g. "json").
Format json.RawMessage `json:"format,omitempty"`
// KeepAlive controls how long the model will stay loaded into memory
// following the request.
KeepAlive *Duration `json:"keep_alive,omitempty"`
// Tools is an optional list of tools the model has access to.
Tools `json:"tools,omitempty"`
// Options lists model-specific options.
Options map[string]any `json:"options"`
// Think controls whether thinking/reasoning models will think before
// responding. Can be a boolean (true/false) or a string ("high", "medium", "low")
// for supported models.
Think *ThinkValue `json:"think,omitempty"`
// DebugRenderOnly is a debug option that, when set to true, returns the rendered
// template instead of calling the model.
DebugRenderOnly bool `json:"_debug_render_only,omitempty"`
}
ChatRequest describes a request sent by Client.Chat.
type ChatResponse ¶
type ChatResponse struct {
// Model is the model name that generated the response.
Model string `json:"model"`
// RemoteModel is the name of the upstream model that generated the response.
RemoteModel string `json:"remote_model,omitempty"`
// RemoteHost is the URL of the upstream llama.go host that generated the response.
RemoteHost string `json:"remote_host,omitempty"`
// CreatedAt is the timestamp of the response.
CreatedAt time.Time `json:"created_at"`
// Message contains the message or part of a message from the model.
Message Message `json:"message"`
// Done specifies if the response is complete.
Done bool `json:"done"`
// DoneReason is the reason the model stopped generating text.
DoneReason string `json:"done_reason,omitempty"`
DebugInfo *DebugInfo `json:"_debug_info,omitempty"`
Metrics
}
ChatResponse is the response returned by Client.Chat. Its fields are similar to GenerateResponse.
type ChatResponseFunc ¶
type ChatResponseFunc func(ChatResponse) error
ChatResponseFunc is a function that Client.Chat invokes every time a response is received from the service. If this function returns an error, Client.Chat will stop generating and return this error.
type Client ¶
type Client struct {
// contains filtered or unexported fields
}
Client encapsulates client state for interacting with the llama.go service. Use [ClientFromEnvironment] to create new Clients.
func DefaultClient ¶
func DefaultClient() *Client
func (*Client) Chat ¶
func (c *Client) Chat(ctx context.Context, req *ChatRequest, fn ChatResponseFunc) error
Chat generates the next message in a chat. ChatRequest may contain a sequence of messages which can be used to maintain chat history with a model. fn is called for each response (there may be multiple responses, e.g. in case streaming is enabled).
func (*Client) Embed ¶
func (c *Client) Embed(ctx context.Context, req *EmbedRequest) (*EmbedResponse, error)
Embed generates embeddings from a model.
func (*Client) Embeddings ¶
func (c *Client) Embeddings(ctx context.Context, req *EmbeddingRequest) (*EmbeddingResponse, error)
Embeddings generates an embedding from a model.
func (*Client) Generate ¶
func (c *Client) Generate(ctx context.Context, req *GenerateRequest, fn GenerateResponseFunc) error
Generate generates a response for a given prompt. The req parameter should be populated with prompt details. fn is called for each response (there may be multiple responses, e.g. in case streaming is enabled).
func (*Client) Heartbeat ¶
Heartbeat checks if the server has started and is responsive; if yes, it returns nil, otherwise an error.
func (*Client) List ¶
func (c *Client) List(ctx context.Context) (*ListResponse, error)
List lists models that are available locally.
func (*Client) ListRunning ¶
func (c *Client) ListRunning(ctx context.Context) (*ProcessResponse, error)
ListRunning lists running models.
func (*Client) Pull ¶
func (c *Client) Pull(ctx context.Context, req *PullRequest, fn PullProgressFunc) error
Pull downloads a model from the llama.go library. fn is called each time progress is made on the request and can be used to display a progress bar, etc.
func (*Client) Show ¶
func (c *Client) Show(ctx context.Context, req *ShowRequest) (*ShowResponse, error)
Show obtains model information, including details, modelfile, license etc.
type DebugInfo ¶
type DebugInfo struct {
RenderedTemplate string `json:"rendered_template"`
ImageCount int `json:"image_count,omitempty"`
}
DebugInfo contains debug information for template rendering.
type EmbedRequest ¶
type EmbedRequest struct {
// Model is the model name.
Model string `json:"model"`
// Input is the input to embed.
Input any `json:"input"`
// KeepAlive controls how long the model will stay loaded in memory following
// this request.
KeepAlive *Duration `json:"keep_alive,omitempty"`
// Truncate truncates the input to fit the model's max sequence length.
Truncate *bool `json:"truncate,omitempty"`
// Dimensions truncates the output embedding to the specified dimension.
Dimensions int `json:"dimensions,omitempty"`
// Options lists model-specific options.
Options map[string]any `json:"options"`
}
EmbedRequest is the request passed to Client.Embed.
type EmbedResponse ¶
type EmbedResponse struct {
Model string `json:"model"`
Embeddings [][]float32 `json:"embeddings"`
TotalDuration time.Duration `json:"total_duration,omitempty"`
LoadDuration time.Duration `json:"load_duration,omitempty"`
PromptEvalCount int `json:"prompt_eval_count,omitempty"`
}
EmbedResponse is the response from Client.Embed.
type EmbeddingRequest ¶
type EmbeddingRequest struct {
// Model is the model name.
Model string `json:"model"`
// Prompt is the textual prompt to embed.
Prompt string `json:"prompt"`
// KeepAlive controls how long the model will stay loaded in memory following
// this request.
KeepAlive *Duration `json:"keep_alive,omitempty"`
// Options lists model-specific options.
Options map[string]any `json:"options"`
}
EmbeddingRequest is the request passed to Client.Embeddings.
type EmbeddingResponse ¶
type EmbeddingResponse struct {
Embedding []float64 `json:"embedding"`
}
EmbeddingResponse is the response from Client.Embeddings.
type GenerateRequest ¶
type GenerateRequest struct {
// Model is the model name
Model string `json:"model"`
// Prompt is the textual prompt to send to the model.
Prompt string `json:"prompt"`
// Suffix is the text that comes after the inserted text.
Suffix string `json:"suffix"`
// System overrides the model's default system message/prompt.
System string `json:"system"`
// Template overrides the model's default prompt template.
Template string `json:"template"`
// Context is the context parameter returned from a previous call to
// [Client.Generate]. It can be used to keep a short conversational memory.
Context []int `json:"context,omitempty"`
// Stream specifies whether the response is streaming; it is true by default.
Stream *bool `json:"stream,omitempty"`
// Raw set to true means that no formatting will be applied to the prompt.
Raw bool `json:"raw,omitempty"`
// Format specifies the format to return a response in.
Format json.RawMessage `json:"format,omitempty"`
// KeepAlive controls how long the model will stay loaded in memory following
// this request.
KeepAlive *Duration `json:"keep_alive,omitempty"`
// Images is an optional list of raw image bytes accompanying this
// request, for multimodal models.
Images []ImageData `json:"images,omitempty"`
// Options lists model-specific options. For example, temperature can be
// set through this field, if the model supports it.
Options map[string]any `json:"options"`
// Think controls whether thinking/reasoning models will think before
// responding. Can be a boolean (true/false) or a string ("high", "medium", "low")
// for supported models. Needs to be a pointer so we can distinguish between false
// (request that thinking _not_ be used) and unset (use the old behavior
// before this option was introduced)
Think *ThinkValue `json:"think,omitempty"`
// DebugRenderOnly is a debug option that, when set to true, returns the rendered
// template instead of calling the model.
DebugRenderOnly bool `json:"_debug_render_only,omitempty"`
}
GenerateRequest describes a request sent by Client.Generate. While you have to specify the Model and Prompt fields, all the other fields have reasonable defaults for basic uses.
type GenerateResponse ¶
type GenerateResponse struct {
// Model is the model name that generated the response.
Model string `json:"model"`
// RemoteModel is the name of the upstream model that generated the response.
RemoteModel string `json:"remote_model,omitempty"`
// RemoteHost is the URL of the upstream llama.go host that generated the response.
RemoteHost string `json:"remote_host,omitempty"`
// CreatedAt is the timestamp of the response.
CreatedAt int `json:"created"`
// Response is the textual response itself.
Choices []Choice `json:"choices"`
// Thinking contains the text that was inside thinking tags in the
// original model output when ChatRequest.Think is enabled.
Thinking string `json:"thinking,omitempty"`
// Context is an encoding of the conversation used in this response; this
// can be sent in the next request to keep a conversational memory.
Context []int `json:"context,omitempty"`
Metrics
ToolCalls []ToolCall `json:"tool_calls,omitempty"`
DebugInfo *DebugInfo `json:"_debug_info,omitempty"`
SystemFingerprint string `json:"system_fingerprint,omitempty"`
ID string `json:"id,omitempty"`
Object string `json:"object,omitempty"`
}
GenerateResponse is the response passed into GenerateResponseFunc.
func (*GenerateResponse) Content ¶
func (gr *GenerateResponse) Content() string
func (*GenerateResponse) Done ¶
func (gr *GenerateResponse) Done() bool
type GenerateResponseFunc ¶
type GenerateResponseFunc func(GenerateResponse) error
GenerateResponseFunc is a function that Client.Generate invokes every time a response is received from the service. If this function returns an error, Client.Generate will stop generating and return this error.
type ListModelResponse ¶
type ListModelResponse struct {
Name string `json:"name"`
Model string `json:"model"`
RemoteModel string `json:"remote_model,omitempty"`
RemoteHost string `json:"remote_host,omitempty"`
ModifiedAt time.Time `json:"modified_at"`
Size int64 `json:"size"`
Digest string `json:"digest"`
Details ModelDetails `json:"details,omitempty"`
}
ListModelResponse is a single model description in ListResponse.
type ListResponse ¶
type ListResponse struct {
Models []ListModelResponse `json:"models"`
}
ListResponse is the response from Client.List.
type Message ¶
type Message struct {
Role string `json:"role"`
Content string `json:"content"`
// Thinking contains the text that was inside thinking tags in the
// original model output when ChatRequest.Think is enabled.
Thinking string `json:"thinking,omitempty"`
Images []ImageData `json:"images,omitempty"`
ToolCalls []ToolCall `json:"tool_calls,omitempty"`
ToolName string `json:"tool_name,omitempty"`
}
Message is a single message in a chat sequence. The message contains the role ("system", "user", or "assistant"), the content and an optional list of images.
func (*Message) UnmarshalJSON ¶
type Metrics ¶
type Metrics struct {
TotalDuration time.Duration `json:"total_duration,omitempty"`
LoadDuration time.Duration `json:"load_duration,omitempty"`
PromptEvalCount int `json:"prompt_eval_count,omitempty"`
PromptEvalDuration time.Duration `json:"prompt_eval_duration,omitempty"`
EvalCount int `json:"eval_count,omitempty"`
EvalDuration time.Duration `json:"eval_duration,omitempty"`
}
type Modalities ¶
type ModelDetails ¶
type ModelDetails struct {
ParentModel string `json:"parent_model"`
Format string `json:"format"`
Family string `json:"family"`
Families []string `json:"families"`
ParameterSize string `json:"parameter_size"`
QuantizationLevel string `json:"quantization_level"`
}
ModelDetails provides details about a model.
type Options ¶
type Options struct {
Runner
// Predict options used at runtime
NumKeep int `json:"num_keep,omitempty"`
Seed int `json:"seed,omitempty"`
NumPredict int `json:"num_predict,omitempty"`
TopK int `json:"top_k,omitempty"`
TopP float32 `json:"top_p,omitempty"`
MinP float32 `json:"min_p,omitempty"`
TypicalP float32 `json:"typical_p,omitempty"`
RepeatLastN int `json:"repeat_last_n,omitempty"`
Temperature float32 `json:"temperature,omitempty"`
RepeatPenalty float32 `json:"repeat_penalty,omitempty"`
PresencePenalty float32 `json:"presence_penalty,omitempty"`
FrequencyPenalty float32 `json:"frequency_penalty,omitempty"`
Stop []string `json:"stop,omitempty"`
}
Options specified in GenerateRequest. If you add a new option here, also add it to the API docs.
func DefaultOptions ¶
func DefaultOptions() Options
DefaultOptions is the default set of options for GenerateRequest; these values are used unless the user specifies other values explicitly.
type ProcessModelResponse ¶
type ProcessModelResponse struct {
Name string `json:"name"`
Model string `json:"model"`
Size int64 `json:"size"`
Digest string `json:"digest"`
Details ModelDetails `json:"details,omitempty"`
ExpiresAt time.Time `json:"expires_at"`
SizeVRAM int64 `json:"size_vram"`
ContextLength int `json:"context_length"`
}
ProcessModelResponse is a single model description in ProcessResponse.
type ProcessResponse ¶
type ProcessResponse struct {
Models []ProcessModelResponse `json:"models"`
}
ProcessResponse is the response from Client.ListRunning.
type ProgressResponse ¶
type ProgressResponse struct {
Status string `json:"status"`
Digest string `json:"digest,omitempty"`
Total int64 `json:"total,omitempty"`
Completed int64 `json:"completed,omitempty"`
}
ProgressResponse is the response passed to progress functions like PullProgressFunc and [PushProgressFunc].
type PropertyType ¶
type PropertyType []string
PropertyType can be either a string or an array of strings
func (PropertyType) MarshalJSON ¶
func (pt PropertyType) MarshalJSON() ([]byte, error)
MarshalJSON implements the json.Marshaler interface
func (PropertyType) String ¶
func (pt PropertyType) String() string
String returns a string representation of the PropertyType
func (*PropertyType) UnmarshalJSON ¶
func (pt *PropertyType) UnmarshalJSON(data []byte) error
UnmarshalJSON implements the json.Unmarshaler interface
type PropsResponse ¶
type PropsResponse struct {
BuildInfo string `json:"build_info"`
ModelPath string `json:"model_path"`
NCtx int64 `json:"n_ctx"`
Modalities Modalities `json:"modalities"`
}
type PullProgressFunc ¶
type PullProgressFunc func(ProgressResponse) error
PullProgressFunc is a function that Client.Pull invokes every time there is progress with a "pull" request sent to the service. If this function returns an error, Client.Pull will stop the process and return this error.
type PullRequest ¶
PullRequest is the request passed to Client.Pull.
type Runner ¶
type Runner struct {
NumCtx int `json:"num_ctx,omitempty"`
NumBatch int `json:"num_batch,omitempty"`
NumGPU int `json:"num_gpu,omitempty"`
MainGPU int `json:"main_gpu,omitempty"`
UseMMap *bool `json:"use_mmap,omitempty"`
NumThread int `json:"num_thread,omitempty"`
}
Runner options which must be set when the model is loaded into memory
type ShowRequest ¶
type ShowRequest struct {
Model string `json:"model,omitempty"`
System string `json:"system"`
// Template is deprecated
Template string `json:"template"`
Verbose bool `json:"verbose"`
Options map[string]any `json:"options"`
}
ShowRequest is the request passed to Client.Show.
type ShowResponse ¶
type ShowResponse struct {
License string `json:"license,omitempty"`
Modelfile string `json:"modelfile,omitempty"`
Parameters string `json:"parameters,omitempty"`
Template string `json:"template,omitempty"`
System string `json:"system,omitempty"`
Renderer string `json:"renderer,omitempty"`
Parser string `json:"parser,omitempty"`
Details ModelDetails `json:"details,omitempty"`
Messages []Message `json:"messages,omitempty"`
RemoteModel string `json:"remote_model,omitempty"`
RemoteHost string `json:"remote_host,omitempty"`
ModelInfo map[string]any `json:"model_info,omitempty"`
ProjectorInfo map[string]any `json:"projector_info,omitempty"`
Tensors []Tensor `json:"tensors,omitempty"`
Capabilities []model.Capability `json:"capabilities,omitempty"`
ModifiedAt time.Time `json:"modified_at,omitempty"`
}
ShowResponse is the response returned from Client.Show.
type StatusError ¶
StatusError is an error with an HTTP status code and message.
func (StatusError) Error ¶
func (e StatusError) Error() string
type Tensor ¶
type Tensor struct {
Name string `json:"name"`
Type string `json:"type"`
Shape []uint64 `json:"shape"`
}
Tensor describes the metadata for a given tensor.
type ThinkValue ¶
type ThinkValue struct {
// Value can be a bool or string
Value interface{}
}
ThinkValue represents a value that can be a boolean or a string ("high", "medium", "low")
func (*ThinkValue) Bool ¶
func (t *ThinkValue) Bool() bool
Bool returns the value as a bool (true if enabled in any way)
func (*ThinkValue) IsBool ¶
func (t *ThinkValue) IsBool() bool
IsBool returns true if the value is a boolean
func (*ThinkValue) IsString ¶
func (t *ThinkValue) IsString() bool
IsString returns true if the value is a string
func (*ThinkValue) IsValid ¶
func (t *ThinkValue) IsValid() bool
IsValid checks if the ThinkValue is valid
func (*ThinkValue) MarshalJSON ¶
func (t *ThinkValue) MarshalJSON() ([]byte, error)
MarshalJSON implements json.Marshaler
func (*ThinkValue) String ¶
func (t *ThinkValue) String() string
String returns the value as a string
func (*ThinkValue) UnmarshalJSON ¶
func (t *ThinkValue) UnmarshalJSON(data []byte) error
UnmarshalJSON implements json.Unmarshaler
type TokenResponse ¶
type TokenResponse struct {
Token string `json:"token"`
}
type Tool ¶
type Tool struct {
Type string `json:"type"`
Items any `json:"items,omitempty"`
Function ToolFunction `json:"function"`
}
type ToolCall ¶
type ToolCall struct {
Function ToolCallFunction `json:"function"`
}
type ToolCallFunction ¶
type ToolCallFunction struct {
Index int `json:"index,omitempty"`
Name string `json:"name"`
Arguments ToolCallFunctionArguments `json:"arguments"`
}
type ToolCallFunctionArguments ¶
func (*ToolCallFunctionArguments) String ¶
func (t *ToolCallFunctionArguments) String() string
type ToolFunction ¶
type ToolFunction struct {
Name string `json:"name"`
Description string `json:"description"`
Parameters ToolFunctionParameters `json:"parameters"`
}
func (*ToolFunction) String ¶
func (t *ToolFunction) String() string
type ToolFunctionParameters ¶
type ToolFunctionParameters struct {
Type string `json:"type"`
Defs any `json:"$defs,omitempty"`
Items any `json:"items,omitempty"`
Required []string `json:"required"`
Properties map[string]ToolProperty `json:"properties"`
}
func (*ToolFunctionParameters) String ¶
func (t *ToolFunctionParameters) String() string
type ToolProperty ¶
type ToolProperty struct {
AnyOf []ToolProperty `json:"anyOf,omitempty"`
Type PropertyType `json:"type"`
Items any `json:"items,omitempty"`
Description string `json:"description"`
Enum []any `json:"enum,omitempty"`
}
func (ToolProperty) ToTypeScriptType ¶
func (tp ToolProperty) ToTypeScriptType() string
ToTypeScriptType converts a ToolProperty to a TypeScript type string