Documentation
¶
Index ¶
- func New(c fs.Config) (model.Model, error)
- type Grid
- type ImageProcessor
- type Layer
- type MLP
- type Model
- func (m *Model) EncodeMultimodal(ctx ml.Context, multimodalData []byte) ([]input.Multimodal, error)
- func (m *Model) Forward(ctx ml.Context, batch input.Batch) (ml.Tensor, error)
- func (m *Model) PixelValues(ctx ml.Context, multimodalData []byte) (ml.Tensor, *Grid, error)
- func (m *Model) PostTokenize(inputs []*input.Input) ([]*input.Input, error)
- type PatchEmbedding
- type SelfAttention
- type TextModel
- type TextOptions
- type VisionEncoderLayer
- type VisionMLP
- type VisionModel
- type VisionModelOptions
- type VisionPatchMerger
- type VisionSelfAttention
Constants ¶
This section is empty.
Variables ¶
This section is empty.
Functions ¶
Types ¶
type ImageProcessor ¶
type ImageProcessor struct {
// contains filtered or unexported fields
}
ImageProcessor contains configuration for Qwen 2.5 VL image processing.
func (*ImageProcessor) ProcessImage ¶
func (*ImageProcessor) SmartResize ¶
func (p *ImageProcessor) SmartResize(height, width int) (int, int)
SmartResize implements the smart resize algorithm.
type Layer ¶
type Layer struct {
AttentionNorm *nn.RMSNorm `gguf:"attn_norm"`
SelfAttention *SelfAttention
MLPNorm *nn.RMSNorm `gguf:"ffn_norm"`
MLP *MLP
}
Layer represents a single transformer layer combining self-attention and feed-forward components.
type MLP ¶
type MLP struct {
Up *nn.Linear `gguf:"ffn_up"`
Down *nn.Linear `gguf:"ffn_down"`
Gate *nn.Linear `gguf:"ffn_gate"`
}
MLP implements the feed-forward network component with SwiGLU activation.
type Model ¶
type Model struct {
model.Base
model.BytePairEncoding
*TextModel
*VisionModel `gguf:"v"`
ImageProcessor
}
func (*Model) EncodeMultimodal ¶
func (*Model) PixelValues ¶
type PatchEmbedding ¶
type PatchEmbedding struct {
PatchConv0 *nn.Conv2D `gguf:"patch_embd_0"`
PatchConv1 *nn.Conv2D `gguf:"patch_embd_1"`
}
func (*PatchEmbedding) Forward ¶
func (pe *PatchEmbedding) Forward(ctx ml.Context, pixelValues ml.Tensor, opts *VisionModelOptions) ml.Tensor
type SelfAttention ¶
type SelfAttention struct {
Query *nn.Linear `gguf:"attn_q"`
Key *nn.Linear `gguf:"attn_k"`
Value *nn.Linear `gguf:"attn_v"`
Output *nn.Linear `gguf:"attn_output"`
}
SelfAttention implements the multi-head self-attention mechanism with separate projections for query, key, value and output transformations.
type TextModel ¶
type TextModel struct {
TokenEmbedding *nn.Embedding `gguf:"token_embd"`
Layers []Layer `gguf:"blk"`
OutputNorm *nn.RMSNorm `gguf:"output_norm"`
Output *nn.Linear `gguf:"output,alt:token_embd"`
*TextOptions
}
func NewTextModel ¶
type TextOptions ¶
type TextOptions struct {
// contains filtered or unexported fields
}
type VisionEncoderLayer ¶
type VisionEncoderLayer struct {
Norm1 *nn.RMSNorm `gguf:"ln1"`
SelfAttention *VisionSelfAttention
Norm2 *nn.RMSNorm `gguf:"ln2"`
MLP *VisionMLP
}
func (*VisionEncoderLayer) Forward ¶
func (e *VisionEncoderLayer) Forward(ctx ml.Context, hiddenStates, cos, sin, mask ml.Tensor, opts *VisionModelOptions) ml.Tensor
type VisionMLP ¶
type VisionMLP struct {
Gate *nn.Linear `gguf:"ffn_gate"`
Up *nn.Linear `gguf:"ffn_up"`
Down *nn.Linear `gguf:"ffn_down"`
}
VisionMLP implements the multi-layer perceptron.
type VisionModel ¶
type VisionModel struct {
PatchEmbedding *PatchEmbedding
Layers []VisionEncoderLayer `gguf:"blk"`
PatchMerger *VisionPatchMerger `gguf:"merger"`
*VisionModelOptions
}
VisionModel implements the Qwen vision model.
func (*VisionModel) PositionalEmbedding ¶
PositionalEmbedding generates rotary position embeddings for attention mechanisms.
func (*VisionModel) WindowIndex ¶
WindowIndex divides the grid into windows and returns:
- A tensor containing flattened indices of all grid points organized by windows
- A slice of boundaries that mark where each window's data begins and ends in the flattened representation, scaled by spatialMergeSize squared
The boundaries slice always starts with 0 and contains cumulative ending positions for each window, allowing downstream processing to identify window boundaries in the tensor data.
type VisionModelOptions ¶
type VisionModelOptions struct {
// contains filtered or unexported fields
}
VisionModelOptions contains configuration options for the vision model.
type VisionPatchMerger ¶
type VisionPatchMerger struct {
LNQ *nn.RMSNorm `gguf:"ln_q"`
MLP0 *nn.Linear `gguf:"mlp.0"`
MLP2 *nn.Linear `gguf:"mlp.2"`
}
VisionPatchMerger implements patch merging for the Qwen vision model.
func (*VisionPatchMerger) Forward ¶
func (pm *VisionPatchMerger) Forward(ctx ml.Context, visionOutputs ml.Tensor, opts *VisionModelOptions) ml.Tensor
Forward computes patch merging for the vision model.
type VisionSelfAttention ¶
type VisionSelfAttention struct {
Query *nn.Linear `gguf:"attn_q"`
Key *nn.Linear `gguf:"attn_k"`
Value *nn.Linear `gguf:"attn_v"`
Output *nn.Linear `gguf:"attn_out"`
}
func (*VisionSelfAttention) Forward ¶
func (sa *VisionSelfAttention) Forward(ctx ml.Context, hiddenStates, cos, sin, mask ml.Tensor, opts *VisionModelOptions) ml.Tensor