mllama

package
v0.0.0-...-e80cf58
Published: Mar 12, 2025 License: MIT Imports: 19 Imported by: 0

Documentation
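
Judging from the exported types below, package mllama implements a multimodal Llama-style model: a tiled vision transformer (VisionModel) whose outputs are projected into a text transformer (TextModel) that attends to them through gated cross-attention layers.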

Index

Constants

This section is empty.

Variables

This section is empty.

Functions

func New

func New(c ml.Config) (model.Model, error)
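
A minimal sketch of direct construction, assuming the Ollama import paths below; in practice a constructor like New is typically invoked by the model loader, which supplies the ml.Config parsed from the GGUF file:

import (
	"log"

	"github.com/ollama/ollama/ml"                  // assumed import path
	"github.com/ollama/ollama/model/models/mllama" // assumed import path
)

// loadModel is a hypothetical helper; cfg comes from the backend after
// it has read the GGUF metadata.
func loadModel(cfg ml.Config) {
	m, err := mllama.New(cfg)
	if err != nil {
		log.Fatal(err)
	}
	_ = m // m satisfies model.Model
}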

func Preprocess

func Preprocess(imageData io.Reader) ([]float32, map[string]any, error)
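
Preprocess can be used on its own to turn an encoded image into the model's pixel-value layout. A runnable sketch, assuming the import path below and a local photo.jpg:

package main

import (
	"fmt"
	"log"
	"os"

	"github.com/ollama/ollama/model/models/mllama" // assumed import path
)

func main() {
	f, err := os.Open("photo.jpg") // hypothetical input image
	if err != nil {
		log.Fatal(err)
	}
	defer f.Close()

	// Preprocess decodes the image and returns flattened pixel values
	// plus model-specific metadata such as the chosen aspect-ratio index.
	pixels, meta, err := mllama.Preprocess(f)
	if err != nil {
		log.Fatal(err)
	}
	fmt.Printf("%d pixel values, metadata: %v\n", len(pixels), meta)
}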

Types

type ImageProcessor

type ImageProcessor struct {
	// contains filtered or unexported fields
}

func (ImageProcessor) ProcessImage

func (p ImageProcessor) ProcessImage(img image.Image) ([]float32, int, error)
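
ProcessImage performs the same conversion for an already-decoded image.Image. Since ImageProcessor exposes no constructor here, the sketch below assumes a *mllama.Model obtained from New, whose embedded ImageProcessor promotes the method:

import (
	"bytes"
	"image"
	_ "image/jpeg" // register JPEG decoding
	"log"
)

// processOne is a hypothetical helper.
func processOne(m *mllama.Model, imageBytes []byte) []float32 {
	img, _, err := image.Decode(bytes.NewReader(imageBytes))
	if err != nil {
		log.Fatal(err)
	}
	pixels, aspectRatioID, err := m.ProcessImage(img)
	if err != nil {
		log.Fatal(err)
	}
	_ = aspectRatioID // index of the selected aspect-ratio/tile bucket
	return pixels
}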

type Model

type Model struct {
	model.Base
	model.BytePairEncoding

	*VisionModel `gguf:"v,vision"`
	*TextModel

	Projector *nn.Linear `gguf:"mm.0"`

	ImageProcessor
}

func (*Model) EncodeMultimodal

func (m *Model) EncodeMultimodal(ctx ml.Context, multimodalData []byte) (any, error)
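
EncodeMultimodal accepts the raw encoded image bytes (not decoded pixels) and runs the vision path, returning an opaque value that the input pipeline later attaches to the token sequence. A hedged snippet, assuming ctx is an ml.Context supplied by the backend:

// data holds a raw encoded image, e.g. the bytes of a JPEG file.
encoded, err := m.EncodeMultimodal(ctx, data)
if err != nil {
	log.Fatal(err)
}
_ = encoded // opaque payload consumed by PostTokenize and Forward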

func (*Model) Forward

func (m *Model) Forward(ctx ml.Context, opts input.Options) (ml.Tensor, error)
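
Forward runs one decode step over the batch described by input.Options and returns the output logits. A hedged snippet, assuming ctx and opts are provided by the surrounding runner:

logits, err := m.Forward(ctx, opts)
if err != nil {
	log.Fatal(err)
}
_ = logits // ml.Tensor of logits for the requested output positions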

func (*Model) PostTokenize

func (m *Model) PostTokenize(ctx ml.Context, inputs []input.Input) ([]input.Input, error)
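
PostTokenize runs after text tokenization and rewrites the input slice so that multimodal payloads sit where the cross-attention layers expect them. A hedged snippet:

// inputs is the tokenized prompt; one entry carries the value returned
// by EncodeMultimodal as its multimodal payload.
rewritten, err := m.PostTokenize(ctx, inputs)
if err != nil {
	log.Fatal(err)
}
_ = rewritten // same sequence with image placeholder inputs spliced in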

type PrecomputedAspectRatioEmbedding

type PrecomputedAspectRatioEmbedding struct {
	Embedding *nn.Embedding
	Gate      ml.Tensor `gguf:"gate"`
}

func (*PrecomputedAspectRatioEmbedding) Forward

func (e *PrecomputedAspectRatioEmbedding) Forward(ctx ml.Context, hiddenState ml.Tensor, aspectRatioIDs ml.Tensor, opts *VisionModelOptions) ml.Tensor

type PrecomputedPositionEmbedding

type PrecomputedPositionEmbedding struct {
	PositionEmbedding     *nn.Embedding `gguf:"position_embd"`
	PositionEmbeddingGate ml.Tensor     `gguf:"position_embd.gate"`

	TilePositionEmbedding     *nn.Embedding `gguf:"tile_position_embd"`
	TilePositionEmbeddingGate ml.Tensor     `gguf:"tile_position_embd.gate"`
}

func (*PrecomputedPositionEmbedding) Forward

func (e *PrecomputedPositionEmbedding) Forward(ctx ml.Context, hiddenState, positionIDs, aspectRatioIDs ml.Tensor, numPositions int, opts *VisionModelOptions) ml.Tensor

type TextCrossAttention

type TextCrossAttention struct {
	QueryNorm *nn.RMSNorm `gguf:"cross_attn_q_norm"`
	Query     *nn.Linear  `gguf:"cross_attn_q_proj"`
	KeyNorm   *nn.RMSNorm `gguf:"cross_attn_k_norm"`
	Key       *nn.Linear  `gguf:"cross_attn_k_proj"`
	Value     *nn.Linear  `gguf:"cross_attn_v_proj"`
	Output    *nn.Linear  `gguf:"cross_attn_o_proj"`
}

func (*TextCrossAttention) Forward

func (ca *TextCrossAttention) Forward(ctx ml.Context, hiddenState, crossAttentionStates ml.Tensor, cache *kvcache.WrapperCache, opts *TextModelOptions) ml.Tensor

type TextCrossAttentionDecoderLayer

type TextCrossAttentionDecoderLayer struct {
	AttentionNorm  *nn.RMSNorm `gguf:"attn_norm"`
	CrossAttention *TextCrossAttention
	AttentionGate  ml.Tensor `gguf:"cross_attn_attn_gate"`

	MLPNorm *nn.RMSNorm `gguf:"ffn_norm"`
	MLP     *TextMLP
	MLPGate ml.Tensor `gguf:"cross_attn_mlp_gate"`
}

func (*TextCrossAttentionDecoderLayer) Forward

func (d *TextCrossAttentionDecoderLayer) Forward(ctx ml.Context, hiddenState, _, _, _, crossAttentionStates, crossAttentionMask ml.Tensor, cache *kvcache.WrapperCache, opts *TextModelOptions) ml.Tensor

type TextDecoder

type TextDecoder struct {
	Layers []TextDecoderLayer
}

func (*TextDecoder) Forward

func (d *TextDecoder) Forward(ctx ml.Context, hiddenState, positionIDs, outputs, mask, crossAttentionStates, crossAttentionMask ml.Tensor, cache *kvcache.WrapperCache, opts *TextModelOptions) ml.Tensor

type TextDecoderLayer

type TextDecoderLayer interface {
	Forward(ctx ml.Context, hiddenState, positionIDs, outputs, mask, crossAttentionStates, crossAttentionMask ml.Tensor, cache *kvcache.WrapperCache, opts *TextModelOptions) ml.Tensor
}

type TextMLP

type TextMLP struct {
	Up   *nn.Linear `gguf:"ffn_up"`
	Down *nn.Linear `gguf:"ffn_down"`
	Gate *nn.Linear `gguf:"ffn_gate"`
}

func (*TextMLP) Forward

func (mlp *TextMLP) Forward(ctx ml.Context, hiddenState ml.Tensor, opts *TextModelOptions) ml.Tensor

type TextModel

type TextModel struct {
	TokenEmbedding *nn.Embedding `gguf:"token_embd"`
	Transformer    *TextDecoder  `gguf:"blk"`
	OutputNorm     *nn.RMSNorm   `gguf:"output_norm"`
	Output         *nn.Linear    `gguf:"output"`

	*TextModelOptions
}

func (*TextModel) Forward

func (m *TextModel) Forward(ctx ml.Context, inputIDs, positionIDs, outputs, mask, crossAttentionStates, crossAttentionMask ml.Tensor, cache *kvcache.WrapperCache) ml.Tensor

func (*TextModel) Shift

func (m *TextModel) Shift(ctx ml.Context, layer int, key, shift ml.Tensor) (ml.Tensor, error)

type TextModelOptions

type TextModelOptions struct {
	// contains filtered or unexported fields
}

type TextSelfAttention

type TextSelfAttention struct {
	Query       *nn.Linear `gguf:"attn_q"`
	Key         *nn.Linear `gguf:"attn_k"`
	Value       *nn.Linear `gguf:"attn_v"`
	Output      *nn.Linear `gguf:"attn_output"`
	RopeFactors ml.Tensor  `gguf:"rope_freqs.weight"`
}

func (*TextSelfAttention) Forward

func (sa *TextSelfAttention) Forward(ctx ml.Context, hiddenState, positions, _ ml.Tensor, cache *kvcache.WrapperCache, opts *TextModelOptions) ml.Tensor

type TextSelfAttentionDecoderLayer

type TextSelfAttentionDecoderLayer struct {
	AttentionNorm *nn.RMSNorm `gguf:"attn_norm"`
	SelfAttention *TextSelfAttention

	MLPNorm *nn.RMSNorm `gguf:"ffn_norm"`
	MLP     *TextMLP
}

func (*TextSelfAttentionDecoderLayer) Forward

func (d *TextSelfAttentionDecoderLayer) Forward(ctx ml.Context, hiddenState, positions, outputs, mask, _, _ ml.Tensor, cache *kvcache.WrapperCache, opts *TextModelOptions) ml.Tensor

type VisionEncoder

type VisionEncoder struct {
	Layers []VisionEncoderLayer
}

func (*VisionEncoder) Forward

func (e *VisionEncoder) Forward(ctx ml.Context, hiddenState ml.Tensor, intermediateLayersIndices []uint32, opts *VisionModelOptions) (ml.Tensor, []ml.Tensor)

type VisionEncoderLayer

type VisionEncoderLayer struct {
	AttentionNorm *nn.LayerNorm `gguf:"ln1"`
	SelfAttention *VisionSelfAttention

	MLPNorm *nn.LayerNorm `gguf:"ln2"`
	MLP     *VisionMLP
}

func (*VisionEncoderLayer) Forward

func (e *VisionEncoderLayer) Forward(ctx ml.Context, hiddenState ml.Tensor, opts *VisionModelOptions) ml.Tensor

type VisionMLP

type VisionMLP struct {
	Down *nn.Linear `gguf:"ffn_down"`
	Up   *nn.Linear `gguf:"ffn_up"`

	Gate ml.Tensor `gguf:"ffn_gate"`
}

func (*VisionMLP) Forward

func (mlp *VisionMLP) Forward(ctx ml.Context, hiddenState ml.Tensor, opts *VisionModelOptions) ml.Tensor

type VisionModel

type VisionModel struct {
	PatchEmbeddings *nn.Conv2D `gguf:"patch_embd"`

	PreTilePositionEmbedding  *PrecomputedAspectRatioEmbedding `gguf:"pre_tile_position_embd"`
	PostTilePositionEmbedding *PrecomputedAspectRatioEmbedding `gguf:"post_tile_position_embd"`
	PositionEmbedding         *PrecomputedPositionEmbedding

	PreLayerNorm   *nn.LayerNorm `gguf:"pre_ln"`
	PostLayerNorm  *nn.LayerNorm `gguf:"post_ln"`
	ClassEmbedding ml.Tensor     `gguf:"class_embd"`

	Transformer       *VisionEncoder `gguf:"blk"`
	GlobalTransformer *VisionEncoder `gguf:"global.blk"`

	*VisionModelOptions
}

func (*VisionModel) Forward

func (m *VisionModel) Forward(ctx ml.Context, pixelValues, positionIDs, aspectRatioIDs ml.Tensor) ml.Tensor
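
A hedged sketch of how the vision tower slots into the pipeline; the tensors here would be built by the caller from Preprocess output:

visionStates := m.VisionModel.Forward(ctx, pixelValues, positionIDs, aspectRatioIDs)
// The vision states are presumably mapped into the text model's hidden
// size by Model.Projector and then passed to TextModel.Forward as
// crossAttentionStates.
_ = visionStates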

type VisionModelOptions

type VisionModelOptions struct {
	// contains filtered or unexported fields
}

type VisionSelfAttention

type VisionSelfAttention struct {
	Query  *nn.Linear `gguf:"attn_q"`
	Key    *nn.Linear `gguf:"attn_k"`
	Value  *nn.Linear `gguf:"attn_v"`
	Output *nn.Linear `gguf:"attn_out"`

	Gate ml.Tensor `gguf:"attn_gate"`
}

func (*VisionSelfAttention) Forward

func (sa *VisionSelfAttention) Forward(ctx ml.Context, hiddenState ml.Tensor, opts *VisionModelOptions) ml.Tensor
