Documentation
¶
Overview ¶
Package libraries provides CUDA runtime library bindings for GoCUDA. This implements AmgX functionality for algebraic multigrid solvers.
Package libraries provides CUDA runtime library bindings for GoCUDA. This implements CUDA Math API functionality for advanced mathematical functions.
Package libraries provides CUDA runtime library bindings for GoCUDA. This implements cuDNN functionality for Deep Neural Networks.
Package libraries provides CUDA runtime library bindings for GoCUDA. This implements cuDSS functionality for direct sparse solver operations.
Package libraries provides CUDA runtime library bindings for GoCUDA. This implements cuFFT functionality for Fast Fourier Transform operations.
Package libraries provides CUDA runtime library bindings for GoCUDA. This implements cuRAND functionality for random number generation.
Package libraries provides CUDA runtime library bindings for GoCUDA. This implements cuSOLVER functionality for linear algebra solvers.
Package libraries provides CUDA runtime library bindings for GoCUDA. This implements cuSPARSE functionality for sparse matrix operations.
Package libraries provides CUDA runtime library bindings for GoCUDA. This implements cuTENSOR functionality for tensor operations and contractions.
Package libraries provides CUDA runtime library bindings for GoCUDA. This implements CUTLASS functionality for the high-performance CUDA C++ template library.
Package libraries provides missing CUDA library functionality. This creates the main export interface for all CUDA runtime libraries.
Package libraries provides CUDA runtime library bindings for GoCUDA. This implements nvJPEG functionality for JPEG encoding/decoding.
Package libraries provides CUDA runtime library bindings for GoCUDA. This implements nvJPEG2000 functionality for JPEG2000 encoding/decoding.
Package libraries provides CUDA runtime library bindings for GoCUDA. This implements Thrust-style algorithms for GPU computing.
Index ¶
- Variables
- func ApplyActivation(input, output *memory.Memory, dims []int, activationType DNNActivationMode) error
- func ComputeElementwise(op MathOperation, a, b, output *memory.Memory, size int) error
- func ComputeUnary(op MathOperation, a, output *memory.Memory, size int) error
- func ConvolutionForward(input, filter, output *memory.Memory, inputDims, filterDims, outputDims []int, ...) error
- func CutlassRank2k(A, B, C *memory.Memory, N, K int, alpha, beta float32) error
- func CutlassSpmm(sparseA, denseB, denseC *memory.Memory, M, N, K int, sparsity float32) error
- func CutlassTrmm(A, B *memory.Memory, M, N int, side, uplo, trans, diag string, alpha float32) error
- func DecodeJpeg2000Image(j2kData []byte) (*memory.Memory, int, int, error)
- func DecodeJpeg2000Quick(j2kData []byte, outputFormat Jpeg2000Format) (*memory.Memory, int, int, error)
- func DecodeJpegImage(jpegData []byte) (*memory.Memory, int, int, error)
- func DecodeJpegQuick(jpegData []byte, outputFormat JpegFormat) (*memory.Memory, int, int, error)
- func EncodeJpeg2000Image(imageData *memory.Memory, width, height int, compressionRatio float32) ([]byte, error)
- func EncodeJpeg2000Lossless(imageData *memory.Memory, width, height int, inputFormat Jpeg2000Format) ([]byte, error)
- func EncodeJpeg2000Quick(imageData *memory.Memory, width, height int, inputFormat Jpeg2000Format, ...) ([]byte, error)
- func EncodeJpegImage(imageData *memory.Memory, width, height int, quality int) ([]byte, error)
- func EncodeJpegQuick(imageData *memory.Memory, width, height int, inputFormat JpegFormat, ...) ([]byte, error)
- func FFT1D(input, output *memory.Memory, size int, forward bool) error
- func FastMathOperations(op MathOperation, a, b, output *memory.Memory, size int) error
- func GemmOperation(A, B, C *memory.Memory, M, N, K int, alpha, beta float32) error
- func GetCutlassVersion() string
- func GetFFTSize(size int) int
- func GetJpegImageInfo(jpegData []byte) (width, height, channels int, err error)
- func HighPrecisionMath(op MathOperation, a, b, output *memory.Memory, size int) error
- func MatrixMultiply(alpha float64, matA *memory.Memory, rowsA, colsA int, matB *memory.Memory, ...) error
- func RandomNumbers(size int, rngType RngType) ([]float32, error)
- func ReduceArray(data *memory.Memory, n int) (float32, error)
- func SimpleContraction(alpha float64, tensorA *memory.Memory, dimA []int, tensorB *memory.Memory, ...) error
- func SolveAmgX(A, x, b *memory.Memory, n int, nnz int) error
- func SolveSparseSystem(A, x, b *memory.Memory, n int, nnz int) error
- func SolveSystem(A, b *memory.Memory, n int) (*memory.Memory, error)
- func SortArray(data *memory.Memory, n int) error
- func TensorContract(alpha float64, tensorA *memory.Memory, dimA []int, tensorB *memory.Memory, ...) error
- func TensorElementwiseOp(operation TensorOperation, alpha float64, tensorA *memory.Memory, dimA []int, ...) error
- func TensorMatMul(alpha float64, matA *memory.Memory, rowsA, colsA int, matB *memory.Memory, ...) error
- func VectorMath(operation MathOperation, a, b, output *memory.Memory, size int) error
- type ActivationDescriptor
- type AmgXCoarsening
- type AmgXConfig
- type AmgXCycle
- type AmgXHandle
- func (handle *AmgXHandle) Destroy() error
- func (handle *AmgXHandle) GetGridComplexity() (float64, error)
- func (handle *AmgXHandle) GetOperatorComplexity() (float64, error)
- func (handle *AmgXHandle) PrintInfo() error
- func (handle *AmgXHandle) Setup(matrix *AmgXMatrix) error
- func (handle *AmgXHandle) Solve(b, x *AmgXVector) (*AmgXSolveInfo, error)
- func (handle *AmgXHandle) SolveMultiple(B, X []*AmgXVector) ([]*AmgXSolveInfo, error)
- func (handle *AmgXHandle) UpdateMatrix(matrix *AmgXMatrix, keepStructure bool) error
- type AmgXInterpolation
- type AmgXMatrix
- type AmgXMode
- type AmgXPrecision
- type AmgXSmoother
- type AmgXSolveInfo
- type AmgXSolver
- type AmgXVector
- type BatchNormDescriptor
- type Complex128
- type Complex64
- type ContractionAlgorithm
- type ContractionDescriptor
- type ConvolutionDescriptor
- func (desc *ConvolutionDescriptor) DestroyConvolutionDescriptor() error
- func (desc *ConvolutionDescriptor) GetConvolution2dForwardOutputDim(inputDesc *TensorDescriptor, filterDesc *FilterDescriptor) (n, c, h, w int, err error)
- func (desc *ConvolutionDescriptor) SetConvolution2dDescriptor(padH, padW, strideH, strideW, dilationH, dilationW int, ...) error
- type CuTensorDescriptor
- type CuTensorHandle
- func (handle *CuTensorHandle) BatchedTensorContraction(batchCount int, alpha float64, tensorA []*memory.Memory, ...) error
- func (handle *CuTensorHandle) CreateContractionPlan(descA *CuTensorDescriptor, modesA []int, descB *CuTensorDescriptor, ...) (*TensorPlan, error)
- func (handle *CuTensorHandle) Destroy() error
- func (handle *CuTensorHandle) ExecuteContractionPlan(plan *TensorPlan, alpha float64, tensorA *memory.Memory, ...) error
- func (handle *CuTensorHandle) TensorContraction(alpha float64, tensorA *memory.Memory, descA *CuTensorDescriptor, modesA []int, ...) error
- func (handle *CuTensorHandle) TensorCopy(alpha float64, tensorA *memory.Memory, descA *CuTensorDescriptor, ...) error
- func (handle *CuTensorHandle) TensorElementwiseAdd(alpha float64, tensorA *memory.Memory, descA *CuTensorDescriptor, beta float64, ...) error
- func (handle *CuTensorHandle) TensorElementwiseMul(alpha float64, tensorA *memory.Memory, descA *CuTensorDescriptor, beta float64, ...) error
- func (handle *CuTensorHandle) TensorPermute(alpha float64, tensorA *memory.Memory, descA *CuTensorDescriptor, ...) error
- func (handle *CuTensorHandle) TensorReduce(alpha float64, tensorA *memory.Memory, descA *CuTensorDescriptor, beta float64, ...) error
- type CutlassConvDesc
- type CutlassConvHandle
- type CutlassConvMode
- type CutlassDataType
- type CutlassEpilogueOp
- type CutlassGemmAlgorithm
- type CutlassGemmDesc
- type CutlassGemmHandle
- type CutlassLayout
- type CutlassOperation
- type DNNActivationMode
- type DNNBatchNormMode
- type DNNConvolutionMode
- type DNNDataType
- type DNNHandle
- func (h *DNNHandle) ActivationForward(activationDesc *ActivationDescriptor, alpha float32, ...) error
- func (h *DNNHandle) BatchNormalizationForwardInference(mode DNNBatchNormMode, alpha, beta float32, inputDesc *TensorDescriptor, ...) error
- func (h *DNNHandle) ConvolutionForward(alpha float32, inputDesc *TensorDescriptor, inputData *memory.Memory, ...) error
- func (h *DNNHandle) DestroyHandle() error
- func (h *DNNHandle) PoolingForward(poolingDesc *PoolingDescriptor, alpha float32, inputDesc *TensorDescriptor, ...) error
- type DNNNanPropagation
- type DNNPoolingMode
- type DNNTensorFormat
- type DSSConfig
- type DSSFactorization
- type DSSHandle
- func (handle *DSSHandle) Analyze(matrix *DSSMatrix) error
- func (handle *DSSHandle) Destroy() error
- func (handle *DSSHandle) Factor(matrix *DSSMatrix) error
- func (handle *DSSHandle) GetDeterminant() (float64, error)
- func (handle *DSSHandle) GetInertia() ([3]int, error)
- func (handle *DSSHandle) Refactor(matrix *DSSMatrix) error
- func (handle *DSSHandle) Solve(b, x *memory.Memory, nrhs int) (*DSSSolutionInfo, error)
- func (handle *DSSHandle) SolveMultiple(B, X *memory.Memory, nrhs int) ([]*DSSSolutionInfo, error)
- type DSSMatrix
- type DSSMatrixFormat
- type DSSOrdering
- type DSSPivotType
- type DSSRefinement
- type DSSSolutionInfo
- type ExecutionPolicy
- type FFTContext
- func (ctx *FFTContext) CreatePlan1D(nx int, fftType FFTType, batch int) (*FFTPlan, error)
- func (ctx *FFTContext) CreatePlan2D(nx, ny int, fftType FFTType) (*FFTPlan, error)
- func (ctx *FFTContext) CreatePlan3D(nx, ny, nz int, fftType FFTType) (*FFTPlan, error)
- func (ctx *FFTContext) DestroyContext() error
- func (ctx *FFTContext) EstimateMemory(plan *FFTPlan) (inputBytes, outputBytes int64)
- func (ctx *FFTContext) ExecC2C(plan *FFTPlan, input, output *memory.Memory, direction FFTDirection) error
- func (ctx *FFTContext) ExecC2R(plan *FFTPlan, input, output *memory.Memory) error
- func (ctx *FFTContext) ExecR2C(plan *FFTPlan, input, output *memory.Memory) error
- type FFTDirection
- type FFTPlan
- type FFTType
- type FilterDescriptor
- func (desc *FilterDescriptor) DestroyFilterDescriptor() error
- func (desc *FilterDescriptor) SetFilter4dDescriptor(dataType DNNDataType, format DNNTensorFormat, k, c, h, w int) error
- func (desc *FilterDescriptor) SetFilterNdDescriptor(dataType DNNDataType, format DNNTensorFormat, dimensions []int) error
- type Jpeg2000Codec
- type Jpeg2000DecodeParams
- type Jpeg2000DecoderState
- func (decoder *Jpeg2000DecoderState) DecodeJpeg2000(j2kData []byte, params Jpeg2000DecodeParams) (*memory.Memory, int, int, error)
- func (decoder *Jpeg2000DecoderState) DecodeJpeg2000Batch(j2kDataList [][]byte, params Jpeg2000DecodeParams) ([]*memory.Memory, []int, []int, error)
- func (decoder *Jpeg2000DecoderState) Destroy() error
- type Jpeg2000EncoderState
- func (encoder *Jpeg2000EncoderState) Destroy() error
- func (encoder *Jpeg2000EncoderState) EncodeJpeg2000(imageData *memory.Memory, width, height int, params Jpeg2000EnodeParams) ([]byte, error)
- func (encoder *Jpeg2000EncoderState) SetCompressionRatio(ratio float32) error
- func (encoder *Jpeg2000EncoderState) SetNumLayers(layers int) error
- func (encoder *Jpeg2000EncoderState) SetNumLevels(levels int) error
- type Jpeg2000EnodeParams
- type Jpeg2000Format
- type Jpeg2000ImageInfo
- type Jpeg2000ProgressionOrder
- type JpegBackend
- type JpegDecodeParams
- type JpegDecoderState
- func (decoder *JpegDecoderState) DecodeJpeg(jpegData []byte, params JpegDecodeParams) (*memory.Memory, int, int, error)
- func (decoder *JpegDecoderState) DecodeJpegBatch(jpegDataList [][]byte, params JpegDecodeParams) ([]*memory.Memory, []int, []int, error)
- func (decoder *JpegDecoderState) Destroy() error
- type JpegEncodeParams
- type JpegEncoderState
- type JpegFormat
- type LUInfo
- type MathConfig
- type MathContext
- func (ctx *MathContext) BatchVectorOps(ops []MathVectorOp) error
- func (ctx *MathContext) Destroy() error
- func (ctx *MathContext) VectorAdd(a, b, output *memory.Memory, size int) error
- func (ctx *MathContext) VectorBesselJ0(a, output *memory.Memory, size int) error
- func (ctx *MathContext) VectorComplexAbs(a, output *memory.Memory, size int) error
- func (ctx *MathContext) VectorComplexArg(a, output *memory.Memory, size int) error
- func (ctx *MathContext) VectorCos(a, output *memory.Memory, size int) error
- func (ctx *MathContext) VectorErf(a, output *memory.Memory, size int) error
- func (ctx *MathContext) VectorExp(a, output *memory.Memory, size int) error
- func (ctx *MathContext) VectorFMA(a, b, c, output *memory.Memory, size int) error
- func (ctx *MathContext) VectorGamma(a, output *memory.Memory, size int) error
- func (ctx *MathContext) VectorLog(a, output *memory.Memory, size int) error
- func (ctx *MathContext) VectorMax(a *memory.Memory, size int) (float64, int, error)
- func (ctx *MathContext) VectorMul(a, b, output *memory.Memory, size int) error
- func (ctx *MathContext) VectorNorm(a *memory.Memory, size int) (float64, error)
- func (ctx *MathContext) VectorPow(a, b, output *memory.Memory, size int) error
- func (ctx *MathContext) VectorRsqrt(a, output *memory.Memory, size int) error
- func (ctx *MathContext) VectorSin(a, output *memory.Memory, size int) error
- func (ctx *MathContext) VectorSinCos(a, sin_out, cos_out *memory.Memory, size int) error
- func (ctx *MathContext) VectorSqrt(a, output *memory.Memory, size int) error
- func (ctx *MathContext) VectorSum(a *memory.Memory, size int) (float64, error)
- func (ctx *MathContext) VectorTan(a, output *memory.Memory, size int) error
- type MathDataType
- type MathOperation
- type MathPrecision
- type MathVectorOp
- type MatrixFormat
- type PoolingDescriptor
- type QRInfo
- type RandomGenerator
- func (rg *RandomGenerator) Destroy() error
- func (rg *RandomGenerator) GenerateLogNormal(output *memory.Memory, n int, mean, stddev float32) error
- func (rg *RandomGenerator) GenerateNormal(output *memory.Memory, n int, mean, stddev float32) error
- func (rg *RandomGenerator) GeneratePoisson(output *memory.Memory, n int, lambda float32) error
- func (rg *RandomGenerator) GenerateUniform(output *memory.Memory, n int) error
- func (rg *RandomGenerator) SetSeed(seed uint64)
- type RngType
- type SVDInfo
- type SolverContext
- func (ctx *SolverContext) CholeskyFactorization(A *memory.Memory, n int) error
- func (ctx *SolverContext) DestroyContext() error
- func (ctx *SolverContext) Eigenvalues(A *memory.Memory, n int, computeVectors bool) (*memory.Memory, *memory.Memory, error)
- func (ctx *SolverContext) LUFactorization(A *memory.Memory, m, n int) (*LUInfo, error)
- func (ctx *SolverContext) PseudoInverse(A *memory.Memory, m, n int) (*memory.Memory, error)
- func (ctx *SolverContext) QRFactorization(A *memory.Memory, m, n int) (*QRInfo, error)
- func (ctx *SolverContext) SVDDecomposition(A *memory.Memory, m, n int, computeUV bool) (*SVDInfo, error)
- func (ctx *SolverContext) SolveLinearSystem(A *memory.Memory, b *memory.Memory, n int) (*memory.Memory, error)
- type SparseContext
- func (ctx *SparseContext) CreateSparseMatrix(rows, cols, nnz int, format MatrixFormat) (*SparseMatrix, error)
- func (ctx *SparseContext) DenseToSparse(dense *memory.Memory, rows, cols int, format MatrixFormat) (*SparseMatrix, error)
- func (ctx *SparseContext) DestroyContext() error
- func (ctx *SparseContext) SpGEMM(A, B *SparseMatrix) (*SparseMatrix, error)
- func (ctx *SparseContext) SpLU(A *SparseMatrix) (*SparseMatrix, *SparseMatrix, error)
- func (ctx *SparseContext) SpMM(alpha float32, A *SparseMatrix, B *SparseMatrix, beta float32, C *SparseMatrix) error
- func (ctx *SparseContext) SpMV(alpha float32, A *SparseMatrix, x *memory.Memory, beta float32, ...) error
- func (ctx *SparseContext) SpSV(A *SparseMatrix, b, x *memory.Memory) error
- func (ctx *SparseContext) SparseToDense(sparse *SparseMatrix) (*memory.Memory, error)
- type SparseMatrix
- type TensorDataType
- type TensorDescriptor
- type TensorLayout
- type TensorMathMode
- type TensorOperation
- type TensorPlan
- type TensorReduction
- type ThrustContext
- func (ctx *ThrustContext) Copy(src, dst *memory.Memory, n int, policy ExecutionPolicy) error
- func (ctx *ThrustContext) CopyIf(src, dst *memory.Memory, n int, predicate string, policy ExecutionPolicy) (int, error)
- func (ctx *ThrustContext) Count(data *memory.Memory, n int, value float32, policy ExecutionPolicy) (int, error)
- func (ctx *ThrustContext) DestroyContext() error
- func (ctx *ThrustContext) ExclusiveScan(input, output *memory.Memory, n int, initValue float32, policy ExecutionPolicy) error
- func (ctx *ThrustContext) Fill(data *memory.Memory, n int, value float32, policy ExecutionPolicy) error
- func (ctx *ThrustContext) Find(data *memory.Memory, n int, value float32, policy ExecutionPolicy) (int, error)
- func (ctx *ThrustContext) Generate(data *memory.Memory, n int, generator string, policy ExecutionPolicy) error
- func (ctx *ThrustContext) MaxElement(data *memory.Memory, n int, policy ExecutionPolicy) (float32, int, error)
- func (ctx *ThrustContext) Merge(input1, input2, output *memory.Memory, n1, n2 int, policy ExecutionPolicy) error
- func (ctx *ThrustContext) MinElement(data *memory.Memory, n int, policy ExecutionPolicy) (float32, int, error)
- func (ctx *ThrustContext) Partition(data *memory.Memory, n int, predicate string, policy ExecutionPolicy) (int, error)
- func (ctx *ThrustContext) Reduce(data *memory.Memory, n int, initValue float32, policy ExecutionPolicy) (float32, error)
- func (ctx *ThrustContext) Scan(input, output *memory.Memory, n int, policy ExecutionPolicy) error
- func (ctx *ThrustContext) SetIntersection(input1, input2, output *memory.Memory, n1, n2 int, policy ExecutionPolicy) (int, error)
- func (ctx *ThrustContext) SetUnion(input1, input2, output *memory.Memory, n1, n2 int, policy ExecutionPolicy) (int, error)
- func (ctx *ThrustContext) Sort(data *memory.Memory, n int, policy ExecutionPolicy) error
- func (ctx *ThrustContext) SortByKey(keys, values *memory.Memory, n int, policy ExecutionPolicy) error
- func (ctx *ThrustContext) Transform(input, output *memory.Memory, n int, operation string, policy ExecutionPolicy) error
- func (ctx *ThrustContext) TransformBinary(input1, input2, output *memory.Memory, n int, operation string, ...) error
- func (ctx *ThrustContext) Unique(data *memory.Memory, n int, policy ExecutionPolicy) (int, error)
Constants ¶
This section is empty.
Variables ¶
var (
	// cuRAND
	CreateRNG = CreateRandomGenerator

	// cuSPARSE
	CreateSparseCtx = CreateSparseContext

	// cuSOLVER
	CreateSolverCtx = CreateSolverContext

	// Thrust
	CreateThrustCtx = CreateThrustContext

	// cuFFT
	CreateFFTCtx = CreateFFTContext

	// cuDNN
	CreateDNNCtx = CreateDNNHandle

	// nvJPEG
	CreateJpegDec = CreateJpegDecoder
	CreateJpegEnc = CreateJpegEncoder

	// nvJPEG2000
	CreateJpeg2000Dec = CreateJpeg2000Decoder
	CreateJpeg2000Enc = CreateJpeg2000Encoder

	// CUTLASS
	CreateCutlassGemmCtx = CreateCutlassGemm
	CreateCutlassConvCtx = CreateCutlassConv

	// cuDSS
	CreateDssCtx = CreateDSSHandle

	// AmgX
	CreateAmgxCtx = CreateAmgXHandle

	// CUDA Math API
	CreateMathCtx = CreateMathContext

	// cuTENSOR
	CreateTensorCtx = CreateCuTensorHandle
)
Library initialization functions
Functions ¶
func ApplyActivation ¶
func ApplyActivation(input, output *memory.Memory, dims []int, activationType DNNActivationMode) error
ApplyActivation provides simplified activation function application
func ComputeElementwise ¶
func ComputeElementwise(op MathOperation, a, b, output *memory.Memory, size int) error
ComputeElementwise performs elementwise operations on vectors
func ComputeUnary ¶
func ComputeUnary(op MathOperation, a, output *memory.Memory, size int) error
ComputeUnary performs unary operations on vectors
func ConvolutionForward ¶
func ConvolutionForward(input, filter, output *memory.Memory, inputDims, filterDims, outputDims []int, padH, padW, strideH, strideW int) error
ConvolutionForward provides simplified convolution operation
func CutlassRank2k ¶
CutlassRank2k performs rank-2k update: C = alpha*A*B^T + alpha*B*A^T + beta*C
func CutlassSpmm ¶
CutlassSpmm performs Sparse Matrix-Dense Matrix Multiplication
func CutlassTrmm ¶
func CutlassTrmm(A, B *memory.Memory, M, N int, side, uplo, trans, diag string, alpha float32) error
CutlassTrmm performs Triangular Matrix Multiplication
func DecodeJpeg2000Image ¶
DecodeJpeg2000Image provides a simple interface for JPEG2000 decoding
func DecodeJpeg2000Quick ¶
func DecodeJpeg2000Quick(j2kData []byte, outputFormat Jpeg2000Format) (*memory.Memory, int, int, error)
DecodeJpeg2000Quick provides a simple interface for JPEG2000 decoding
func DecodeJpegImage ¶
DecodeJpegImage provides a simple interface for JPEG decoding
func DecodeJpegQuick ¶
DecodeJpegQuick provides a simple interface for JPEG decoding
func EncodeJpeg2000Image ¶
func EncodeJpeg2000Image(imageData *memory.Memory, width, height int, compressionRatio float32) ([]byte, error)
EncodeJpeg2000Image provides a simple interface for JPEG2000 encoding
func EncodeJpeg2000Lossless ¶
func EncodeJpeg2000Lossless(imageData *memory.Memory, width, height int, inputFormat Jpeg2000Format) ([]byte, error)
EncodeJpeg2000Lossless provides lossless JPEG2000 encoding
func EncodeJpeg2000Quick ¶
func EncodeJpeg2000Quick(imageData *memory.Memory, width, height int, inputFormat Jpeg2000Format, compressionRatio float32) ([]byte, error)
EncodeJpeg2000Quick provides a simple interface for JPEG2000 encoding
func EncodeJpegImage ¶
EncodeJpegImage provides a simple interface for JPEG encoding
func EncodeJpegQuick ¶
func EncodeJpegQuick(imageData *memory.Memory, width, height int, inputFormat JpegFormat, quality int) ([]byte, error)
EncodeJpegQuick provides a simple interface for JPEG encoding
func FastMathOperations ¶
func FastMathOperations(op MathOperation, a, b, output *memory.Memory, size int) error
FastMathOperations provides optimized math operations with reduced precision
func GemmOperation ¶
GemmOperation performs a simple GEMM operation using CUTLASS
func GetCutlassVersion ¶
func GetCutlassVersion() string
GetCutlassVersion returns the simulated CUTLASS version
func GetFFTSize ¶
GetFFTSize returns the optimal FFT size for a given input size
func GetJpegImageInfo ¶
GetJpegImageInfo extracts basic information from JPEG data without full decoding
func HighPrecisionMath ¶
func HighPrecisionMath(op MathOperation, a, b, output *memory.Memory, size int) error
HighPrecisionMath provides IEEE 754 compliant high-precision operations
func MatrixMultiply ¶
func MatrixMultiply( alpha float64, matA *memory.Memory, rowsA, colsA int, matB *memory.Memory, rowsB, colsB int, beta float64, matC *memory.Memory) error
MatrixMultiply performs matrix multiplication using tensor contraction
func RandomNumbers ¶
RandomNumbers provides a simplified interface for random number generation
func ReduceArray ¶
ReduceArray provides simplified parallel reduction
func SimpleContraction ¶
func SimpleContraction( alpha float64, tensorA *memory.Memory, dimA []int, tensorB *memory.Memory, dimB []int, beta float64, tensorC *memory.Memory, dimC []int) error
SimpleContraction performs a simple tensor contraction with default settings
func SolveSparseSystem ¶
SolveSparseSystem demonstrates cuDSS usage for sparse linear systems
func SolveSystem ¶
SolveSystem provides simplified linear system solving
func TensorContract ¶
func TensorContract( alpha float64, tensorA *memory.Memory, dimA []int, tensorB *memory.Memory, dimB []int, beta float64, tensorC *memory.Memory, dimC []int) error
TensorContract performs tensor contraction operations
func TensorElementwiseOp ¶
func TensorElementwiseOp( operation TensorOperation, alpha float64, tensorA *memory.Memory, dimA []int, beta float64, tensorB *memory.Memory, dimB []int, tensorC *memory.Memory) error
TensorElementwiseOp performs element-wise operations on tensors
func TensorMatMul ¶
func TensorMatMul( alpha float64, matA *memory.Memory, rowsA, colsA int, matB *memory.Memory, rowsB, colsB int, beta float64, matC *memory.Memory) error
TensorMatMul performs matrix multiplication using tensor operations
func VectorMath ¶
func VectorMath(operation MathOperation, a, b, output *memory.Memory, size int) error
VectorMath performs element-wise mathematical operations with default settings
Types ¶
type ActivationDescriptor ¶
type ActivationDescriptor struct {
// contains filtered or unexported fields
}
ActivationDescriptor describes an activation function
func CreateActivationDescriptor ¶
func CreateActivationDescriptor() (*ActivationDescriptor, error)
CreateActivationDescriptor creates an activation descriptor
func (*ActivationDescriptor) DestroyActivationDescriptor ¶
func (desc *ActivationDescriptor) DestroyActivationDescriptor() error
DestroyActivationDescriptor destroys an activation descriptor
func (*ActivationDescriptor) SetActivationDescriptor ¶
func (desc *ActivationDescriptor) SetActivationDescriptor(mode DNNActivationMode, nanOpt DNNNanPropagation, coeff float64) error
SetActivationDescriptor sets the activation descriptor
type AmgXCoarsening ¶
type AmgXCoarsening int
AmgX coarsening algorithms
const ( AmgXCoarseningPMIS AmgXCoarsening = iota AmgXCoarseningRuge_Stueben AmgXCoarseningHMIS AmgXCoarseningFalgout AmgXCoarseningMultiPASS )
type AmgXConfig ¶
type AmgXConfig struct { Solver AmgXSolver Precision AmgXPrecision Mode AmgXMode MaxIterations int Tolerance float64 RelativeTolerance float64 Cycle AmgXCycle Coarsening AmgXCoarsening Interpolation AmgXInterpolation Smoother AmgXSmoother PreSmoothSteps int PostSmoothSteps int MaxLevels int CoarseGridSize int StrongThreshold float64 SmootherWeight float64 UseScaling bool Deterministic bool MonitorResidual bool PrintSolveStats bool }
AmgX configuration
type AmgXHandle ¶
type AmgXHandle struct {
// contains filtered or unexported fields
}
AmgX handle
func CreateAmgXHandle ¶
func CreateAmgXHandle(config AmgXConfig) (*AmgXHandle, error)
CreateAmgXHandle creates a new AmgX solver handle
func (*AmgXHandle) Destroy ¶
func (handle *AmgXHandle) Destroy() error
Destroy cleans up AmgX handle resources
func (*AmgXHandle) GetGridComplexity ¶
func (handle *AmgXHandle) GetGridComplexity() (float64, error)
GetGridComplexity returns the grid complexity of the AMG hierarchy
func (*AmgXHandle) GetOperatorComplexity ¶
func (handle *AmgXHandle) GetOperatorComplexity() (float64, error)
GetOperatorComplexity returns the operator complexity of the AMG hierarchy
func (*AmgXHandle) PrintInfo ¶
func (handle *AmgXHandle) PrintInfo() error
PrintInfo prints information about the AMG hierarchy
func (*AmgXHandle) Setup ¶
func (handle *AmgXHandle) Setup(matrix *AmgXMatrix) error
Setup performs the AMG setup phase (coarsening, interpolation, etc.)
func (*AmgXHandle) Solve ¶
func (handle *AmgXHandle) Solve(b, x *AmgXVector) (*AmgXSolveInfo, error)
Solve solves the linear system using AMG
func (*AmgXHandle) SolveMultiple ¶
func (handle *AmgXHandle) SolveMultiple(B, X []*AmgXVector) ([]*AmgXSolveInfo, error)
SolveMultiple solves multiple systems with the same matrix
func (*AmgXHandle) UpdateMatrix ¶
func (handle *AmgXHandle) UpdateMatrix(matrix *AmgXMatrix, keepStructure bool) error
UpdateMatrix updates the matrix values (keeping the same sparsity pattern)
type AmgXInterpolation ¶
type AmgXInterpolation int
AmgX interpolation methods
const ( AmgXInterpolationClassical AmgXInterpolation = iota AmgXInterpolationDirect AmgXInterpolationMultipass AmgXInterpolationExtended AmgXInterpolationModifiedClassical )
type AmgXMatrix ¶
type AmgXMatrix struct {
// contains filtered or unexported fields
}
AmgX matrix
func CreateAmgXMatrix ¶
func CreateAmgXMatrix(n, nnz int, rowPtr, colInd, values *memory.Memory, mode AmgXMode) (*AmgXMatrix, error)
CreateAmgXMatrix creates an AmgX matrix
func (*AmgXMatrix) Destroy ¶
func (matrix *AmgXMatrix) Destroy() error
Destroy cleans up AmgX matrix resources
type AmgXPrecision ¶
type AmgXPrecision int
AmgX precision modes
const ( AmgXPrecisionFloat AmgXPrecision = iota AmgXPrecisionDouble AmgXPrecisionComplexFloat AmgXPrecisionComplexDouble )
type AmgXSmoother ¶
type AmgXSmoother int
AmgX smoothers
const ( AmgXSmootherJacobi AmgXSmoother = iota AmgXSmootherGS AmgXSmootherSGS AmgXSmootherBlockJacobi AmgXSmootherCF_Jacobi AmgXSmootherL1_Jacobi AmgXSmootherChebyshev AmgXSmootherPolynomial )
type AmgXSolveInfo ¶
type AmgXSolveInfo struct { Iterations int RelativeResidual float64 AbsoluteResidual float64 ConvergenceReason string SolveTime float64 SetupTime float64 GridComplexity float64 OperatorComplexity float64 Levels int }
AmgX solve info
type AmgXSolver ¶
type AmgXSolver int
AmgX solver types
const ( AmgXSolverAMG AmgXSolver = iota AmgXSolverPCG AmgXSolverPBICGSTAB AmgXSolverGMRES AmgXSolverFGMRES AmgXSolverCG AmgXSolverBICGSTAB AmgXSolverIDR AmgXSolverKPF )
type AmgXVector ¶
type AmgXVector struct {
// contains filtered or unexported fields
}
AmgX vector
func CreateAmgXVector ¶
CreateAmgXVector creates an AmgX vector
func (*AmgXVector) Destroy ¶
func (vector *AmgXVector) Destroy() error
Destroy cleans up AmgX vector resources
type BatchNormDescriptor ¶
type BatchNormDescriptor struct {
// contains filtered or unexported fields
}
BatchNormDescriptor describes batch normalization
func CreateBatchNormDescriptor ¶
func CreateBatchNormDescriptor() (*BatchNormDescriptor, error)
CreateBatchNormDescriptor creates a batch normalization descriptor
func (*BatchNormDescriptor) DestroyBatchNormDescriptor ¶
func (desc *BatchNormDescriptor) DestroyBatchNormDescriptor() error
DestroyBatchNormDescriptor destroys a batch normalization descriptor
type Complex128 ¶
Complex128 represents a double-precision complex number
type ContractionAlgorithm ¶
type ContractionAlgorithm int
Contraction algorithms
const ( ContractionAlgoDefault ContractionAlgorithm = iota ContractionAlgoGEMM ContractionAlgoTensorCore ContractionAlgoOptimal ContractionAlgoFastest ContractionAlgoLowestMemory )
type ContractionDescriptor ¶
type ContractionDescriptor struct {
	TensorA   *CuTensorDescriptor
	TensorB   *CuTensorDescriptor
	TensorC   *CuTensorDescriptor
	ModesA    []int // Contraction modes for tensor A
	ModesB    []int // Contraction modes for tensor B
	ModesC    []int // Output modes for tensor C
	Alpha     float64
	Beta      float64
	Algorithm ContractionAlgorithm
	Workspace *memory.Memory
}
Tensor contraction descriptor
type ConvolutionDescriptor ¶
type ConvolutionDescriptor struct {
// contains filtered or unexported fields
}
ConvolutionDescriptor describes a convolution operation
func CreateConvolutionDescriptor ¶
func CreateConvolutionDescriptor() (*ConvolutionDescriptor, error)
CreateConvolutionDescriptor creates a convolution descriptor
func (*ConvolutionDescriptor) DestroyConvolutionDescriptor ¶
func (desc *ConvolutionDescriptor) DestroyConvolutionDescriptor() error
DestroyConvolutionDescriptor destroys a convolution descriptor
func (*ConvolutionDescriptor) GetConvolution2dForwardOutputDim ¶
func (desc *ConvolutionDescriptor) GetConvolution2dForwardOutputDim(inputDesc *TensorDescriptor, filterDesc *FilterDescriptor) (n, c, h, w int, err error)
GetConvolution2dForwardOutputDim calculates output dimensions for convolution
func (*ConvolutionDescriptor) SetConvolution2dDescriptor ¶
func (desc *ConvolutionDescriptor) SetConvolution2dDescriptor(padH, padW, strideH, strideW, dilationH, dilationW int, mode DNNConvolutionMode, dataType DNNDataType) error
SetConvolution2dDescriptor sets the convolution descriptor for 2D convolutions
type CuTensorDescriptor ¶
type CuTensorDescriptor struct {
// contains filtered or unexported fields
}
Tensor descriptor
func CreateCuTensorDescriptor ¶
func CreateCuTensorDescriptor(dataType TensorDataType, dimensions []int, layout TensorLayout) (*CuTensorDescriptor, error)
CreateCuTensorDescriptor creates a tensor descriptor
type CuTensorHandle ¶
type CuTensorHandle struct {
// contains filtered or unexported fields
}
cuTENSOR handle
func CreateCuTensorHandle ¶
func CreateCuTensorHandle() (*CuTensorHandle, error)
CreateCuTensorHandle creates a new cuTENSOR handle
func (*CuTensorHandle) BatchedTensorContraction ¶
func (handle *CuTensorHandle) BatchedTensorContraction( batchCount int, alpha float64, tensorA []*memory.Memory, descA *CuTensorDescriptor, modesA []int, tensorB []*memory.Memory, descB *CuTensorDescriptor, modesB []int, beta float64, tensorC []*memory.Memory, descC *CuTensorDescriptor, modesC []int, algorithm ContractionAlgorithm) error
BatchedTensorContraction performs batched tensor contractions
func (*CuTensorHandle) CreateContractionPlan ¶
func (handle *CuTensorHandle) CreateContractionPlan( descA *CuTensorDescriptor, modesA []int, descB *CuTensorDescriptor, modesB []int, descC *CuTensorDescriptor, modesC []int, algorithm ContractionAlgorithm) (*TensorPlan, error)
CreateContractionPlan creates an optimized execution plan for tensor contraction
func (*CuTensorHandle) Destroy ¶
func (handle *CuTensorHandle) Destroy() error
Destroy cleans up cuTENSOR handle resources
func (*CuTensorHandle) ExecuteContractionPlan ¶
func (handle *CuTensorHandle) ExecuteContractionPlan( plan *TensorPlan, alpha float64, tensorA *memory.Memory, tensorB *memory.Memory, beta float64, tensorC *memory.Memory) error
ExecuteContractionPlan executes a pre-compiled contraction plan
func (*CuTensorHandle) TensorContraction ¶
func (handle *CuTensorHandle) TensorContraction( alpha float64, tensorA *memory.Memory, descA *CuTensorDescriptor, modesA []int, tensorB *memory.Memory, descB *CuTensorDescriptor, modesB []int, beta float64, tensorC *memory.Memory, descC *CuTensorDescriptor, modesC []int, algorithm ContractionAlgorithm) error
TensorContraction performs general tensor contraction
func (*CuTensorHandle) TensorCopy ¶
func (handle *CuTensorHandle) TensorCopy( alpha float64, tensorA *memory.Memory, descA *CuTensorDescriptor, tensorC *memory.Memory, descC *CuTensorDescriptor) error
TensorCopy performs tensor copy with potential layout conversion
func (*CuTensorHandle) TensorElementwiseAdd ¶
func (handle *CuTensorHandle) TensorElementwiseAdd( alpha float64, tensorA *memory.Memory, descA *CuTensorDescriptor, beta float64, tensorB *memory.Memory, descB *CuTensorDescriptor, gamma float64, tensorC *memory.Memory, descC *CuTensorDescriptor) error
TensorElementwiseAdd performs element-wise addition
func (*CuTensorHandle) TensorElementwiseMul ¶
func (handle *CuTensorHandle) TensorElementwiseMul( alpha float64, tensorA *memory.Memory, descA *CuTensorDescriptor, beta float64, tensorB *memory.Memory, descB *CuTensorDescriptor, gamma float64, tensorC *memory.Memory, descC *CuTensorDescriptor) error
TensorElementwiseMul performs element-wise multiplication
func (*CuTensorHandle) TensorPermute ¶
func (handle *CuTensorHandle) TensorPermute( alpha float64, tensorA *memory.Memory, descA *CuTensorDescriptor, tensorC *memory.Memory, descC *CuTensorDescriptor, perm []int) error
TensorPermute performs tensor permutation (transpose generalization)
func (*CuTensorHandle) TensorReduce ¶
func (handle *CuTensorHandle) TensorReduce( alpha float64, tensorA *memory.Memory, descA *CuTensorDescriptor, beta float64, tensorC *memory.Memory, descC *CuTensorDescriptor, reduceModes []int, reductionOp TensorReduction) error
TensorReduce performs tensor reduction along specified modes
type CutlassConvDesc ¶
type CutlassConvDesc struct {
N, H, W, C int // Input dimensions
K int // Output channels
R, S int // Filter dimensions
PadH, PadW int // Padding
StrideH, StrideW int // Stride
DilationH, DilationW int // Dilation
Mode CutlassConvMode
DataType CutlassDataType
Algorithm CutlassGemmAlgorithm
}
CUTLASS convolution descriptor
type CutlassConvHandle ¶
type CutlassConvHandle struct {
// contains filtered or unexported fields
}
CUTLASS convolution handle
func CreateCutlassConv ¶
func CreateCutlassConv(desc CutlassConvDesc) (*CutlassConvHandle, error)
CreateCutlassConv creates a CUTLASS convolution operation handle
func (*CutlassConvHandle) CutlassConv ¶
func (handle *CutlassConvHandle) CutlassConv(input, filter, output *memory.Memory) error
CutlassConv performs convolution using CUTLASS
func (*CutlassConvHandle) Destroy ¶
func (handle *CutlassConvHandle) Destroy() error
Destroy cleans up convolution handle resources
type CutlassConvMode ¶
type CutlassConvMode int
CUTLASS convolution modes
const ( CutlassConvForward CutlassConvMode = iota CutlassConvDgrad CutlassConvWgrad )
type CutlassDataType ¶
type CutlassDataType int
CUTLASS data types
const ( CutlassFloat16 CutlassDataType = iota CutlassFloat32 CutlassFloat64 CutlassBFloat16 CutlassInt8 CutlassInt16 CutlassInt32 CutlassTensorFloat32 CutlassComplexFloat32 CutlassComplexFloat64 )
type CutlassEpilogueOp ¶
type CutlassEpilogueOp int
CUTLASS epilogue operations
const ( CutlassEpilogueLinearCombination CutlassEpilogueOp = iota CutlassEpilogueLinearCombinationClamp CutlassEpilogueBias CutlassEpilogueRelu CutlassEpilogueGelu CutlassEpilogueSigmoid )
type CutlassGemmAlgorithm ¶
type CutlassGemmAlgorithm int
CUTLASS GEMM algorithms
const ( CutlassGemmDefault CutlassGemmAlgorithm = iota CutlassGemmSiMt CutlassGemmAnalytic CutlassGemmPlanar CutlassGemmWmma CutlassGemmTensorOp CutlassGemmSparseTensorOp )
func GetOptimalGemmAlgorithm ¶
func GetOptimalGemmAlgorithm(M, N, K int, dataType CutlassDataType) CutlassGemmAlgorithm
GetOptimalGemmAlgorithm suggests optimal GEMM algorithm based on problem size
type CutlassGemmDesc ¶
type CutlassGemmDesc struct {
M, N, K int
DataType CutlassDataType
LayoutA CutlassLayout
LayoutB CutlassLayout
LayoutC CutlassLayout
OpA CutlassOperation
OpB CutlassOperation
Algorithm CutlassGemmAlgorithm
EpilogueOp CutlassEpilogueOp
Alpha float32
Beta float32
SplitKSlices int
}
CUTLASS GEMM descriptor
type CutlassGemmHandle ¶
type CutlassGemmHandle struct {
// contains filtered or unexported fields
}
CUTLASS GEMM handle
func CreateCutlassGemm ¶
func CreateCutlassGemm(desc CutlassGemmDesc) (*CutlassGemmHandle, error)
CreateCutlassGemm creates a CUTLASS GEMM operation handle
func (*CutlassGemmHandle) CutlassGemm ¶
func (handle *CutlassGemmHandle) CutlassGemm(A, B, C *memory.Memory) error
CutlassGemm performs General Matrix Multiplication using CUTLASS
func (*CutlassGemmHandle) CutlassGemmBatched ¶
func (handle *CutlassGemmHandle) CutlassGemmBatched(A, B, C []*memory.Memory, batchCount int) error
CutlassGemmBatched performs batched GEMM operations
func (*CutlassGemmHandle) Destroy ¶
func (handle *CutlassGemmHandle) Destroy() error
Destroy cleans up GEMM handle resources
type CutlassLayout ¶
type CutlassLayout int
CUTLASS matrix layouts
const ( CutlassRowMajor CutlassLayout = iota CutlassColumnMajor )
type CutlassOperation ¶
type CutlassOperation int
CUTLASS operation types
const ( CutlassOpN CutlassOperation = iota // No transpose CutlassOpT // Transpose CutlassOpC // Conjugate transpose )
type DNNActivationMode ¶
type DNNActivationMode int
DNNActivationMode represents activation function types
const ( DNNActivationSigmoid DNNActivationMode = iota DNNActivationRelu DNNActivationTanh DNNActivationClippedRelu DNNActivationElu DNNActivationIdentity DNNActivationSwish )
type DNNBatchNormMode ¶
type DNNBatchNormMode int
DNNBatchNormMode represents batch normalization modes
const ( DNNBatchNormPerActivation DNNBatchNormMode = iota DNNBatchNormSpatial )
type DNNConvolutionMode ¶
type DNNConvolutionMode int
DNNConvolutionMode represents convolution modes
const ( DNNConvolution DNNConvolutionMode = iota DNNCrossCorrelation )
type DNNDataType ¶
type DNNDataType int
DNNDataType represents cuDNN data types
const ( DNNDataFloat DNNDataType = iota DNNDataDouble DNNDataHalf DNNDataInt8 DNNDataInt32 DNNDataInt8x4 DNNDataUint8 DNNDataUint8x4 DNNDataInt8x32 DNNDataBFloat16 DNNDataInt64 )
type DNNHandle ¶
type DNNHandle struct {
// contains filtered or unexported fields
}
DNNHandle represents a cuDNN library handle
func CreateDNNHandle ¶
CreateDNNHandle creates a cuDNN library handle
func (*DNNHandle) ActivationForward ¶
func (h *DNNHandle) ActivationForward(activationDesc *ActivationDescriptor, alpha float32, inputDesc *TensorDescriptor, inputData *memory.Memory, beta float32, outputDesc *TensorDescriptor, outputData *memory.Memory) error
ActivationForward performs forward activation
func (*DNNHandle) BatchNormalizationForwardInference ¶
func (h *DNNHandle) BatchNormalizationForwardInference(mode DNNBatchNormMode, alpha, beta float32, inputDesc *TensorDescriptor, input *memory.Memory, outputDesc *TensorDescriptor, output *memory.Memory, bnScaleBiasDesc *TensorDescriptor, bnScale, bnBias, estimatedMean, estimatedVariance *memory.Memory, epsilon float64) error
BatchNormalizationForwardInference performs batch normalization inference
func (*DNNHandle) ConvolutionForward ¶
func (h *DNNHandle) ConvolutionForward(alpha float32, inputDesc *TensorDescriptor, inputData *memory.Memory, filterDesc *FilterDescriptor, filterData *memory.Memory, convDesc *ConvolutionDescriptor, beta float32, outputDesc *TensorDescriptor, outputData *memory.Memory) error
ConvolutionForward performs forward convolution
func (*DNNHandle) DestroyHandle ¶
DestroyHandle destroys the cuDNN handle
func (*DNNHandle) PoolingForward ¶
func (h *DNNHandle) PoolingForward(poolingDesc *PoolingDescriptor, alpha float32, inputDesc *TensorDescriptor, inputData *memory.Memory, beta float32, outputDesc *TensorDescriptor, outputData *memory.Memory) error
PoolingForward performs forward pooling
type DNNNanPropagation ¶
type DNNNanPropagation int
DNNNanPropagation represents NaN propagation modes
const ( DNNNotPropagateNaN DNNNanPropagation = iota DNNPropagateNaN )
type DNNPoolingMode ¶
type DNNPoolingMode int
DNNPoolingMode represents pooling modes
const ( DNNPoolingMax DNNPoolingMode = iota DNNPoolingAverageCountIncludePadding DNNPoolingAverageCountExcludePadding DNNPoolingMaxDeterministic )
type DNNTensorFormat ¶
type DNNTensorFormat int
DNNTensorFormat represents tensor memory layouts
const ( DNNTensorNHWC DNNTensorFormat = iota // batch, height, width, channels DNNTensorNCHW // batch, channels, height, width DNNTensorNCHWVectC // vectorized channels DNNTensorNHWCVectC // vectorized channels )
type DSSConfig ¶
type DSSConfig struct { MatrixFormat DSSMatrixFormat Factorization DSSFactorization Ordering DSSOrdering Refinement DSSRefinement PivotType DSSPivotType PivotThreshold float64 Symmetry bool Deterministic bool UseGPU bool }
DSS configuration structure
type DSSFactorization ¶
type DSSFactorization int
DSS factorization algorithms
const ( DSSFactorizationLU DSSFactorization = iota DSSFactorizationLDLT DSSFactorizationCholesky DSSFactorizationQR )
type DSSHandle ¶
type DSSHandle struct {
// contains filtered or unexported fields
}
DSS solver handle
func CreateDSSHandle ¶
CreateDSSHandle creates a new cuDSS solver handle
func (*DSSHandle) GetDeterminant ¶
GetDeterminant computes the determinant of the factored matrix
func (*DSSHandle) GetInertia ¶
GetInertia computes the inertia of the factored matrix
func (*DSSHandle) SolveMultiple ¶
SolveMultiple solves multiple linear systems with the same matrix
type DSSMatrix ¶
type DSSMatrix struct {
// contains filtered or unexported fields
}
DSS matrix descriptor
func CreateDSSMatrix ¶
func CreateDSSMatrix(n, nnz int, rowPtr, colInd, values *memory.Memory, format DSSMatrixFormat, symmetry bool) (*DSSMatrix, error)
CreateDSSMatrix creates a DSS matrix from sparse data
type DSSMatrixFormat ¶
type DSSMatrixFormat int
DSS matrix formats
const ( DSSMatrixFormatCSR DSSMatrixFormat = iota DSSMatrixFormatCOO DSSMatrixFormatCSC )
type DSSOrdering ¶
type DSSOrdering int
DSS ordering algorithms
const ( DSSOrderingNone DSSOrdering = iota DSSOrderingAMD DSSOrderingMETIS DSSOrderingNDBox DSSOrderingRCM DSSOrderingFillReducing )
type DSSPivotType ¶
type DSSPivotType int
DSS pivot type
const ( DSSPivotNone DSSPivotType = iota DSSPivotPartial DSSPivotRook DSSPivotBunch )
type DSSRefinement ¶
type DSSRefinement int
DSS refinement options
const ( DSSRefinementNone DSSRefinement = iota DSSRefinementSingle DSSRefinementDouble DSSRefinementMixed )
type DSSSolutionInfo ¶
type DSSSolutionInfo struct { Iterations int Residual float64 Error float64 Determinant float64 Inertia [3]int // positive, negative, zero eigenvalues PivotGrowth float64 ConditionNumber float64 BackwardError float64 ComponentwiseError float64 }
DSS solution info
func SolveDirect ¶
func SolveDirect(n, nnz int, rowPtr, colInd, values, b, x *memory.Memory) (*DSSSolutionInfo, error)
SolveDirect provides a simple interface for direct sparse solving
func SolveSymmetric ¶
func SolveSymmetric(n, nnz int, rowPtr, colInd, values, b, x *memory.Memory) (*DSSSolutionInfo, error)
SolveSymmetric solves a symmetric positive definite system using Cholesky
type ExecutionPolicy ¶
type ExecutionPolicy int
ExecutionPolicy defines how algorithms should execute
const ( PolicyDevice ExecutionPolicy = iota // Execute on GPU PolicyHost // Execute on CPU PolicyCuda // CUDA-specific optimizations )
type FFTContext ¶
type FFTContext struct {
// contains filtered or unexported fields
}
FFTContext manages cuFFT operations
func CreateFFTContext ¶
func CreateFFTContext() (*FFTContext, error)
CreateFFTContext creates a new cuFFT context
func (*FFTContext) CreatePlan1D ¶
CreatePlan1D creates a 1D FFT plan
func (*FFTContext) CreatePlan2D ¶
func (ctx *FFTContext) CreatePlan2D(nx, ny int, fftType FFTType) (*FFTPlan, error)
CreatePlan2D creates a 2D FFT plan
func (*FFTContext) CreatePlan3D ¶
func (ctx *FFTContext) CreatePlan3D(nx, ny, nz int, fftType FFTType) (*FFTPlan, error)
CreatePlan3D creates a 3D FFT plan
func (*FFTContext) DestroyContext ¶
func (ctx *FFTContext) DestroyContext() error
DestroyContext destroys the cuFFT context
func (*FFTContext) EstimateMemory ¶
func (ctx *FFTContext) EstimateMemory(plan *FFTPlan) (inputBytes, outputBytes int64)
EstimateMemory estimates the input and output memory requirements, in bytes, for executing the given FFT plan
func (*FFTContext) ExecC2C ¶
func (ctx *FFTContext) ExecC2C(plan *FFTPlan, input, output *memory.Memory, direction FFTDirection) error
ExecC2C executes a complex-to-complex FFT
type FFTDirection ¶
type FFTDirection int
FFTDirection represents FFT direction
const ( FFTForward FFTDirection = -1 FFTInverse FFTDirection = 1 )
type FFTPlan ¶
type FFTPlan struct {
// contains filtered or unexported fields
}
FFTPlan represents a cuFFT execution plan
func (*FFTPlan) DestroyPlan ¶
DestroyPlan destroys an FFT plan
type FilterDescriptor ¶
type FilterDescriptor struct {
// contains filtered or unexported fields
}
FilterDescriptor describes a convolution filter
func CreateFilterDescriptor ¶
func CreateFilterDescriptor() (*FilterDescriptor, error)
CreateFilterDescriptor creates a filter descriptor
func (*FilterDescriptor) DestroyFilterDescriptor ¶
func (desc *FilterDescriptor) DestroyFilterDescriptor() error
DestroyFilterDescriptor destroys a filter descriptor
func (*FilterDescriptor) SetFilter4dDescriptor ¶
func (desc *FilterDescriptor) SetFilter4dDescriptor(dataType DNNDataType, format DNNTensorFormat, k, c, h, w int) error
SetFilter4dDescriptor sets the filter descriptor for 4D filters
func (*FilterDescriptor) SetFilterNdDescriptor ¶
func (desc *FilterDescriptor) SetFilterNdDescriptor(dataType DNNDataType, format DNNTensorFormat, dimensions []int) error
SetFilterNdDescriptor sets the filter descriptor for N-dimensional filters
type Jpeg2000Codec ¶
type Jpeg2000Codec int
JPEG2000 codec types
const ( Jpeg2000CodecJ2K Jpeg2000Codec = iota // Raw JPEG2000 codestream Jpeg2000CodecJP2 // JPEG2000 with JP2 container Jpeg2000CodecJPT // JPEG2000 with JPT container Jpeg2000CodecJPX // JPEG2000 with JPX container )
type Jpeg2000DecodeParams ¶
type Jpeg2000DecodeParams struct { OutputFormat Jpeg2000Format Codec Jpeg2000Codec DecodeLayer int // Decode up to this layer (-1 for all) DecodeLevel int // Decode up to this resolution level (-1 for all) CropX int CropY int CropWidth int CropHeight int ReduceFactor int // Reduce image by 2^reduce_factor }
JPEG2000 decode parameters
type Jpeg2000DecoderState ¶
type Jpeg2000DecoderState struct {
// contains filtered or unexported fields
}
JPEG2000 decoder state
func CreateJpeg2000Decoder ¶
func CreateJpeg2000Decoder(codec Jpeg2000Codec) (*Jpeg2000DecoderState, error)
CreateJpeg2000Decoder creates a new JPEG2000 decoder
func (*Jpeg2000DecoderState) DecodeJpeg2000 ¶
func (decoder *Jpeg2000DecoderState) DecodeJpeg2000(j2kData []byte, params Jpeg2000DecodeParams) (*memory.Memory, int, int, error)
DecodeJpeg2000 decodes a JPEG2000 image from byte data
func (*Jpeg2000DecoderState) DecodeJpeg2000Batch ¶
func (decoder *Jpeg2000DecoderState) DecodeJpeg2000Batch(j2kDataList [][]byte, params Jpeg2000DecodeParams) ([]*memory.Memory, []int, []int, error)
DecodeJpeg2000Batch decodes multiple JPEG2000 images in batch
func (*Jpeg2000DecoderState) Destroy ¶
func (decoder *Jpeg2000DecoderState) Destroy() error
Destroy cleans up decoder resources
type Jpeg2000EncoderState ¶
type Jpeg2000EncoderState struct {
// contains filtered or unexported fields
}
JPEG2000 encoder state
func CreateJpeg2000Encoder ¶
func CreateJpeg2000Encoder(codec Jpeg2000Codec) (*Jpeg2000EncoderState, error)
CreateJpeg2000Encoder creates a new JPEG2000 encoder
func (*Jpeg2000EncoderState) Destroy ¶
func (encoder *Jpeg2000EncoderState) Destroy() error
Destroy cleans up encoder resources
func (*Jpeg2000EncoderState) EncodeJpeg2000 ¶
func (encoder *Jpeg2000EncoderState) EncodeJpeg2000(imageData *memory.Memory, width, height int, params Jpeg2000EnodeParams) ([]byte, error)
EncodeJpeg2000 encodes image data to JPEG2000 format
func (*Jpeg2000EncoderState) SetCompressionRatio ¶
func (encoder *Jpeg2000EncoderState) SetCompressionRatio(ratio float32) error
SetCompressionRatio sets the JPEG2000 compression ratio
func (*Jpeg2000EncoderState) SetNumLayers ¶
func (encoder *Jpeg2000EncoderState) SetNumLayers(layers int) error
SetNumLayers sets the number of quality layers
func (*Jpeg2000EncoderState) SetNumLevels ¶
func (encoder *Jpeg2000EncoderState) SetNumLevels(levels int) error
SetNumLevels sets the number of wavelet decomposition levels
type Jpeg2000EnodeParams ¶
type Jpeg2000EnodeParams struct { InputFormat Jpeg2000Format Codec Jpeg2000Codec CompressionRatio float32 Lossless bool NumLayers int NumLevels int ProgressionOrder Jpeg2000ProgressionOrder CodeblockWidth int CodeblockHeight int PrecinctWidth []int PrecinctHeight []int }
JPEG2000 encode parameters
type Jpeg2000Format ¶
type Jpeg2000Format int
JPEG2000 formats and configurations
const ( Jpeg2000FormatRGB Jpeg2000Format = iota Jpeg2000FormatBGR Jpeg2000FormatRGBA Jpeg2000FormatBGRA Jpeg2000FormatGrayscale Jpeg2000FormatYUV420 Jpeg2000FormatYUV422 Jpeg2000FormatYUV444 )
type Jpeg2000ImageInfo ¶
type Jpeg2000ImageInfo struct { Width int Height int Components int BitDepth int NumLayers int NumLevels int Codec Jpeg2000Codec }
JPEG2000 image information
func GetJpeg2000ImageInfo ¶
func GetJpeg2000ImageInfo(j2kData []byte) (*Jpeg2000ImageInfo, error)
GetJpeg2000ImageInfo extracts detailed information from JPEG2000 data
type Jpeg2000ProgressionOrder ¶
type Jpeg2000ProgressionOrder int
JPEG2000 progression orders
const ( Jpeg2000ProgressionLRCP Jpeg2000ProgressionOrder = iota // Layer-Resolution-Component-Position Jpeg2000ProgressionRLCP // Resolution-Layer-Component-Position Jpeg2000ProgressionRPCL // Resolution-Position-Component-Layer Jpeg2000ProgressionPCRL // Position-Component-Resolution-Layer Jpeg2000ProgressionCPRL // Component-Position-Resolution-Layer )
type JpegBackend ¶
type JpegBackend int
JPEG decoder backend types
const ( JpegBackendDefault JpegBackend = iota JpegBackendHybrid JpegBackendGPUHybrid JpegBackendHardware )
type JpegDecodeParams ¶
type JpegDecodeParams struct { OutputFormat JpegFormat Backend JpegBackend CropX int CropY int CropWidth int CropHeight int ScaleWidth int ScaleHeight int }
JPEG decode parameters
type JpegDecoderState ¶
type JpegDecoderState struct {
// contains filtered or unexported fields
}
JPEG decoder state
func CreateJpegDecoder ¶
func CreateJpegDecoder(backend JpegBackend) (*JpegDecoderState, error)
CreateJpegDecoder creates a new JPEG decoder
func (*JpegDecoderState) DecodeJpeg ¶
func (decoder *JpegDecoderState) DecodeJpeg(jpegData []byte, params JpegDecodeParams) (*memory.Memory, int, int, error)
DecodeJpeg decodes a JPEG image from byte data
func (*JpegDecoderState) DecodeJpegBatch ¶
func (decoder *JpegDecoderState) DecodeJpegBatch(jpegDataList [][]byte, params JpegDecodeParams) ([]*memory.Memory, []int, []int, error)
DecodeJpegBatch decodes multiple JPEG images in batch
func (*JpegDecoderState) Destroy ¶
func (decoder *JpegDecoderState) Destroy() error
Destroy cleans up decoder resources
type JpegEncodeParams ¶
type JpegEncodeParams struct { InputFormat JpegFormat Quality int OptimizedHuffman bool RestartInterval int }
JPEG encode parameters
type JpegEncoderState ¶
type JpegEncoderState struct {
// contains filtered or unexported fields
}
JPEG encoder state
func CreateJpegEncoder ¶
func CreateJpegEncoder(backend JpegBackend) (*JpegEncoderState, error)
CreateJpegEncoder creates a new JPEG encoder
func (*JpegEncoderState) Destroy ¶
func (encoder *JpegEncoderState) Destroy() error
Destroy cleans up encoder resources
func (*JpegEncoderState) EncodeJpeg ¶
func (encoder *JpegEncoderState) EncodeJpeg(imageData *memory.Memory, width, height int, params JpegEncodeParams) ([]byte, error)
EncodeJpeg encodes image data to JPEG format
func (*JpegEncoderState) SetQuality ¶
func (encoder *JpegEncoderState) SetQuality(quality int) error
SetQuality sets the JPEG encoding quality (0-100)
type JpegFormat ¶
type JpegFormat int
JPEG formats and configurations
const ( JpegFormatRGB JpegFormat = iota JpegFormatBGR JpegFormatRGBI JpegFormatBGRI JpegFormatGrayscale JpegFormatYUV420 JpegFormatYUV422 JpegFormatYUV444 )
type LUInfo ¶
type LUInfo struct {
// contains filtered or unexported fields
}
LUInfo contains information about LU decomposition
type MathConfig ¶
type MathConfig struct { Precision MathPrecision DataType MathDataType VectorSize int UseHardware bool // Use hardware-specific optimizations FastMath bool // Enable fast math optimizations FlushToZero bool // Flush denormalized numbers to zero HandleNaN bool // Special handling for NaN values HandleInf bool // Special handling for infinity values }
Math configuration
type MathContext ¶
type MathContext struct {
// contains filtered or unexported fields
}
Math context for batch operations
func CreateMathContext ¶
func CreateMathContext(config MathConfig) (*MathContext, error)
CreateMathContext creates a new CUDA Math API context
func (*MathContext) BatchVectorOps ¶
func (ctx *MathContext) BatchVectorOps(ops []MathVectorOp) error
BatchVectorOps performs multiple vector operations in parallel
func (*MathContext) Destroy ¶
func (ctx *MathContext) Destroy() error
Destroy cleans up math context resources
func (*MathContext) VectorAdd ¶
func (ctx *MathContext) VectorAdd(a, b, output *memory.Memory, size int) error
VectorAdd performs element-wise addition: output[i] = a[i] + b[i]
func (*MathContext) VectorBesselJ0 ¶
func (ctx *MathContext) VectorBesselJ0(a, output *memory.Memory, size int) error
VectorBesselJ0 performs element-wise Bessel function J0: output[i] = j0(a[i])
func (*MathContext) VectorComplexAbs ¶
func (ctx *MathContext) VectorComplexAbs(a, output *memory.Memory, size int) error
VectorComplexAbs performs element-wise complex absolute value: output[i] = |a[i]|
func (*MathContext) VectorComplexArg ¶
func (ctx *MathContext) VectorComplexArg(a, output *memory.Memory, size int) error
VectorComplexArg performs element-wise complex argument: output[i] = arg(a[i])
func (*MathContext) VectorCos ¶
func (ctx *MathContext) VectorCos(a, output *memory.Memory, size int) error
VectorCos performs element-wise cosine: output[i] = cos(a[i])
func (*MathContext) VectorErf ¶
func (ctx *MathContext) VectorErf(a, output *memory.Memory, size int) error
VectorErf performs element-wise error function: output[i] = erf(a[i])
func (*MathContext) VectorExp ¶
func (ctx *MathContext) VectorExp(a, output *memory.Memory, size int) error
VectorExp performs element-wise exponential: output[i] = exp(a[i])
func (*MathContext) VectorFMA ¶
func (ctx *MathContext) VectorFMA(a, b, c, output *memory.Memory, size int) error
VectorFMA performs fused multiply-add: output[i] = a[i] * b[i] + c[i]
func (*MathContext) VectorGamma ¶
func (ctx *MathContext) VectorGamma(a, output *memory.Memory, size int) error
VectorGamma performs element-wise gamma function: output[i] = gamma(a[i])
func (*MathContext) VectorLog ¶
func (ctx *MathContext) VectorLog(a, output *memory.Memory, size int) error
VectorLog performs element-wise natural logarithm: output[i] = log(a[i])
func (*MathContext) VectorMul ¶
func (ctx *MathContext) VectorMul(a, b, output *memory.Memory, size int) error
VectorMul performs element-wise multiplication: output[i] = a[i] * b[i]
func (*MathContext) VectorNorm ¶
VectorNorm computes the L2 norm of a vector
func (*MathContext) VectorPow ¶
func (ctx *MathContext) VectorPow(a, b, output *memory.Memory, size int) error
VectorPow performs element-wise power: output[i] = pow(a[i], b[i])
func (*MathContext) VectorRsqrt ¶
func (ctx *MathContext) VectorRsqrt(a, output *memory.Memory, size int) error
VectorRsqrt performs element-wise reciprocal square root: output[i] = 1/sqrt(a[i])
func (*MathContext) VectorSin ¶
func (ctx *MathContext) VectorSin(a, output *memory.Memory, size int) error
VectorSin performs element-wise sine: output[i] = sin(a[i])
func (*MathContext) VectorSinCos ¶
func (ctx *MathContext) VectorSinCos(a, sin_out, cos_out *memory.Memory, size int) error
VectorSinCos computes both sine and cosine: sin_out[i] = sin(a[i]), cos_out[i] = cos(a[i])
func (*MathContext) VectorSqrt ¶
func (ctx *MathContext) VectorSqrt(a, output *memory.Memory, size int) error
VectorSqrt performs element-wise square root: output[i] = sqrt(a[i])
type MathDataType ¶
type MathDataType int
Math data types
const ( MathDataFloat32 MathDataType = iota MathDataFloat64 MathDataComplexFloat32 MathDataComplexFloat64 MathDataHalf MathDataBFloat16 )
type MathOperation ¶
type MathOperation int
Math operation types
const ( // Basic operations MathOpAdd MathOperation = iota MathOpSub MathOpMul MathOpDiv MathOpFMA MathOpSqrt MathOpCbrt MathOpRsqrt MathOpRcp // Trigonometric functions MathOpSin MathOpCos MathOpTan MathOpAsin MathOpAcos MathOpAtan MathOpAtan2 MathOpSincos MathOpSinpi MathOpCospi MathOpTanpi // Hyperbolic functions MathOpSinh MathOpCosh MathOpTanh MathOpAsinh MathOpAcosh MathOpAtanh // Exponential and logarithmic MathOpExp MathOpExp2 MathOpExp10 MathOpExpm1 MathOpLog MathOpLog2 MathOpLog10 MathOpLog1p MathOpLogb MathOpPow MathOpPowi // Special functions MathOpErf MathOpErfc MathOpErfinv MathOpErfcinv MathOpGamma MathOpLgamma MathOpTgamma MathOpJ0 MathOpJ1 MathOpY0 MathOpY1 MathOpJn MathOpYn // Rounding and remainder MathOpCeil MathOpFloor MathOpTrunc MathOpRound MathOpRint MathOpNearbyint MathOpFmod MathOpRemainder MathOpRemquo MathOpModf MathOpFrexp MathOpLdexp // Comparison and classification MathOpFmax MathOpFmin MathOpFdim MathOpIsnan MathOpIsinf MathOpIsfinite MathOpSignbit MathOpCopysign // Complex operations MathOpCabs MathOpCarg MathOpConj MathOpCproj MathOpCreal MathOpCimag )
type MathPrecision ¶
type MathPrecision int
Math precision modes
const ( MathPrecisionFast MathPrecision = iota // Fast, lower precision MathPrecisionDefault // Balanced precision/performance MathPrecisionAccurate // High precision MathPrecisionIEEE // IEEE 754 compliant )
type MathVectorOp ¶
type MathVectorOp struct { Operation MathOperation InputA *memory.Memory InputB *memory.Memory // Optional second input Output *memory.Memory Size int Config MathConfig }
Math vector operation descriptor
type MatrixFormat ¶
type MatrixFormat int
MatrixFormat represents different sparse matrix storage formats
const ( MatrixFormatCSR MatrixFormat = iota // Compressed Sparse Row MatrixFormatCSC // Compressed Sparse Column MatrixFormatCOO // Coordinate format MatrixFormatELL // ELLPACK format MatrixFormatHYB // Hybrid ELL-COO format )
type PoolingDescriptor ¶
type PoolingDescriptor struct {
// contains filtered or unexported fields
}
PoolingDescriptor describes a pooling operation
func CreatePoolingDescriptor ¶
func CreatePoolingDescriptor() (*PoolingDescriptor, error)
CreatePoolingDescriptor creates a pooling descriptor
func (*PoolingDescriptor) DestroyPoolingDescriptor ¶
func (desc *PoolingDescriptor) DestroyPoolingDescriptor() error
DestroyPoolingDescriptor destroys a pooling descriptor
func (*PoolingDescriptor) SetPooling2dDescriptor ¶
func (desc *PoolingDescriptor) SetPooling2dDescriptor(mode DNNPoolingMode, windowH, windowW, padH, padW, strideH, strideW int) error
SetPooling2dDescriptor sets the pooling descriptor for 2D pooling
type QRInfo ¶
type QRInfo struct {
// contains filtered or unexported fields
}
QRInfo contains information about QR decomposition
type RandomGenerator ¶
type RandomGenerator struct {
// contains filtered or unexported fields
}
RandomGenerator manages random number generation
func CreateRandomGenerator ¶
func CreateRandomGenerator(rngType RngType) (*RandomGenerator, error)
CreateRandomGenerator creates a new random number generator
func (*RandomGenerator) Destroy ¶
func (rg *RandomGenerator) Destroy() error
Destroy cleans up the generator
func (*RandomGenerator) GenerateLogNormal ¶
func (rg *RandomGenerator) GenerateLogNormal(output *memory.Memory, n int, mean, stddev float32) error
GenerateLogNormal generates log-normal random numbers
func (*RandomGenerator) GenerateNormal ¶
GenerateNormal generates normal random numbers (mean=0, stddev=1)
func (*RandomGenerator) GeneratePoisson ¶
GeneratePoisson generates Poisson-distributed random numbers
func (*RandomGenerator) GenerateUniform ¶
func (rg *RandomGenerator) GenerateUniform(output *memory.Memory, n int) error
GenerateUniform generates uniform random numbers in [0, 1)
func (*RandomGenerator) SetSeed ¶
func (rg *RandomGenerator) SetSeed(seed uint64)
SetSeed sets the random seed
type SVDInfo ¶
type SVDInfo struct {
// contains filtered or unexported fields
}
SVDInfo contains information about SVD decomposition
type SolverContext ¶
type SolverContext struct {
// contains filtered or unexported fields
}
SolverContext manages cuSOLVER operations
func CreateSolverContext ¶
func CreateSolverContext() (*SolverContext, error)
CreateSolverContext creates a new cuSOLVER context
func (*SolverContext) CholeskyFactorization ¶
func (ctx *SolverContext) CholeskyFactorization(A *memory.Memory, n int) error
CholeskyFactorization performs Cholesky decomposition for positive definite matrices
func (*SolverContext) DestroyContext ¶
func (ctx *SolverContext) DestroyContext() error
DestroyContext cleans up the solver context
func (*SolverContext) Eigenvalues ¶
func (ctx *SolverContext) Eigenvalues(A *memory.Memory, n int, computeVectors bool) (*memory.Memory, *memory.Memory, error)
Eigenvalues computes eigenvalues and optionally eigenvectors
func (*SolverContext) LUFactorization ¶
LUFactorization performs LU decomposition with partial pivoting
func (*SolverContext) PseudoInverse ¶
PseudoInverse computes the Moore-Penrose pseudoinverse using SVD
func (*SolverContext) QRFactorization ¶
QRFactorization performs QR decomposition of matrix A
func (*SolverContext) SVDDecomposition ¶
func (ctx *SolverContext) SVDDecomposition(A *memory.Memory, m, n int, computeUV bool) (*SVDInfo, error)
SVDDecomposition performs Singular Value Decomposition
type SparseContext ¶
type SparseContext struct {
// contains filtered or unexported fields
}
SparseContext manages cuSPARSE operations
func CreateSparseContext ¶
func CreateSparseContext() (*SparseContext, error)
CreateSparseContext creates a new cuSPARSE context
func (*SparseContext) CreateSparseMatrix ¶
func (ctx *SparseContext) CreateSparseMatrix(rows, cols, nnz int, format MatrixFormat) (*SparseMatrix, error)
CreateSparseMatrix creates a sparse matrix
func (*SparseContext) DenseToSparse ¶
func (ctx *SparseContext) DenseToSparse(dense *memory.Memory, rows, cols int, format MatrixFormat) (*SparseMatrix, error)
DenseToSparse converts a dense matrix to sparse format
func (*SparseContext) DestroyContext ¶
func (ctx *SparseContext) DestroyContext() error
DestroyContext destroys the cuSPARSE context
func (*SparseContext) SpGEMM ¶
func (ctx *SparseContext) SpGEMM(A, B *SparseMatrix) (*SparseMatrix, error)
SpGEMM performs general sparse matrix-matrix multiplication
func (*SparseContext) SpLU ¶
func (ctx *SparseContext) SpLU(A *SparseMatrix) (*SparseMatrix, *SparseMatrix, error)
SpLU performs sparse LU factorization
func (*SparseContext) SpMM ¶
func (ctx *SparseContext) SpMM(alpha float32, A *SparseMatrix, B *SparseMatrix, beta float32, C *SparseMatrix) error
SpMM performs sparse matrix-matrix multiplication: C = α*A*B + β*C
func (*SparseContext) SpMV ¶
func (ctx *SparseContext) SpMV(alpha float32, A *SparseMatrix, x *memory.Memory, beta float32, y *memory.Memory) error
SpMV performs sparse matrix-vector multiplication: y = α*A*x + β*y
func (*SparseContext) SpSV ¶
func (ctx *SparseContext) SpSV(A *SparseMatrix, b, x *memory.Memory) error
SpSV solves a sparse triangular system: A*x = b
func (*SparseContext) SparseToDense ¶
func (ctx *SparseContext) SparseToDense(sparse *SparseMatrix) (*memory.Memory, error)
SparseToDense converts a sparse matrix to dense format
type SparseMatrix ¶
type SparseMatrix struct {
// contains filtered or unexported fields
}
SparseMatrix represents a sparse matrix
func SparseMatrixMultiply ¶
func SparseMatrixMultiply(A, B *SparseMatrix) (*SparseMatrix, error)
SparseMatrixMultiply provides simplified sparse matrix multiplication
func (*SparseMatrix) Destroy ¶
func (sm *SparseMatrix) Destroy() error
Destroy cleans up the sparse matrix
func (*SparseMatrix) GetMatrixInfo ¶
func (sm *SparseMatrix) GetMatrixInfo() (rows, cols, nnz int, format MatrixFormat)
GetMatrixInfo returns information about the sparse matrix
type TensorDataType ¶
type TensorDataType int
TensorDataType enumerates the tensor data types.
const ( TensorFloat16 TensorDataType = iota TensorFloat32 TensorFloat64 TensorComplex32 TensorComplex64 TensorInt8 TensorInt16 TensorInt32 TensorInt64 TensorUInt8 TensorUInt16 TensorUInt32 TensorUInt64 TensorBFloat16 )
type TensorDescriptor ¶
type TensorDescriptor struct {
// contains filtered or unexported fields
}
TensorDescriptor describes the layout of a tensor
func CreateTensorDescriptor ¶
func CreateTensorDescriptor() (*TensorDescriptor, error)
CreateTensorDescriptor creates a tensor descriptor
func (*TensorDescriptor) DestroyTensorDescriptor ¶
func (desc *TensorDescriptor) DestroyTensorDescriptor() error
DestroyTensorDescriptor destroys a tensor descriptor
func (*TensorDescriptor) SetTensor4dDescriptor ¶
func (desc *TensorDescriptor) SetTensor4dDescriptor(format DNNTensorFormat, dataType DNNDataType, n, c, h, w int) error
SetTensor4dDescriptor sets the tensor descriptor for 4D tensors
func (*TensorDescriptor) SetTensorNdDescriptor ¶
func (desc *TensorDescriptor) SetTensorNdDescriptor(dataType DNNDataType, dimensions []int, strides []int) error
SetTensorNdDescriptor sets the tensor descriptor for N-dimensional tensors
type TensorLayout ¶
type TensorLayout int
TensorLayout describes the memory layout of a tensor.
const (
	TensorLayoutRowMajor TensorLayout = iota // C-style (last dimension contiguous)
	TensorLayoutColMajor                     // Fortran-style (first dimension contiguous)
	TensorLayoutCustom                       // Custom stride pattern
)
type TensorMathMode ¶
type TensorMathMode int
TensorMathMode enumerates the tensor math modes.
const ( TensorMathDefault TensorMathMode = iota TensorMathTensorCore TensorMathFast TensorMathAccurate )
type TensorOperation ¶
type TensorOperation int
TensorOperation enumerates the tensor operation types.
const (
	// Basic operations
	TensorOpAdd TensorOperation = iota
	TensorOpSub
	TensorOpMul
	TensorOpDiv
	TensorOpScale
	TensorOpCopy
	TensorOpTranspose
	TensorOpPermute
	TensorOpReduce

	// Contractions
	TensorOpContraction
	TensorOpBilinear
	TensorOpElementwise

	// Advanced operations
	TensorOpConvolution
	TensorOpGEMM
	TensorOpBatchedGEMM
	TensorOpTensorCore
)
type TensorPlan ¶
type TensorPlan struct {
// contains filtered or unexported fields
}
TensorPlan represents a tensor plan for optimized execution.
func (*TensorPlan) Destroy ¶
func (plan *TensorPlan) Destroy() error
Destroy destroys the tensor plan and frees its resources
type TensorReduction ¶
type TensorReduction int
TensorReduction enumerates the tensor reduction operations.
const ( TensorReduceSum TensorReduction = iota TensorReduceMax TensorReduceMin TensorReduceMean TensorReduceNorm1 TensorReduceNorm2 TensorReduceNormInf TensorReduceAny TensorReduceAll )
type ThrustContext ¶
type ThrustContext struct {
// contains filtered or unexported fields
}
ThrustContext manages Thrust operations
func CreateThrustContext ¶
func CreateThrustContext() (*ThrustContext, error)
CreateThrustContext creates a new Thrust context
func (*ThrustContext) Copy ¶
func (ctx *ThrustContext) Copy(src, dst *memory.Memory, n int, policy ExecutionPolicy) error
Copy copies elements from source to destination
func (*ThrustContext) CopyIf ¶
func (ctx *ThrustContext) CopyIf(src, dst *memory.Memory, n int, predicate string, policy ExecutionPolicy) (int, error)
CopyIf copies elements that satisfy predicate
func (*ThrustContext) Count ¶
func (ctx *ThrustContext) Count(data *memory.Memory, n int, value float32, policy ExecutionPolicy) (int, error)
Count counts occurrences of value
func (*ThrustContext) DestroyContext ¶
func (ctx *ThrustContext) DestroyContext() error
DestroyContext cleans up Thrust context
func (*ThrustContext) ExclusiveScan ¶
func (ctx *ThrustContext) ExclusiveScan(input, output *memory.Memory, n int, initValue float32, policy ExecutionPolicy) error
ExclusiveScan performs exclusive prefix sum
func (*ThrustContext) Fill ¶
func (ctx *ThrustContext) Fill(data *memory.Memory, n int, value float32, policy ExecutionPolicy) error
Fill fills memory with specified value
func (*ThrustContext) Find ¶
func (ctx *ThrustContext) Find(data *memory.Memory, n int, value float32, policy ExecutionPolicy) (int, error)
Find locates first occurrence of value
func (*ThrustContext) Generate ¶
func (ctx *ThrustContext) Generate(data *memory.Memory, n int, generator string, policy ExecutionPolicy) error
Generate fills memory using generator function
func (*ThrustContext) MaxElement ¶
func (ctx *ThrustContext) MaxElement(data *memory.Memory, n int, policy ExecutionPolicy) (float32, int, error)
MaxElement finds maximum element
func (*ThrustContext) Merge ¶
func (ctx *ThrustContext) Merge(input1, input2, output *memory.Memory, n1, n2 int, policy ExecutionPolicy) error
Merge merges two sorted sequences
func (*ThrustContext) MinElement ¶
func (ctx *ThrustContext) MinElement(data *memory.Memory, n int, policy ExecutionPolicy) (float32, int, error)
MinElement finds minimum element
func (*ThrustContext) Partition ¶
func (ctx *ThrustContext) Partition(data *memory.Memory, n int, predicate string, policy ExecutionPolicy) (int, error)
Partition partitions elements based on predicate
func (*ThrustContext) Reduce ¶
func (ctx *ThrustContext) Reduce(data *memory.Memory, n int, initValue float32, policy ExecutionPolicy) (float32, error)
Reduce performs parallel reduction
func (*ThrustContext) Scan ¶
func (ctx *ThrustContext) Scan(input, output *memory.Memory, n int, policy ExecutionPolicy) error
Scan performs inclusive prefix sum
func (*ThrustContext) SetIntersection ¶
func (ctx *ThrustContext) SetIntersection(input1, input2, output *memory.Memory, n1, n2 int, policy ExecutionPolicy) (int, error)
SetIntersection computes intersection of two sorted sequences
func (*ThrustContext) SetUnion ¶
func (ctx *ThrustContext) SetUnion(input1, input2, output *memory.Memory, n1, n2 int, policy ExecutionPolicy) (int, error)
SetUnion computes union of two sorted sequences
func (*ThrustContext) Sort ¶
func (ctx *ThrustContext) Sort(data *memory.Memory, n int, policy ExecutionPolicy) error
Sort sorts elements in ascending order
func (*ThrustContext) SortByKey ¶
func (ctx *ThrustContext) SortByKey(keys, values *memory.Memory, n int, policy ExecutionPolicy) error
SortByKey sorts key-value pairs by keys
func (*ThrustContext) Transform ¶
func (ctx *ThrustContext) Transform(input, output *memory.Memory, n int, operation string, policy ExecutionPolicy) error
Transform applies unary operation to each element
func (*ThrustContext) TransformBinary ¶
func (ctx *ThrustContext) TransformBinary(input1, input2, output *memory.Memory, n int, operation string, policy ExecutionPolicy) error
TransformBinary applies binary operation to pairs of elements
func (*ThrustContext) Unique ¶
func (ctx *ThrustContext) Unique(data *memory.Memory, n int, policy ExecutionPolicy) (int, error)
Unique removes consecutive duplicate elements