table

package
v0.2.1 Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Mar 11, 2025 License: Apache-2.0 Imports: 31 Imported by: 0

Documentation

Index

Constants

View Source
const (
	ArrowFieldDocKey = "doc"
	// Arrow schemas that are generated from the Parquet library will utilize
	// this key to identify the field id of the source Parquet field.
	// We use this when converting to Iceberg to provide field IDs
	ArrowParquetFieldIDKey = "PARQUET:field_id"
)

Constants to look for as keys in Arrow field metadata.

View Source
const (
	WriteDataPathKey                        = "write.data.path"
	WriteMetadataPathKey                    = "write.metadata.path"
	WriteObjectStorePartitionedPathsKey     = "write.object-storage.partitioned-paths"
	WriteObjectStorePartitionedPathsDefault = true
	ObjectStoreEnabledKey                   = "write.object-storage.enabled"
	ObjectStoreEnabledDefault               = false
)
View Source
const (
	InitialSortOrderID  = 1
	UnsortedSortOrderID = 0
)
View Source
const DefaultFormatVersion = 2
View Source
const MainBranch = "main"
View Source
const ScanNoLimit = -1
View Source
const (
	ScanOptionArrowUseLargeTypes = "arrow.use_large_types"
)

Variables

View Source
var (
	ErrInvalidMetadataFormatVersion = errors.New("invalid or missing format-version in table metadata")
	ErrInvalidMetadata              = errors.New("invalid metadata")
)
View Source
var (
	ErrInvalidOperation = errors.New("invalid operation value")
	ErrMissingOperation = errors.New("missing operation key")
)
View Source
var (
	ErrInvalidSortDirection = errors.New("invalid sort direction, must be 'asc' or 'desc'")
	ErrInvalidNullOrder     = errors.New("invalid null order, must be 'nulls-first' or 'nulls-last'")
)
View Source
var ErrInvalidRefType = errors.New("invalid snapshot ref type, should be 'branch' or 'tag'")
View Source
var UnsortedSortOrder = SortOrder{OrderID: UnsortedSortOrderID, Fields: []SortField{}}

A default Sort Order indicating no sort order at all

Functions

func ApplyNameMapping

func ApplyNameMapping(schemaWithoutIDs *iceberg.Schema, nameMapping NameMapping) (*iceberg.Schema, error)

func ArrowSchemaToIceberg

func ArrowSchemaToIceberg(sc *arrow.Schema, downcastNsTimestamp bool, nameMapping NameMapping) (*iceberg.Schema, error)

func ArrowTypeToIceberg

func ArrowTypeToIceberg(dt arrow.DataType, downcastNsTimestamp bool) (iceberg.Type, error)

func NewRemoveSnapshotRefUpdate

func NewRemoveSnapshotRefUpdate(ref string) *removeSnapshotRefUpdate

NewRemoveSnapshotRefUpdate creates a new update that removes a snapshot reference from the table metadata.

func NewSetPropertiesUpdate

func NewSetPropertiesUpdate(updates iceberg.Properties) *setPropertiesUpdate

NewSetPropertiesUpdate creates a new update that sets the given properties in the table metadata.

func SchemaToArrowSchema

func SchemaToArrowSchema(sc *iceberg.Schema, metadata map[string]string, includeFieldIDs, useLargeTypes bool) (*arrow.Schema, error)

SchemaToArrowSchema converts an Iceberg schema to an Arrow schema. If the metadata parameter is non-nil, it will be included as the top-level metadata in the schema. If includeFieldIDs is true, then each field of the schema will contain a metadata key PARQUET:field_id set to the field id from the iceberg schema.

func ToRequestedSchema

func ToRequestedSchema(ctx context.Context, requested, fileSchema *iceberg.Schema, batch arrow.Record, downcastTimestamp, includeFieldIDs, useLargeTypes bool) (arrow.Record, error)

ToRequestedSchema will construct a new record batch matching the requested iceberg schema, casting columns as necessary.

func TypeToArrowType

func TypeToArrowType(t iceberg.Type, includeFieldIDs bool, useLargeTypes bool) (arrow.DataType, error)

TypeToArrowType converts a given iceberg type into the equivalent Arrow data type. For dealing with nested fields (List, Struct, Map), if includeFieldIDs is true, then the child fields will contain a metadata key PARQUET:field_id set to the field id.

func VisitArrowSchema

func VisitArrowSchema[T any](sc *arrow.Schema, visitor ArrowSchemaVisitor[T]) (res T, err error)

func VisitMappedFields

func VisitMappedFields[S, T any](fields []MappedField, visitor NameMappingVisitor[S, T]) (res S, err error)

func VisitNameMapping

func VisitNameMapping[S, T any](obj NameMapping, visitor NameMappingVisitor[S, T]) (res S, err error)

func WithMaxRefAgeMs

func WithMaxRefAgeMs(maxRefAgeMs int64) setSnapshotRefOption

func WithMaxSnapshotAgeMs

func WithMaxSnapshotAgeMs(maxSnapshotAgeMs int64) setSnapshotRefOption

func WithMinSnapshotsToKeep

func WithMinSnapshotsToKeep(minSnapshotsToKeep int) setSnapshotRefOption

Types

type ArrowSchemaVisitor

type ArrowSchemaVisitor[T any] interface {
	Schema(*arrow.Schema, T) T
	Struct(*arrow.StructType, []T) T
	Field(arrow.Field, T) T
	List(arrow.ListLikeType, T) T
	Map(mt *arrow.MapType, keyResult T, valueResult T) T
	Primitive(arrow.DataType) T
}

ArrowSchemaVisitor is an interface that can be implemented and passed to VisitArrowSchema for iterating over an Arrow schema.

type CatalogIO

type CatalogIO interface {
	LoadTable(context.Context, Identifier, iceberg.Properties) (*Table, error)
	CommitTable(context.Context, *Table, []Requirement, []Update) (Metadata, string, error)
}

type FileScanTask

type FileScanTask struct {
	File          iceberg.DataFile
	DeleteFiles   []iceberg.DataFile
	Start, Length int64
}

type Identifier

type Identifier = []string

type LocationProvider

type LocationProvider interface {
	NewTableMetadataFileLocation(newVersion int) (string, error)
	NewMetadataLocation(metadataFileName string) string
}

func LoadLocationProvider

func LoadLocationProvider(tableLocation string, tableProps iceberg.Properties) (LocationProvider, error)

type MappedField

type MappedField struct {
	Names []string `json:"names"`
	// iceberg spec says this is optional, but I don't see any examples
	// of this being left empty. Does pyiceberg need to be updated or should
	// the spec not say field-id is optional?
	FieldID *int          `json:"field-id,omitempty"`
	Fields  []MappedField `json:"fields,omitempty"`
}

func (*MappedField) Len

func (m *MappedField) Len() int

func (*MappedField) String

func (m *MappedField) String() string

type Metadata

type Metadata interface {
	// Version indicates the version of this metadata, 1 for V1, 2 for V2, etc.
	Version() int
	// TableUUID returns a UUID that identifies the table, generated when the
	// table is created. Implementations must throw an exception if a table's
	// UUID does not match the expected UUID after refreshing metadata.
	TableUUID() uuid.UUID
	// Location is the table's base location. This is used by writers to determine
	// where to store data files, manifest files, and table metadata files.
	Location() string
	// LastUpdatedMillis is the timestamp in milliseconds from the unix epoch when
	// the table was last updated. Each table metadata file should update this
	// field just before writing.
	LastUpdatedMillis() int64
	// LastColumnID returns the highest assigned column ID for the table.
	// This is used to ensure fields are always assigned an unused ID when
	// evolving schemas.
	LastColumnID() int
	// Schemas returns the list of schemas, stored as objects with their
	// schema-id.
	Schemas() []*iceberg.Schema
	// CurrentSchema returns the table's current schema.
	CurrentSchema() *iceberg.Schema
	// PartitionSpecs returns the list of all partition specs in the table.
	PartitionSpecs() []iceberg.PartitionSpec
	// PartitionSpec returns the current partition spec that the table is using.
	PartitionSpec() iceberg.PartitionSpec
	// DefaultPartitionSpec is the ID of the current spec that writers should
	// use by default.
	DefaultPartitionSpec() int
	// LastPartitionSpecID is the highest assigned partition field ID across
	// all partition specs for the table. This is used to ensure partition
	// fields are always assigned an unused ID when evolving specs.
	LastPartitionSpecID() *int
	// Snapshots returns the list of valid snapshots. Valid snapshots are
	// snapshots for which all data files exist in the file system. A data
	// file must not be deleted from the file system until the last snapshot
	// in which it was listed is garbage collected.
	Snapshots() []Snapshot
	// SnapshotByID finds and returns a specific snapshot by its ID. Returns
	// nil if the ID is not found in the list of snapshots.
	SnapshotByID(int64) *Snapshot
	// SnapshotByName searches the list of snapshots for a snapshot with a given
	// ref name. Returns nil if there's no ref with this name for a snapshot.
	SnapshotByName(name string) *Snapshot
	// CurrentSnapshot returns the table's current snapshot.
	CurrentSnapshot() *Snapshot
	// Ref returns the snapshot ref for the main branch.
	Ref() SnapshotRef
	// Refs returns a list of snapshot name/reference pairs.
	Refs() iter.Seq2[string, SnapshotRef]
	// SnapshotLogs returns the list of snapshot logs for the table.
	SnapshotLogs() iter.Seq[SnapshotLogEntry]
	// SortOrder returns the table's current sort order, ie: the one with the
	// ID that matches the default-sort-order-id.
	SortOrder() SortOrder
	// SortOrders returns the list of sort orders in the table.
	SortOrders() []SortOrder
	// DefaultSortOrder returns the ID of the current sort order that writers
	// should use by default.
	DefaultSortOrder() int
	// Properties is a string to string map of table properties. This is used
	// to control settings that affect reading and writing and is not intended
	// to be used for arbitrary metadata. For example, commit.retry.num-retries
	// is used to control the number of commit retries.
	Properties() iceberg.Properties
	// PreviousFiles returns the list of metadata log entries for the table.
	PreviousFiles() iter.Seq[MetadataLogEntry]

	Equals(Metadata) bool
}

Metadata for an iceberg table as specified in the Iceberg spec

https://iceberg.apache.org/spec/#iceberg-table-spec

func NewMetadata

func NewMetadata(sc *iceberg.Schema, partitions *iceberg.PartitionSpec, sortOrder SortOrder, location string, props iceberg.Properties) (Metadata, error)

NewMetadata creates a new table metadata object using the provided schema, partition spec, sort order, location, and properties, generating a fresh UUID for the new table metadata. By default, this will generate a V2 table metadata, but this can be modified by adding a "format-version" property to the props map. An error will be returned if the "format-version" property exists and is not a valid version number.

func NewMetadataWithUUID

func NewMetadataWithUUID(sc *iceberg.Schema, partitions *iceberg.PartitionSpec, sortOrder SortOrder, location string, props iceberg.Properties, tableUuid uuid.UUID) (Metadata, error)

NewMetadataWithUUID is like NewMetadata, but allows the caller to specify the UUID of the table rather than creating a new one.

func ParseMetadata

func ParseMetadata(r io.Reader) (Metadata, error)

ParseMetadata parses json metadata provided by the passed in reader, returning an error if one is encountered.

func ParseMetadataBytes

func ParseMetadataBytes(b []byte) (Metadata, error)

ParseMetadataBytes is like ParseMetadataString but for a byte slice.

func ParseMetadataString

func ParseMetadataString(s string) (Metadata, error)

ParseMetadataString is like ParseMetadata, but for a string rather than an io.Reader.

type MetadataBuilder

type MetadataBuilder struct {
	// contains filtered or unexported fields
}

func MetadataBuilderFromBase

func MetadataBuilderFromBase(metadata Metadata) (*MetadataBuilder, error)

func NewMetadataBuilder

func NewMetadataBuilder() (*MetadataBuilder, error)

func (*MetadataBuilder) AddPartitionSpec

func (b *MetadataBuilder) AddPartitionSpec(spec *iceberg.PartitionSpec, initial bool) (*MetadataBuilder, error)

func (*MetadataBuilder) AddSchema

func (b *MetadataBuilder) AddSchema(schema *iceberg.Schema, newLastColumnID int, initial bool) (*MetadataBuilder, error)

func (*MetadataBuilder) AddSnapshot

func (b *MetadataBuilder) AddSnapshot(snapshot *Snapshot) (*MetadataBuilder, error)

func (*MetadataBuilder) AddSortOrder

func (b *MetadataBuilder) AddSortOrder(sortOrder *SortOrder, initial bool) (*MetadataBuilder, error)

func (*MetadataBuilder) Build

func (b *MetadataBuilder) Build() (Metadata, error)

func (*MetadataBuilder) GetSchemaByID

func (b *MetadataBuilder) GetSchemaByID(id int) (*iceberg.Schema, error)

func (*MetadataBuilder) GetSortOrderByID

func (b *MetadataBuilder) GetSortOrderByID(id int) (*SortOrder, error)

func (*MetadataBuilder) GetSpecByID

func (b *MetadataBuilder) GetSpecByID(id int) (*iceberg.PartitionSpec, error)

func (*MetadataBuilder) RemoveProperties

func (b *MetadataBuilder) RemoveProperties(keys []string) (*MetadataBuilder, error)

func (*MetadataBuilder) SetCurrentSchemaID

func (b *MetadataBuilder) SetCurrentSchemaID(currentSchemaID int) (*MetadataBuilder, error)

func (*MetadataBuilder) SetDefaultSortOrderID

func (b *MetadataBuilder) SetDefaultSortOrderID(defaultSortOrderID int) (*MetadataBuilder, error)

func (*MetadataBuilder) SetDefaultSpecID

func (b *MetadataBuilder) SetDefaultSpecID(defaultSpecID int) (*MetadataBuilder, error)

func (*MetadataBuilder) SetFormatVersion

func (b *MetadataBuilder) SetFormatVersion(formatVersion int) (*MetadataBuilder, error)

func (*MetadataBuilder) SetLoc

func (b *MetadataBuilder) SetLoc(loc string) (*MetadataBuilder, error)

func (*MetadataBuilder) SetProperties

func (b *MetadataBuilder) SetProperties(props iceberg.Properties) (*MetadataBuilder, error)

func (*MetadataBuilder) SetSnapshotRef

func (b *MetadataBuilder) SetSnapshotRef(
	name string,
	snapshotID int64,
	refType RefType,
	options ...setSnapshotRefOption,
) (*MetadataBuilder, error)

func (*MetadataBuilder) SetUUID

func (b *MetadataBuilder) SetUUID(uuid uuid.UUID) (*MetadataBuilder, error)

func (*MetadataBuilder) SnapshotByID

func (b *MetadataBuilder) SnapshotByID(id int64) (*Snapshot, error)

type MetadataLogEntry

type MetadataLogEntry struct {
	MetadataFile string `json:"metadata-file"`
	TimestampMs  int64  `json:"timestamp-ms"`
}

type NameMapping

type NameMapping []MappedField

func (NameMapping) String

func (nm NameMapping) String() string

type NameMappingAccessor

type NameMappingAccessor struct{}

func (NameMappingAccessor) FieldPartner

func (n NameMappingAccessor) FieldPartner(partnerStruct *MappedField, _ int, fieldName string) *MappedField

func (NameMappingAccessor) ListElementPartner

func (n NameMappingAccessor) ListElementPartner(partnerList *MappedField) *MappedField

func (NameMappingAccessor) MapKeyPartner

func (n NameMappingAccessor) MapKeyPartner(partnerMap *MappedField) *MappedField

func (NameMappingAccessor) MapValuePartner

func (n NameMappingAccessor) MapValuePartner(partnerMap *MappedField) *MappedField

func (NameMappingAccessor) SchemaPartner

func (NameMappingAccessor) SchemaPartner(partner *MappedField) *MappedField

type NameMappingVisitor

type NameMappingVisitor[S, T any] interface {
	Mapping(nm NameMapping, fieldResults S) S
	Fields(st []MappedField, fieldResults []T) S
	Field(field MappedField, fieldResult S) T
}

type NullOrder

type NullOrder string
const (
	NullsFirst NullOrder = "nulls-first"
	NullsLast  NullOrder = "nulls-last"
)

type Operation

type Operation string
const (
	OpAppend    Operation = "append"
	OpReplace   Operation = "replace"
	OpOverwrite Operation = "overwrite"
	OpDelete    Operation = "delete"
)

func ValidOperation

func ValidOperation(s string) (Operation, error)

ValidOperation ensures that a given string is one of the valid operation types: append, replace, overwrite, delete.

type RefType

type RefType string

RefType will be either a BranchRef or a TagRef

const (
	BranchRef RefType = "branch"
	TagRef    RefType = "tag"
)

type Requirement

type Requirement interface {
	// Validate checks that the current table metadata satisfies the requirement.
	Validate(Metadata) error
	GetType() string
}

A Requirement is a validation rule that must be satisfied before attempting to make and commit changes to a table. Requirements are used to ensure that the table is in a valid state before making changes.

func AssertCreate

func AssertCreate() Requirement

AssertCreate creates a requirement that the table does not already exist.

func AssertCurrentSchemaID

func AssertCurrentSchemaID(id int) Requirement

AssertCurrentSchemaID creates a requirement that the table's current schema ID matches the given id.

func AssertDefaultSortOrderID

func AssertDefaultSortOrderID(id int) Requirement

AssertDefaultSortOrderID creates a requirement that the table's default sort order ID matches the given id.

func AssertDefaultSpecID

func AssertDefaultSpecID(id int) Requirement

AssertDefaultSpecID creates a requirement that the table's default partition spec ID matches the given id.

func AssertLastAssignedFieldID

func AssertLastAssignedFieldID(id int) Requirement

AssertLastAssignedFieldID validates that the table's last assigned column ID matches the given id.

func AssertLastAssignedPartitionID

func AssertLastAssignedPartitionID(id int) Requirement

AssertLastAssignedPartitionID creates a requirement that the table's last assigned partition ID matches the given id.

func AssertRefSnapshotID

func AssertRefSnapshotID(ref string, id *int64) Requirement

AssertRefSnapshotID creates a requirement which ensures that the table branch or tag identified by the given ref must reference the given snapshot id. If the id is nil, the ref must not already exist.

func AssertTableUUID

func AssertTableUUID(uuid uuid.UUID) Requirement

AssertTableUUID creates a requirement that the table UUID matches the given UUID.

type Scan

type Scan struct {
	// contains filtered or unexported fields
}

func (*Scan) PlanFiles

func (scan *Scan) PlanFiles(ctx context.Context) ([]FileScanTask, error)

PlanFiles orchestrates the fetching and filtering of manifests, and then building a list of FileScanTasks that match the current Scan criteria.

func (*Scan) Projection

func (scan *Scan) Projection() (*iceberg.Schema, error)

func (*Scan) Snapshot

func (scan *Scan) Snapshot() *Snapshot

func (*Scan) ToArrowRecords

func (scan *Scan) ToArrowRecords(ctx context.Context) (*arrow.Schema, iter.Seq2[arrow.Record, error], error)

ToArrowRecords returns the arrow schema of the expected records and an iterator that can be used with a range expression to read the records as they are available. If an error is encountered during the planning and setup, then this will return the error directly. If the error occurs while iterating the records, it will be returned by the iterator.

The purpose for returning the schema up front is to handle the case where there are no rows returned. The resulting Arrow Schema of the projection will still be known.

func (*Scan) ToArrowTable

func (scan *Scan) ToArrowTable(ctx context.Context) (arrow.Table, error)

ToArrowTable calls ToArrowRecords and then gathers all of the records together and returns an arrow.Table made from those records.

func (*Scan) UseRef

func (scan *Scan) UseRef(name string) (*Scan, error)

func (*Scan) UseRowLimit

func (scan *Scan) UseRowLimit(n int64) *Scan

type ScanOption

type ScanOption func(*Scan)

func WitMaxConcurrency

func WitMaxConcurrency(n int) ScanOption

WitMaxConcurrency sets the maximum concurrency for table scan and plan operations. When unset it defaults to runtime.GOMAXPROCS.

func WithCaseSensitive

func WithCaseSensitive(b bool) ScanOption

func WithLimit

func WithLimit(n int64) ScanOption

func WithOptions

func WithOptions(opts iceberg.Properties) ScanOption

func WithRowFilter

func WithRowFilter(e iceberg.BooleanExpression) ScanOption

func WithSelectedFields

func WithSelectedFields(fields ...string) ScanOption

func WithSnapshotID

func WithSnapshotID(n int64) ScanOption

type Snapshot

type Snapshot struct {
	SnapshotID       int64    `json:"snapshot-id"`
	ParentSnapshotID *int64   `json:"parent-snapshot-id,omitempty"`
	SequenceNumber   int64    `json:"sequence-number"`
	TimestampMs      int64    `json:"timestamp-ms"`
	ManifestList     string   `json:"manifest-list,omitempty"`
	Summary          *Summary `json:"summary,omitempty"`
	SchemaID         *int     `json:"schema-id,omitempty"`
}

func (Snapshot) Equals

func (s Snapshot) Equals(other Snapshot) bool

func (Snapshot) Manifests

func (s Snapshot) Manifests(fio iceio.IO) ([]iceberg.ManifestFile, error)

func (Snapshot) String

func (s Snapshot) String() string

type SnapshotLogEntry

type SnapshotLogEntry struct {
	SnapshotID  int64 `json:"snapshot-id"`
	TimestampMs int64 `json:"timestamp-ms"`
}

type SnapshotRef

type SnapshotRef struct {
	SnapshotID         int64   `json:"snapshot-id"`
	SnapshotRefType    RefType `json:"type"`
	MinSnapshotsToKeep *int    `json:"min-snapshots-to-keep,omitempty"`
	MaxSnapshotAgeMs   *int64  `json:"max-snapshot-age-ms,omitempty"`
	MaxRefAgeMs        *int64  `json:"max-ref-age-ms,omitempty"`
}

SnapshotRef represents the reference information for a specific snapshot

func (*SnapshotRef) Equals

func (s *SnapshotRef) Equals(rhs SnapshotRef) bool

func (*SnapshotRef) UnmarshalJSON

func (s *SnapshotRef) UnmarshalJSON(b []byte) error

type SnapshotSummaryCollector

type SnapshotSummaryCollector struct {
	// contains filtered or unexported fields
}

type SortDirection

type SortDirection string
const (
	SortASC  SortDirection = "asc"
	SortDESC SortDirection = "desc"
)

type SortField

type SortField struct {
	// SourceID is the source column id from the table's schema
	SourceID int `json:"source-id"`
	// Transform is the transformation used to produce values to be
	// sorted on from the source column.
	Transform iceberg.Transform `json:"transform"`
	// Direction is an enum indicating ascending or descending direction.
	Direction SortDirection `json:"direction"`
	// NullOrder describes the order of null values when sorting
	// should be only either nulls-first or nulls-last enum values.
	NullOrder NullOrder `json:"null-order"`
}

SortField describes a field used in a sort order definition.

func (*SortField) MarshalJSON

func (s *SortField) MarshalJSON() ([]byte, error)

func (*SortField) String

func (s *SortField) String() string

func (*SortField) UnmarshalJSON

func (s *SortField) UnmarshalJSON(b []byte) error

type SortOrder

type SortOrder struct {
	OrderID int         `json:"order-id"`
	Fields  []SortField `json:"fields"`
}

SortOrder describes how the data is sorted within the table.

Data can be sorted within partitions by columns to gain performance. The order of the sort fields within the list defines the order in which the sort is applied to the data.

func AssignFreshSortOrderIDs

func AssignFreshSortOrderIDs(sortOrder SortOrder, old, fresh *iceberg.Schema) (SortOrder, error)

AssignFreshSortOrderIDs updates and reassigns the field source IDs from the old schema to the corresponding fields in the fresh schema, while also giving the Sort Order the initial Sort Order ID (see InitialSortOrderID).

func AssignFreshSortOrderIDsWithID

func AssignFreshSortOrderIDsWithID(sortOrder SortOrder, old, fresh *iceberg.Schema, sortOrderID int) (SortOrder, error)

AssignFreshSortOrderIDsWithID is like AssignFreshSortOrderIDs but allows specifying the id of the returned SortOrder.

func (SortOrder) Equals

func (s SortOrder) Equals(rhs SortOrder) bool

func (SortOrder) String

func (s SortOrder) String() string

func (*SortOrder) UnmarshalJSON

func (s *SortOrder) UnmarshalJSON(b []byte) error

type Summary

type Summary struct {
	Operation  Operation
	Properties iceberg.Properties
}

Summary stores the summary information for a snapshot indicating the operation that created the snapshot, and various properties which might exist in the summary.

func (*Summary) Equals

func (s *Summary) Equals(other *Summary) bool

func (*Summary) MarshalJSON

func (s *Summary) MarshalJSON() ([]byte, error)

func (*Summary) String

func (s *Summary) String() string

func (*Summary) UnmarshalJSON

func (s *Summary) UnmarshalJSON(b []byte) (err error)

type Table

type Table struct {
	// contains filtered or unexported fields
}

func New

func New(ident Identifier, meta Metadata, location string, fs io.IO, cat CatalogIO) *Table

func NewFromLocation

func NewFromLocation(ident Identifier, metalocation string, fsys io.IO, cat CatalogIO) (*Table, error)

func (Table) CurrentSnapshot

func (t Table) CurrentSnapshot() *Snapshot

func (Table) Equals

func (t Table) Equals(other Table) bool

func (Table) FS

func (t Table) FS() io.IO

func (Table) Identifier

func (t Table) Identifier() Identifier

func (Table) Location

func (t Table) Location() string

func (Table) LocationProvider

func (t Table) LocationProvider() (LocationProvider, error)

func (Table) Metadata

func (t Table) Metadata() Metadata

func (Table) MetadataLocation

func (t Table) MetadataLocation() string

func (Table) Properties

func (t Table) Properties() iceberg.Properties

func (Table) Scan

func (t Table) Scan(opts ...ScanOption) *Scan

func (Table) Schema

func (t Table) Schema() *iceberg.Schema

func (Table) Schemas

func (t Table) Schemas() map[int]*iceberg.Schema

func (Table) SnapshotByID

func (t Table) SnapshotByID(id int64) *Snapshot

func (Table) SnapshotByName

func (t Table) SnapshotByName(name string) *Snapshot

func (Table) SortOrder

func (t Table) SortOrder() SortOrder

func (Table) Spec

func (t Table) Spec() iceberg.PartitionSpec

type Update

type Update interface {
	// Action returns the name of the action that the update represents.
	Action() string
	// Apply applies the update to the given metadata builder.
	Apply(*MetadataBuilder) error
}

Update represents a change to a table's metadata.

func NewAddPartitionSpecUpdate

func NewAddPartitionSpecUpdate(spec *iceberg.PartitionSpec, initial bool) Update

NewAddPartitionSpecUpdate creates a new update that adds the given partition spec to the table metadata. If the initial flag is set to true, the spec is considered the initial spec of the table, and all other previously added specs in the metadata builder are removed.

func NewAddSchemaUpdate

func NewAddSchemaUpdate(schema *iceberg.Schema, lastColumnID int, initial bool) Update

NewAddSchemaUpdate creates a new update that adds the given schema and last column ID to the table metadata. If the initial flag is set to true, the schema is considered the initial schema of the table, and all previously added schemas in the metadata builder are removed.

func NewAddSnapshotUpdate

func NewAddSnapshotUpdate(snapshot *Snapshot) Update

NewAddSnapshotUpdate creates a new update that adds the given snapshot to the table metadata.

func NewAddSortOrderUpdate

func NewAddSortOrderUpdate(sortOrder *SortOrder, initial bool) Update

NewAddSortOrderUpdate creates a new update that adds the given sort order to the table metadata. If the initial flag is set to true, the sort order is considered the initial sort order of the table, and all previously added sort orders in the metadata builder are removed.

func NewAssignUUIDUpdate

func NewAssignUUIDUpdate(uuid uuid.UUID) Update

NewAssignUUIDUpdate creates a new update to assign a UUID to the table metadata.

func NewRemovePropertiesUpdate

func NewRemovePropertiesUpdate(removals []string) Update

NewRemovePropertiesUpdate creates a new update that removes properties from the table metadata. The properties are identified by their names, and if a property with the given name does not exist, it is ignored.

func NewRemoveSnapshotsUpdate

func NewRemoveSnapshotsUpdate(ids []int64) Update

NewRemoveSnapshotsUpdate creates a new update that removes all snapshots from the table metadata with the given snapshot IDs.

func NewSetCurrentSchemaUpdate

func NewSetCurrentSchemaUpdate(id int) Update

NewSetCurrentSchemaUpdate creates a new update that sets the current schema of the table metadata to the given schema ID.

func NewSetDefaultSortOrderUpdate

func NewSetDefaultSortOrderUpdate(id int) Update

NewSetDefaultSortOrderUpdate creates a new update that sets the default sort order of the table metadata to the given sort order ID.

func NewSetDefaultSpecUpdate

func NewSetDefaultSpecUpdate(id int) Update

NewSetDefaultSpecUpdate creates a new update that sets the default partition spec of the table metadata to the given spec ID.

func NewSetLocationUpdate

func NewSetLocationUpdate(loc string) Update

NewSetLocationUpdate creates a new update that sets the location of the table metadata.

func NewSetSnapshotRefUpdate

func NewSetSnapshotRefUpdate(
	name string,
	snapshotID int64,
	refType RefType,
	maxRefAgeMs, maxSnapshotAgeMs int64,
	minSnapshotsToKeep int,
) Update

NewSetSnapshotRefUpdate creates a new update that sets the given snapshot reference as the current snapshot of the table metadata. MaxRefAgeMs, MaxSnapshotAgeMs, and MinSnapshotsToKeep are optional, and any non-positive values are ignored.

func NewUpgradeFormatVersionUpdate

func NewUpgradeFormatVersionUpdate(formatVersion int) Update

NewUpgradeFormatVersionUpdate creates a new update that upgrades the format version of the table metadata to the given formatVersion.

Directories

Path Synopsis

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL