Documentation
¶
Index ¶
- func NewMemoryStorage() core.Storage
- func NewRedisStorage(client *redis.Client, prefix string) core.Storage
- type Config
- type Crawler
- type MemoryStorage
- func (ms *MemoryStorage) BatchGet(ctx context.Context, keys []string) (map[string]interface{}, error)
- func (ms *MemoryStorage) BatchStore(ctx context.Context, items map[string]interface{}) error
- func (ms *MemoryStorage) Clear(ctx context.Context) error
- func (ms *MemoryStorage) Close() error
- func (ms *MemoryStorage) Count(ctx context.Context) (int64, error)
- func (ms *MemoryStorage) Delete(ctx context.Context, key string) error
- func (ms *MemoryStorage) Exists(ctx context.Context, key string) (bool, error)
- func (ms *MemoryStorage) Get(ctx context.Context, key string) (interface{}, error)
- func (ms *MemoryStorage) Query(ctx context.Context, collection string, filter map[string]interface{}) ([]map[string]interface{}, error)
- func (ms *MemoryStorage) Save(ctx context.Context, collection string, data []map[string]interface{}) error
- func (ms *MemoryStorage) Store(ctx context.Context, key string, value interface{}) error
- func (ms *MemoryStorage) StoreWithTTL(ctx context.Context, key string, value interface{}, ttl time.Duration) error
- type Metrics
- type Mode
- type RedisStorage
- func (rs *RedisStorage) BatchGet(ctx context.Context, keys []string) (map[string]interface{}, error)
- func (rs *RedisStorage) BatchStore(ctx context.Context, items map[string]interface{}) error
- func (rs *RedisStorage) Clear(ctx context.Context) error
- func (rs *RedisStorage) Close() error
- func (rs *RedisStorage) Count(ctx context.Context) (int64, error)
- func (rs *RedisStorage) Delete(ctx context.Context, key string) error
- func (rs *RedisStorage) Exists(ctx context.Context, key string) (bool, error)
- func (rs *RedisStorage) Get(ctx context.Context, key string) (interface{}, error)
- func (rs *RedisStorage) Query(ctx context.Context, collection string, filter map[string]interface{}) ([]map[string]interface{}, error)
- func (rs *RedisStorage) QueryGeneric(ctx context.Context, query interface{}) ([]interface{}, error)
- func (rs *RedisStorage) Save(ctx context.Context, collection string, data []map[string]interface{}) error
- func (rs *RedisStorage) Store(ctx context.Context, key string, value interface{}) error
- func (rs *RedisStorage) StoreWithTTL(ctx context.Context, key string, value interface{}, ttl time.Duration) error
- type Status
- type StorageConfig
Constants ¶
This section is empty.
Variables ¶
This section is empty.
Functions ¶
func NewMemoryStorage ¶
func NewMemoryStorage() core.Storage
NewMemoryStorage creates a new in-memory storage and returns it as a core.Storage.
Types ¶
type Config ¶
type Config struct {
// Basic
ID string `json:"id"`
Name string `json:"name"`
Version string `json:"version"`
Mode Mode `json:"mode"` // standalone, distributed, cluster
Tags []string `json:"tags"`
Labels map[string]string `json:"labels"`
// Components
Fetcher *fetcher.Config `json:"fetcher"`
Pipeline *pipeline.Config `json:"pipeline"`
Storage *StorageConfig `json:"storage"`
TaskQueue string `json:"task_queue"` // "redis" or "memory"
Dispatcher *task.DispatcherConfig `json:"dispatcher"`
Scheduler *task.SchedulerConfig `json:"scheduler"`
Node *distributed.NodeConfig `json:"node"`
// Runtime
Workers int `json:"workers"`
MaxConcurrent int `json:"max_concurrent"`
MaxDepth int `json:"max_depth"`
MaxRetries int `json:"max_retries"`
Timeout time.Duration `json:"timeout"`
// Redis
RedisAddr string `json:"redis_addr"`
RedisDB int `json:"redis_db"`
RedisPrefix string `json:"redis_prefix"`
// Features
EnableScheduler bool `json:"enable_scheduler"`
EnableDistributed bool `json:"enable_distributed"`
EnableMetrics bool `json:"enable_metrics"`
EnableProfiling bool `json:"enable_profiling"`
// Logging
LogLevel string `json:"log_level"`
LogFile string `json:"log_file"`
}
Config contains crawler configuration
type Crawler ¶
type Crawler struct {
// Identity
ID string `json:"id"`
Name string `json:"name"`
Version string `json:"version"`
Tags []string `json:"tags"`
Labels map[string]string `json:"labels"`
// contains filtered or unexported fields
}
Crawler represents the main crawler system
func (*Crawler) GetMetrics ¶
GetMetrics returns crawler metrics
type MemoryStorage ¶
type MemoryStorage struct {
// contains filtered or unexported fields
}
MemoryStorage implements in-memory storage
func (*MemoryStorage) BatchGet ¶
func (ms *MemoryStorage) BatchGet(ctx context.Context, keys []string) (map[string]interface{}, error)
BatchGet retrieves multiple items
func (*MemoryStorage) BatchStore ¶
func (ms *MemoryStorage) BatchStore(ctx context.Context, items map[string]interface{}) error
BatchStore stores multiple items
func (*MemoryStorage) Clear ¶
func (ms *MemoryStorage) Clear(ctx context.Context) error
Clear removes all data
func (*MemoryStorage) Close ¶
func (ms *MemoryStorage) Close() error
Close is part of the core.Storage interface implementation.
func (*MemoryStorage) Count ¶
func (ms *MemoryStorage) Count(ctx context.Context) (int64, error)
Count returns the number of stored items
func (*MemoryStorage) Delete ¶
func (ms *MemoryStorage) Delete(ctx context.Context, key string) error
Delete removes data by key
func (*MemoryStorage) Get ¶
func (ms *MemoryStorage) Get(ctx context.Context, key string) (interface{}, error)
Get retrieves data by key
func (*MemoryStorage) Query ¶
func (ms *MemoryStorage) Query(ctx context.Context, collection string, filter map[string]interface{}) ([]map[string]interface{}, error)
Query is part of the core.Storage interface implementation.
func (*MemoryStorage) Save ¶
func (ms *MemoryStorage) Save(ctx context.Context, collection string, data []map[string]interface{}) error
Save is part of the core.Storage interface implementation.
func (*MemoryStorage) Store ¶
func (ms *MemoryStorage) Store(ctx context.Context, key string, value interface{}) error
Store stores data under the given key. It takes no TTL argument; use StoreWithTTL to set an explicit TTL.
func (*MemoryStorage) StoreWithTTL ¶
func (ms *MemoryStorage) StoreWithTTL(ctx context.Context, key string, value interface{}, ttl time.Duration) error
StoreWithTTL stores data with specific TTL
type Metrics ¶
type Metrics struct {
// Counters
RequestsTotal int64 `json:"requests_total"`
RequestsSuccess int64 `json:"requests_success"`
RequestsFailed int64 `json:"requests_failed"`
BytesDownloaded int64 `json:"bytes_downloaded"`
ItemsExtracted int64 `json:"items_extracted"`
ItemsProcessed int64 `json:"items_processed"`
ItemsStored int64 `json:"items_stored"`
TasksCreated int64 `json:"tasks_created"`
TasksCompleted int64 `json:"tasks_completed"`
TasksFailed int64 `json:"tasks_failed"`
// Gauges
QueueSize int64 `json:"queue_size"`
ActiveWorkers int64 `json:"active_workers"`
AvgResponseTime time.Duration `json:"avg_response_time"`
AvgProcessTime time.Duration `json:"avg_process_time"`
// Rates
RequestRate float64 `json:"request_rate"`
SuccessRate float64 `json:"success_rate"`
ErrorRate float64 `json:"error_rate"`
// Timing
StartTime time.Time `json:"start_time"`
LastRequestTime time.Time `json:"last_request_time"`
TotalRuntime time.Duration `json:"total_runtime"`
}
Metrics contains crawler metrics
type RedisStorage ¶
type RedisStorage struct {
// contains filtered or unexported fields
}
RedisStorage implements Redis-based storage
func (*RedisStorage) BatchGet ¶
func (rs *RedisStorage) BatchGet(ctx context.Context, keys []string) (map[string]interface{}, error)
BatchGet retrieves multiple items
func (*RedisStorage) BatchStore ¶
func (rs *RedisStorage) BatchStore(ctx context.Context, items map[string]interface{}) error
BatchStore stores multiple items
func (*RedisStorage) Clear ¶
func (rs *RedisStorage) Clear(ctx context.Context) error
Clear removes all data stored under this storage's key prefix (the prefix passed to NewRedisStorage).
func (*RedisStorage) Close ¶
func (rs *RedisStorage) Close() error
Close is part of the core.Storage interface implementation.
func (*RedisStorage) Count ¶
func (rs *RedisStorage) Count(ctx context.Context) (int64, error)
Count returns the number of stored items
func (*RedisStorage) Delete ¶
func (rs *RedisStorage) Delete(ctx context.Context, key string) error
Delete removes data by key
func (*RedisStorage) Get ¶
func (rs *RedisStorage) Get(ctx context.Context, key string) (interface{}, error)
Get retrieves data by key
func (*RedisStorage) Query ¶
func (rs *RedisStorage) Query(ctx context.Context, collection string, filter map[string]interface{}) ([]map[string]interface{}, error)
Query is part of the core.Storage interface implementation.
func (*RedisStorage) QueryGeneric ¶
func (rs *RedisStorage) QueryGeneric(ctx context.Context, query interface{}) ([]interface{}, error)
QueryGeneric performs a generic query using Redis patterns
func (*RedisStorage) Save ¶
func (rs *RedisStorage) Save(ctx context.Context, collection string, data []map[string]interface{}) error
Save is part of the core.Storage interface implementation.
func (*RedisStorage) Store ¶
func (rs *RedisStorage) Store(ctx context.Context, key string, value interface{}) error
Store stores data with the default TTL; use StoreWithTTL to set a specific TTL.
func (*RedisStorage) StoreWithTTL ¶
func (rs *RedisStorage) StoreWithTTL(ctx context.Context, key string, value interface{}, ttl time.Duration) error
StoreWithTTL stores data with specific TTL