fetch

package
v0.0.11 Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Jul 31, 2025 License: Apache-2.0 Imports: 13 Imported by: 0

Documentation

Index

Constants

View Source
const (
	// DefaultMaxBodySize is a size limit of 10 MiB expressed in bytes.
	// NOTE(review): presumably the fallback for HTTPFetcherOptions.MaxBodySize — confirm.
	DefaultMaxBodySize = 10 * 1024 * 1024 // 10 MB
	// DefaultTimeout is 30 seconds; it is the Timeout set on DefaultHTTPClient.
	DefaultTimeout     = 30 * time.Second
)
View Source
// FakeUserAgent is a Firefox 133 (macOS) User-Agent string that may be used
// to mimic a real browser. It is also the "User-Agent" value in FakeHeaders.
const FakeUserAgent = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:133.0) Gecko/20100101 Firefox/133.0"

FakeUserAgent may be used to mimic a real browser.

Variables

View Source
var (
	// DefaultHTTPClient is an *http.Client whose Timeout is DefaultTimeout.
	DefaultHTTPClient = &http.Client{Timeout: DefaultTimeout}
	// DefaultHeaders is initialized to an empty, non-nil map.
	// NOTE(review): how these headers are applied is not visible here —
	// presumably they are added to outgoing requests; confirm.
	DefaultHeaders    = map[string]string{}
)
View Source
// FakeHeaders is a set of browser-like request headers that may be used to
// mimic a real browser. Its "User-Agent" entry is FakeUserAgent.
var FakeHeaders = map[string]string{
	"Accept":                    "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8",
	"Accept-Language":           "en-US,en;q=0.5",
	"Connection":                "keep-alive",
	"Dnt":                       "1",
	"Sec-Fetch-Dest":            "document",
	"Sec-Fetch-Mode":            "navigate",
	"Sec-Fetch-Site":            "cross-site",
	"Upgrade-Insecure-Requests": "1",
	"User-Agent":                FakeUserAgent,
	"Priority":                  "u=0, i",
}

FakeHeaders may be used to mimic a real browser.

Functions

This section is empty.

Types

type Action

// Action wraps a TypedAction and is used for JSON marshaling/unmarshaling of
// polymorphic actions.
type Action struct {
	// Action is the wrapped action value.
	Action TypedAction
}

Action is used for JSON marshaling/unmarshaling of polymorphic actions

func NewPDFAction

func NewPDFAction(options PDFActionOptions) Action

NewPDFAction creates a new PDF action

func NewScreenshotAction

func NewScreenshotAction(options ScreenshotActionOptions) Action

NewScreenshotAction creates a new screenshot action

func NewWaitAction

func NewWaitAction(options WaitActionOptions) Action

NewWaitAction creates a new wait action

func (*Action) MarshalJSON

func (a *Action) MarshalJSON() ([]byte, error)

MarshalJSON implements custom marshaling for polymorphic actions

func (*Action) UnmarshalJSON

func (a *Action) UnmarshalJSON(data []byte) error

UnmarshalJSON implements custom unmarshaling for polymorphic actions

type BaseAction

// BaseAction contains common fields for all actions.
type BaseAction struct {
	// Type is serialized under the JSON key "type".
	Type string `json:"type"`
}

BaseAction contains common fields for all actions

func (BaseAction) GetType

func (a BaseAction) GetType() string

type Client

// Client defines a client for fetching pages via a remote proxy.
type Client struct {
	// contains filtered or unexported fields
}

Client defines a client for fetching pages via a remote proxy.

func NewClient

func NewClient(options ClientOptions) *Client

NewClient creates a new client with the given options.

func (*Client) Fetch

func (c *Client) Fetch(ctx context.Context, request *Request) (*Response, error)

Fetch fetches a page using a remote proxy.

func (*Client) SetHeader

func (c *Client) SetHeader(key, value string)

SetHeader sets a header for the client.

type ClientOptions

// ClientOptions defines the options for the client. Every field is optional.
type ClientOptions struct {
	BaseURL   string            // Optional proxy base URL
	AuthToken string            // Optional authorization token
	Timeout   time.Duration     // Optional HTTP timeout
	Headers   map[string]string // Optional HTTP headers
}

ClientOptions defines the options for the client.

type Fetcher

// Fetcher defines an interface for fetching pages.
type Fetcher interface {

	// Fetch fetches the webpage described by request and returns the response.
	Fetch(ctx context.Context, request *Request) (*Response, error)
}

Fetcher defines an interface for fetching pages.

type HTTPFetcher

// HTTPFetcher implements the Fetcher interface using the standard HTTP client.
type HTTPFetcher struct {
	// contains filtered or unexported fields
}

HTTPFetcher implements the Fetcher interface using the standard HTTP client.

func NewHTTPFetcher

func NewHTTPFetcher(options HTTPFetcherOptions) *HTTPFetcher

NewHTTPFetcher creates a new HTTP fetcher

func (*HTTPFetcher) Fetch

func (f *HTTPFetcher) Fetch(ctx context.Context, req *Request) (*Response, error)

Fetch implements the Fetcher interface for HTTP requests

type HTTPFetcherOptions

// HTTPFetcherOptions defines the options for the HTTP fetcher.
//
// NOTE(review): zero-value handling is not visible here; presumably
// DefaultTimeout, DefaultHTTPClient and DefaultMaxBodySize act as fallbacks —
// confirm against NewHTTPFetcher.
type HTTPFetcherOptions struct {
	// Timeout is a duration; presumably the per-request HTTP timeout — confirm.
	Timeout     time.Duration
	// Headers maps header names to values.
	Headers     map[string]string
	// Client is the *http.Client supplied by the caller.
	Client      *http.Client
	// MaxBodySize is presumably a body-size limit in bytes — confirm.
	MaxBodySize int64
}

HTTPFetcherOptions defines the options for the HTTP fetcher.

// Link is a defined type whose underlying type is web.Link. This is a type
// definition, not an alias declaration (`type Link = web.Link`), so Link and
// web.Link are distinct types and values need an explicit conversion.
type Link web.Link

Link is a defined type based on web.Link, provided for convenience (it is a distinct type, not a true alias).

type Meta

// Meta is a defined type whose underlying type is web.Meta. This is a type
// definition, not an alias declaration (`type Meta = web.Meta`), so Meta and
// web.Meta are distinct types and values need an explicit conversion.
type Meta web.Meta

Meta is a defined type based on web.Meta, provided for convenience (it is a distinct type, not a true alias).

type Metadata

// Metadata is a defined type whose underlying type is web.Metadata. This is a
// type definition, not an alias declaration (`type Metadata = web.Metadata`),
// so the two are distinct types and values need an explicit conversion.
type Metadata web.Metadata

Metadata is a defined type based on web.Metadata, provided for convenience (it is a distinct type, not a true alias).

type MockFetcher

// MockFetcher implements the Fetcher interface for testing. It embeds
// mock.Mock, so that type's methods are promoted onto MockFetcher.
type MockFetcher struct {
	mock.Mock
	// contains filtered or unexported fields
}

MockFetcher implements the Fetcher interface for testing

func NewMockFetcher

func NewMockFetcher() *MockFetcher

func (*MockFetcher) AddError

func (m *MockFetcher) AddError(url string, err error)

func (*MockFetcher) AddResponse

func (m *MockFetcher) AddResponse(url string, response *Response)

func (*MockFetcher) Fetch

func (m *MockFetcher) Fetch(ctx context.Context, req *Request) (*Response, error)

type PDFAction

// PDFAction generates a PDF of the page. It embeds BaseAction, which supplies
// the "type" JSON field and the GetType method.
type PDFAction struct {
	BaseAction
	// Format is the paper format; omitted from JSON when empty.
	Format string `json:"format,omitempty"` // A4, Letter, Legal, etc.
}

PDFAction generates a PDF of the page

type PDFActionOptions

// PDFActionOptions represents the options for a PDF action; it is the
// parameter type of NewPDFAction.
type PDFActionOptions struct {
	// Format is the paper format; omitted from JSON when empty.
	Format string `json:"format,omitempty"` // A4, Letter, Legal, etc.
}

PDFActionOptions represents the options for a PDF action

type Request

// Request defines the JSON payload for fetch requests.
//
// Only URL is always serialized; every other field carries omitempty and is
// dropped from the JSON output when it holds its empty value.
// MaxAge, Timeout and WaitFor are integer millisecond counts.
type Request struct {
	URL             string            `json:"url"`
	OnlyMainContent bool              `json:"only_main_content,omitempty"`
	IncludeTags     []string          `json:"include_tags,omitempty"`
	ExcludeTags     []string          `json:"exclude_tags,omitempty"`
	MaxAge          int               `json:"max_age,omitempty"`  // milliseconds
	Timeout         int               `json:"timeout,omitempty"`  // milliseconds
	WaitFor         int               `json:"wait_for,omitempty"` // milliseconds
	// NOTE(review): presumably selects which fetcher backend handles the request — confirm.
	Fetcher         string            `json:"fetcher,omitempty"`
	Mobile          bool              `json:"mobile,omitempty"`
	Prettify        bool              `json:"prettify,omitempty"`
	// NOTE(review): presumably names the output formats to produce — confirm accepted values.
	Formats         []string          `json:"formats,omitempty"`
	// Actions holds polymorphic actions; Action provides custom JSON (un)marshaling.
	Actions         []Action          `json:"actions,omitempty"`
	Headers         map[string]string `json:"headers,omitempty"`
	StorageState    map[string]any    `json:"storage_state,omitempty"`
}

Request defines the JSON payload for fetch requests.

func ParseGetRequest

func ParseGetRequest(r *http.Request) (*Request, error)

ParseGetRequest parses a fetch.Request from a GET request and its query parameters.

func ParsePostRequest

func ParsePostRequest(r *http.Request) (*Request, error)

ParsePostRequest parses a fetch.Request from a POST request body.

type Response

// Response defines the JSON payload for fetch responses.
//
// URL, StatusCode and Headers have no omit option and are always serialized
// (a nil Headers map encodes as JSON null).
type Response struct {
	URL          string            `json:"url"`
	StatusCode   int               `json:"status_code"`
	Headers      map[string]string `json:"headers"`
	HTML         string            `json:"html,omitempty"`
	Markdown     string            `json:"markdown,omitempty"`
	// NOTE(review): Screenshot and PDF are strings; the encoding of the binary
	// payload (e.g. base64) is not visible here — confirm.
	Screenshot   string            `json:"screenshot,omitempty"`
	PDF          string            `json:"pdf,omitempty"`
	Error        string            `json:"error,omitempty"`
	// NOTE(review): omitempty never omits struct values; if Metadata's
	// underlying type (web.Metadata) is a struct this field is always emitted.
	// Consider omitzero, as used by Timestamp — confirm.
	Metadata     Metadata          `json:"metadata,omitempty"`
	Links        []*Link           `json:"links,omitempty"`
	StorageState map[string]any    `json:"storage_state,omitempty"`
	// Timestamp is omitted when zero. The omitzero option is honored by
	// encoding/json from Go 1.24; older toolchains ignore it.
	Timestamp    time.Time         `json:"timestamp,omitzero"`
}

Response defines the JSON payload for fetch responses.

func ProcessRequest

func ProcessRequest(request *Request, html string) (*Response, error)

ProcessRequest applies the request's options to the given HTML content, performing any requested transformations, and builds the corresponding response. This is a reference implementation and may not be suitable for all cases.

type ScreenshotAction

// ScreenshotAction triggers a screenshot of the page. It embeds BaseAction,
// which supplies the "type" JSON field and the GetType method.
type ScreenshotAction struct {
	BaseAction
	// FullPage is omitted from JSON when false.
	FullPage bool `json:"full_page,omitempty"`
}

ScreenshotAction triggers a screenshot of the page

type ScreenshotActionOptions

// ScreenshotActionOptions represents the options for a screenshot action; it
// is the parameter type of NewScreenshotAction.
type ScreenshotActionOptions struct {
	// FullPage is omitted from JSON when false.
	FullPage bool `json:"full_page,omitempty"`
}

ScreenshotActionOptions represents the options for a screenshot action

type TypedAction

// TypedAction represents an action to be taken on a page. BaseAction declares
// GetType with a value receiver, so BaseAction and any struct embedding it
// satisfy this interface.
type TypedAction interface {
	// GetType reports the action's type as a string.
	GetType() string
}

TypedAction represents an action to be taken on a page

type WaitAction

// WaitAction waits for a condition or time. It embeds BaseAction, which
// supplies the "type" JSON field and the GetType method.
type WaitAction struct {
	BaseAction
	Selector string `json:"selector,omitempty"` // Wait for element to appear
	Duration int    `json:"duration,omitempty"` // Wait for specific duration in milliseconds
}

WaitAction waits for a condition or time

type WaitActionOptions

// WaitActionOptions represents the options for a wait action; it is the
// parameter type of NewWaitAction.
type WaitActionOptions struct {
	Selector string `json:"selector,omitempty"` // Wait for element to appear
	Duration int    `json:"duration,omitempty"` // Wait for specific duration in milliseconds
}

WaitActionOptions represents the options for a wait action

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL