Documentation
¶
Index ¶
- Constants
- Variables
- type Action
- type BaseAction
- type Client
- type ClientOptions
- type Fetcher
- type HTTPFetcher
- type HTTPFetcherOptions
- type Link
- type Meta
- type Metadata
- type MockFetcher
- type PDFAction
- type PDFActionOptions
- type Request
- type Response
- type ScreenshotAction
- type ScreenshotActionOptions
- type TypedAction
- type WaitAction
- type WaitActionOptions
Constants ¶
const (
	DefaultMaxBodySize = 10 * 1024 * 1024 // 10 MB
	DefaultTimeout     = 30 * time.Second
)
const FakeUserAgent = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:133.0) Gecko/20100101 Firefox/133.0"
FakeUserAgent may be used to mimic a real browser.
Variables ¶
var (
	DefaultHTTPClient = &http.Client{Timeout: DefaultTimeout}
	DefaultHeaders    = map[string]string{}
)
var FakeHeaders = map[string]string{
	"Accept":                    "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8",
	"Accept-Language":           "en-US,en;q=0.5",
	"Connection":                "keep-alive",
	"Dnt":                       "1",
	"Sec-Fetch-Dest":            "document",
	"Sec-Fetch-Mode":            "navigate",
	"Sec-Fetch-Site":            "cross-site",
	"Upgrade-Insecure-Requests": "1",
	"User-Agent":                FakeUserAgent,
	"Priority":                  "u=0, i",
}
FakeHeaders may be used to mimic a real browser.
Functions ¶
This section is empty.
Types ¶
type Action ¶
type Action struct {
Action TypedAction
}
Action wraps a TypedAction so that polymorphic actions can be marshaled to and unmarshaled from JSON.
func NewPDFAction ¶
func NewPDFAction(options PDFActionOptions) Action
NewPDFAction creates a new PDF action.
func NewScreenshotAction ¶
func NewScreenshotAction(options ScreenshotActionOptions) Action
NewScreenshotAction creates a new screenshot action.
func NewWaitAction ¶
func NewWaitAction(options WaitActionOptions) Action
NewWaitAction creates a new wait action.
func (*Action) MarshalJSON ¶
MarshalJSON implements custom marshaling for polymorphic actions.
func (*Action) UnmarshalJSON ¶
UnmarshalJSON implements custom unmarshaling for polymorphic actions.
type BaseAction ¶
type BaseAction struct {
Type string `json:"type"`
}
BaseAction contains the fields common to all actions.
func (BaseAction) GetType ¶
func (a BaseAction) GetType() string
type Client ¶
type Client struct {
// contains filtered or unexported fields
}
Client defines a client for fetching pages via a remote proxy.
func NewClient ¶
func NewClient(options ClientOptions) *Client
NewClient creates a new client with the given options.
type ClientOptions ¶
type ClientOptions struct {
	BaseURL   string            // Optional proxy base URL
	AuthToken string            // Optional authorization token
	Timeout   time.Duration     // Optional HTTP timeout
	Headers   map[string]string // Optional HTTP headers
}
ClientOptions defines the options for the client.
type Fetcher ¶
type Fetcher interface {
	// Fetch a webpage and return the response.
	Fetch(ctx context.Context, request *Request) (*Response, error)
}
Fetcher defines an interface for fetching pages.
type HTTPFetcher ¶
type HTTPFetcher struct {
// contains filtered or unexported fields
}
HTTPFetcher implements the Fetcher interface using standard HTTP client.
func NewHTTPFetcher ¶
func NewHTTPFetcher(options HTTPFetcherOptions) *HTTPFetcher
NewHTTPFetcher creates a new HTTP fetcher.
type HTTPFetcherOptions ¶
type HTTPFetcherOptions struct {
	Timeout     time.Duration
	Headers     map[string]string
	Client      *http.Client
	MaxBodySize int64
}
HTTPFetcherOptions defines the options for the HTTP fetcher.
type MockFetcher ¶
MockFetcher implements the Fetcher interface for testing.
func NewMockFetcher ¶
func NewMockFetcher() *MockFetcher
func (*MockFetcher) AddError ¶
func (m *MockFetcher) AddError(url string, err error)
func (*MockFetcher) AddResponse ¶
func (m *MockFetcher) AddResponse(url string, response *Response)
type PDFAction ¶
type PDFAction struct {
	BaseAction
	Format string `json:"format,omitempty"` // A4, Letter, Legal, etc.
}
PDFAction generates a PDF of the page.
type PDFActionOptions ¶
type PDFActionOptions struct {
Format string `json:"format,omitempty"` // A4, Letter, Legal, etc.
}
PDFActionOptions represents the options for a PDF action.
type Request ¶
type Request struct {
	URL             string            `json:"url"`
	OnlyMainContent bool              `json:"only_main_content,omitempty"`
	IncludeTags     []string          `json:"include_tags,omitempty"`
	ExcludeTags     []string          `json:"exclude_tags,omitempty"`
	MaxAge          int               `json:"max_age,omitempty"`  // milliseconds
	Timeout         int               `json:"timeout,omitempty"`  // milliseconds
	WaitFor         int               `json:"wait_for,omitempty"` // milliseconds
	Fetcher         string            `json:"fetcher,omitempty"`
	Mobile          bool              `json:"mobile,omitempty"`
	Prettify        bool              `json:"prettify,omitempty"`
	Formats         []string          `json:"formats,omitempty"`
	Actions         []Action          `json:"actions,omitempty"`
	Headers         map[string]string `json:"headers,omitempty"`
	StorageState    map[string]any    `json:"storage_state,omitempty"`
}
Request defines the JSON payload for fetch requests.
func ParseGetRequest ¶
ParseGetRequest parses a fetch.Request from a GET request and its query parameters.
type Response ¶
type Response struct {
	URL          string            `json:"url"`
	StatusCode   int               `json:"status_code"`
	Headers      map[string]string `json:"headers"`
	HTML         string            `json:"html,omitempty"`
	Markdown     string            `json:"markdown,omitempty"`
	Screenshot   string            `json:"screenshot,omitempty"`
	PDF          string            `json:"pdf,omitempty"`
	Error        string            `json:"error,omitempty"`
	Metadata     Metadata          `json:"metadata,omitempty"`
	Links        []*Link           `json:"links,omitempty"`
	StorageState map[string]any    `json:"storage_state,omitempty"`
	Timestamp    time.Time         `json:"timestamp,omitzero"`
}
Response defines the JSON payload for fetch responses.
func ProcessRequest ¶
ProcessRequest applies the request options to the given HTML content, performing any requested transformations, and builds the corresponding response. This is a reference implementation and may not be used in all cases.
type ScreenshotAction ¶
type ScreenshotAction struct {
	BaseAction
	FullPage bool `json:"full_page,omitempty"`
}
ScreenshotAction triggers a screenshot of the page.
type ScreenshotActionOptions ¶
type ScreenshotActionOptions struct {
FullPage bool `json:"full_page,omitempty"`
}
ScreenshotActionOptions represents the options for a screenshot action.
type TypedAction ¶
type TypedAction interface {
GetType() string
}
TypedAction represents an action to be taken on a page.
type WaitAction ¶
type WaitAction struct {
	BaseAction
	Selector string `json:"selector,omitempty"` // Wait for element to appear
	Duration int    `json:"duration,omitempty"` // Wait for specific duration in milliseconds
}
WaitAction waits for an element to appear or for a specific duration.
type WaitActionOptions ¶
type WaitActionOptions struct {
	Selector string `json:"selector,omitempty"` // Wait for element to appear
	Duration int    `json:"duration,omitempty"` // Wait for specific duration in milliseconds
}
WaitActionOptions represents the options for a wait action.