fetch

package
v0.0.10 Latest Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Jul 20, 2025 License: Apache-2.0 Imports: 13 Imported by: 2

Documentation

Index

Constants

View Source
// Package-level default limits.
// NOTE(review): presumably applied when HTTPFetcherOptions.MaxBodySize or a
// Timeout option is left at its zero value — confirm against the implementation.
const (
	DefaultMaxBodySize = 10 * 1024 * 1024 // 10 MiB (10,485,760 bytes)
	DefaultTimeout     = 30 * time.Second // also the Timeout of DefaultHTTPClient
)
View Source
// FakeUserAgent is a Firefox 133 on macOS User-Agent string that may be used
// to mimic a real browser. It is also the "User-Agent" value in FakeHeaders.
const FakeUserAgent = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:133.0) Gecko/20100101 Firefox/133.0"

FakeUserAgent may be used to mimic a real browser.

Variables

View Source
// Package-level defaults.
// NOTE(review): both values are exported and mutable, so a change made by one
// caller is visible to every other user of the package in the same process.
var (
	// DefaultHTTPClient is an http.Client whose Timeout is DefaultTimeout.
	DefaultHTTPClient = &http.Client{Timeout: DefaultTimeout}
	// DefaultHeaders is an initially empty header map.
	DefaultHeaders    = map[string]string{}
)
View Source
// FakeHeaders is a set of request headers that may be used to mimic a real
// browser. The values describe a top-level document navigation
// (Sec-Fetch-Dest "document", Sec-Fetch-Mode "navigate") and the User-Agent
// entry is FakeUserAgent.
var FakeHeaders = map[string]string{
	"Accept":                    "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8",
	"Accept-Language":           "en-US,en;q=0.5",
	"Connection":                "keep-alive",
	"Dnt":                       "1",
	"Sec-Fetch-Dest":            "document",
	"Sec-Fetch-Mode":            "navigate",
	"Sec-Fetch-Site":            "cross-site",
	"Upgrade-Insecure-Requests": "1",
	"User-Agent":                FakeUserAgent,
	"Priority":                  "u=0, i",
}

FakeHeaders may be used to mimic a real browser.

Functions

This section is empty.

Types

type Action

// Action wraps a TypedAction so that polymorphic actions can be marshaled to
// and unmarshaled from JSON (see its MarshalJSON and UnmarshalJSON methods).
// NewPDFAction, NewScreenshotAction and NewWaitAction return Action values.
type Action struct {
	// Action is the wrapped action; its kind is reported by GetType.
	Action TypedAction
}

Action is used for JSON marshaling/unmarshaling of polymorphic actions

func NewPDFAction

func NewPDFAction(options PDFActionOptions) Action

NewPDFAction creates a new PDF action

func NewScreenshotAction

func NewScreenshotAction(options ScreenshotActionOptions) Action

NewScreenshotAction creates a new screenshot action

func NewWaitAction

func NewWaitAction(options WaitActionOptions) Action

NewWaitAction creates a new wait action

func (*Action) MarshalJSON

func (a *Action) MarshalJSON() ([]byte, error)

MarshalJSON implements custom marshaling for polymorphic actions

func (*Action) UnmarshalJSON

func (a *Action) UnmarshalJSON(data []byte) error

UnmarshalJSON implements custom unmarshaling for polymorphic actions

type BaseAction

// BaseAction contains the fields common to all actions. It is embedded by
// PDFAction, ScreenshotAction and WaitAction.
type BaseAction struct {
	// Type is encoded under the JSON key "type".
	// NOTE(review): presumably the value returned by GetType — confirm.
	Type string `json:"type"`
}

BaseAction contains common fields for all actions

func (BaseAction) GetType

func (a BaseAction) GetType() string

type Client

// Client is a client for fetching pages via a remote proxy.
// Use NewClient to obtain one; all of its fields are unexported.
type Client struct {
	// contains filtered or unexported fields
}

Client defines a client for fetching pages via a remote proxy.

func NewClient

func NewClient(options ClientOptions) *Client

NewClient creates a new client with the given options.

func (*Client) Fetch

func (c *Client) Fetch(ctx context.Context, request *Request) (*Response, error)

Fetch fetches a page using a remote proxy.

func (*Client) SetHeader added in v0.0.5

func (c *Client) SetHeader(key, value string)

SetHeader sets a header for the client.

type ClientOptions

// ClientOptions defines the options passed to NewClient.
// Every field is optional.
type ClientOptions struct {
	BaseURL   string            // Optional proxy base URL
	AuthToken string            // Optional authorization token
	Timeout   time.Duration     // Optional HTTP timeout
	Headers   map[string]string // Optional HTTP headers
}

ClientOptions defines the options for the client.

type Fetcher

// Fetcher defines an interface for fetching pages.
// Client, HTTPFetcher and MockFetcher each declare a Fetch method with this
// signature.
type Fetcher interface {

	// Fetch a webpage and return the response.
	Fetch(ctx context.Context, request *Request) (*Response, error)
}

Fetcher defines an interface for fetching pages.

type HTTPFetcher added in v0.0.4

// HTTPFetcher implements the Fetcher interface using a standard HTTP client.
// Use NewHTTPFetcher to obtain one; all of its fields are unexported.
type HTTPFetcher struct {
	// contains filtered or unexported fields
}

HTTPFetcher implements the Fetcher interface using a standard HTTP client.

func NewHTTPFetcher added in v0.0.4

func NewHTTPFetcher(options HTTPFetcherOptions) *HTTPFetcher

NewHTTPFetcher creates a new HTTP fetcher

func (*HTTPFetcher) Fetch added in v0.0.4

func (f *HTTPFetcher) Fetch(ctx context.Context, req *Request) (*Response, error)

Fetch implements the Fetcher interface for HTTP requests

type HTTPFetcherOptions added in v0.0.4

// HTTPFetcherOptions defines the options passed to NewHTTPFetcher.
// NOTE(review): zero values presumably fall back to DefaultTimeout,
// DefaultHeaders, DefaultHTTPClient and DefaultMaxBodySize — confirm against
// the implementation. MaxBodySize is presumably a byte count, as
// DefaultMaxBodySize is.
type HTTPFetcherOptions struct {
	Timeout     time.Duration
	Headers     map[string]string
	Client      *http.Client
	MaxBodySize int64
}

HTTPFetcherOptions defines the options for the HTTP fetcher.

// Link is a defined type based on web.Link, provided for convenience.
// It is declared without "=", so it is a distinct type rather than an alias
// and values need an explicit conversion to or from web.Link.
type Link web.Link

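Link is a defined type based on web.Link, provided for convenience. Because it is declared without "=", it is a distinct type rather than a true type alias.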

type Meta

// Meta is a defined type based on web.Meta, provided for convenience.
// It is declared without "=", so it is a distinct type rather than an alias
// and values need an explicit conversion to or from web.Meta.
type Meta web.Meta

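Meta is a defined type based on web.Meta, provided for convenience. Because it is declared without "=", it is a distinct type rather than a true type alias.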

type Metadata

// Metadata is a defined type based on web.Metadata, provided for convenience.
// It is declared without "=", so it is a distinct type rather than an alias
// and values need an explicit conversion to or from web.Metadata.
type Metadata web.Metadata

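Metadata is a defined type based on web.Metadata, provided for convenience. Because it is declared without "=", it is a distinct type rather than a true type alias.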

type MockFetcher added in v0.0.7

// MockFetcher implements the Fetcher interface for testing.
// Use NewMockFetcher to obtain one; AddResponse and AddError register a
// response or an error for a given URL.
type MockFetcher struct {
	// Embedded mock type.
	// NOTE(review): presumably testify's mock.Mock — confirm against the imports.
	mock.Mock
	// contains filtered or unexported fields
}

MockFetcher implements the Fetcher interface for testing

func NewMockFetcher added in v0.0.7

func NewMockFetcher() *MockFetcher

func (*MockFetcher) AddError added in v0.0.7

func (m *MockFetcher) AddError(url string, err error)

func (*MockFetcher) AddResponse added in v0.0.7

func (m *MockFetcher) AddResponse(url string, response *Response)

func (*MockFetcher) Fetch added in v0.0.7

func (m *MockFetcher) Fetch(ctx context.Context, req *Request) (*Response, error)

type PDFAction

// PDFAction generates a PDF of the page.
// It embeds BaseAction, which supplies the Type field and the GetType method.
type PDFAction struct {
	BaseAction
	Format string `json:"format,omitempty"` // A4, Letter, Legal, etc.
}

PDFAction generates a PDF of the page

type PDFActionOptions

// PDFActionOptions represents the options for a PDF action; it is the
// argument to NewPDFAction and has the same Format field as PDFAction.
type PDFActionOptions struct {
	Format string `json:"format,omitempty"` // A4, Letter, Legal, etc.
}

PDFActionOptions represents the options for a PDF action

type Request

// Request defines the JSON payload for fetch requests.
//
// Only URL is always encoded; every other field is tagged omitempty and is
// left out of the JSON when it holds its empty value. MaxAge, Timeout and
// WaitFor are integers expressed in milliseconds, not time.Duration values.
type Request struct {
	URL             string            `json:"url"`
	OnlyMainContent bool              `json:"only_main_content,omitempty"`
	IncludeTags     []string          `json:"include_tags,omitempty"`
	ExcludeTags     []string          `json:"exclude_tags,omitempty"`
	MaxAge          int               `json:"max_age,omitempty"`  // milliseconds
	Timeout         int               `json:"timeout,omitempty"`  // milliseconds
	WaitFor         int               `json:"wait_for,omitempty"` // milliseconds
	Fetcher         string            `json:"fetcher,omitempty"`
	Mobile          bool              `json:"mobile,omitempty"`
	Prettify        bool              `json:"prettify,omitempty"`
	Formats         []string          `json:"formats,omitempty"`
	Actions         []Action          `json:"actions,omitempty"`
	Headers         map[string]string `json:"headers,omitempty"`
	StorageState    map[string]any    `json:"storage_state,omitempty"`
}

Request defines the JSON payload for fetch requests.

func ParseGetRequest added in v0.0.4

func ParseGetRequest(r *http.Request) (*Request, error)

ParseGetRequest parses a fetch.Request from a GET request and its query parameters.

func ParsePostRequest added in v0.0.4

func ParsePostRequest(r *http.Request) (*Request, error)

ParsePostRequest parses a fetch.Request from a POST request body.

type Response

// Response defines the JSON payload for fetch responses.
//
// URL, StatusCode and Headers are always encoded (a nil Headers map encodes
// as null); the remaining fields are tagged to be omitted when empty.
//
// NOTE(review): encoding/json never treats a struct value as empty, so if
// Metadata's underlying type is a struct the omitempty tag on it has no
// effect — confirm, and consider omitzero. The omitzero tag on Timestamp
// is only honored by encoding/json from Go 1.24 onward.
type Response struct {
	URL          string            `json:"url"`
	StatusCode   int               `json:"status_code"`
	Headers      map[string]string `json:"headers"`
	HTML         string            `json:"html,omitempty"`
	Markdown     string            `json:"markdown,omitempty"`
	Screenshot   string            `json:"screenshot,omitempty"`
	PDF          string            `json:"pdf,omitempty"`
	Error        string            `json:"error,omitempty"`
	Metadata     Metadata          `json:"metadata,omitempty"`
	Links        []*Link           `json:"links,omitempty"`
	StorageState map[string]any    `json:"storage_state,omitempty"`
	Timestamp    time.Time         `json:"timestamp,omitzero"`
}

Response defines the JSON payload for fetch responses.

func ProcessRequest added in v0.0.4

func ProcessRequest(request *Request, html string) (*Response, error)

ProcessRequest applies the request options to the given HTML content, performing any requested transformations, and builds the corresponding response. This is a reference implementation and may not be suitable for all use cases.

type ScreenshotAction

// ScreenshotAction triggers a screenshot of the page.
// It embeds BaseAction, which supplies the Type field and the GetType method.
type ScreenshotAction struct {
	BaseAction
	// FullPage is omitted from the JSON when false.
	FullPage bool `json:"full_page,omitempty"`
}

ScreenshotAction triggers a screenshot of the page

type ScreenshotActionOptions

// ScreenshotActionOptions represents the options for a screenshot action; it
// is the argument to NewScreenshotAction and has the same FullPage field as
// ScreenshotAction.
type ScreenshotActionOptions struct {
	FullPage bool `json:"full_page,omitempty"`
}

ScreenshotActionOptions represents the options for a screenshot action

type TypedAction

// TypedAction represents an action to be taken on a page.
// BaseAction declares GetType with a value receiver, so every type that
// embeds BaseAction (PDFAction, ScreenshotAction, WaitAction) satisfies
// this interface through method promotion.
type TypedAction interface {
	GetType() string
}

TypedAction represents an action to be taken on a page

type WaitAction

// WaitAction waits for a condition or time.
// It embeds BaseAction, which supplies the Type field and the GetType method.
// Both Selector and Duration are omitted from the JSON when empty.
type WaitAction struct {
	BaseAction
	Selector string `json:"selector,omitempty"` // Wait for element to appear
	Duration int    `json:"duration,omitempty"` // Wait for specific duration in milliseconds
}

WaitAction waits for a condition or time

type WaitActionOptions

// WaitActionOptions represents the options for a wait action; it is the
// argument to NewWaitAction and has the same Selector and Duration fields
// as WaitAction.
type WaitActionOptions struct {
	Selector string `json:"selector,omitempty"` // Wait for element to appear
	Duration int    `json:"duration,omitempty"` // Wait for specific duration in milliseconds
}

WaitActionOptions represents the options for a wait action

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL