Documentation
¶
Overview ¶
Package crawlerx @Author bcy2007 2023/7/13 11:10
Package crawlerx @Author bcy2007 2023/7/12 17:31
Package crawlerx @Author bcy2007 2023/7/12 16:20
Package crawlerx @Author bcy2007 2023/7/12 16:31
Package crawlerx @Author bcy2007 2023/7/13 11:32
Package crawlerx @Author bcy2007 2023/7/12 17:42
Package crawlerx @Author bcy2007 2023/7/12 16:56
Package crawlerx @Author bcy2007 2023/8/1 11:09
Package crawlerx @Author bcy2007 2023/7/17 11:01
Package crawlerx @Author bcy2007 2023/7/14 10:44
Package crawlerx @Author bcy2007 2023/7/14 10:31
Package crawlerx @Author bcy2007 2023/7/14 11:07
Package crawlerx @Author bcy2007 2023/7/12 16:42
Package crawlerx @Author bcy2007 2023/11/1 10:16
Package crawlerx @Author bcy2007 2023/7/12 16:19
Package crawlerx @Author bcy2007 2023/7/13 11:57
Package crawlerx @Author bcy2007 2023/7/12 17:40
Index ¶
- Variables
- func BrowserEachEvent(browser *rod.Browser, sessionID proto.TargetSessionID, ...) func()
- func EvalOnPage(page *rod.Page, evalJs string) (*proto.RuntimeRemoteObject, error)
- func GetSortedQuery(rawQuery string) (query []string, err error)
- func NewPageScreenShot(targetUrl string, opts ...ConfigOpt) (code string, err error)
- func StartCrawler(url string, opts ...ConfigOpt) (chan ReqInfo, error)
- func StartCrawlerTest(url string, opts ...ConfigOpt) (chan ReqInfo, error)
- func StringArrayContains(array []string, element string) bool
- func StringArrayCover(array []string, element string) (bool, string)
- func StringPrefixList(origin string, prefixes []string) bool
- func StringSuffixList(s string, suffixes []string) bool
- func TargetUrlCheck(targetUrl string, proxy *url.URL) (string, error)
- type AIInput
- type AIInputResult
- type BaseConfig
- type BrowserConfig
- type BrowserInfo
- type BrowserManager
- type BrowserStarter
- func (starter *BrowserStarter) ActionOnPage(page *rod.Page) error
- func (starter *BrowserStarter) GetFormFill(element *rod.Element) string
- func (starter *BrowserStarter) GetUploadFile(element *rod.Element) string
- func (starter *BrowserStarter) HttpPostFile(element *rod.Element) error
- func (starter *BrowserStarter) Start()
- func (starter *BrowserStarter) Test()
- type Config
- type ConfigOpt
- func WithAIInputInf(info string) ConfigOpt
- func WithAIInputUrl(url string) ConfigOpt
- func WithBlackList(keywords ...string) ConfigOpt
- func WithBrowserData(browserConfig *BrowserConfig) ConfigOpt
- func WithBrowserInfo(data string) ConfigOpt
- func WithConcurrent(concurrent int) ConfigOpt
- func WithContext(ctx context.Context) ConfigOpt
- func WithCookieInfo(domain, cookieInfo string) ConfigOpt
- func WithCookies(domain string, cookiesInfo map[string]string) ConfigOpt
- func WithEvalJs(target string, evalJs string) ConfigOpt
- func WithExtraWaitLoadTime(extraWaitLoadTime int) ConfigOpt
- func WithFileInput(fileInput map[string]string) ConfigOpt
- func WithFormFill(formFills map[string]string) ConfigOpt
- func WithFromPlugin(fromPlugin string) ConfigOpt
- func WithFullTimeout(timeout int) ConfigOpt
- func WithHeaderInfo(headerInfo string) ConfigOpt
- func WithHeaders(headersInfo map[string]string) ConfigOpt
- func WithIgnoreQueryName(names ...string) ConfigOpt
- func WithInvalidSuffix(suffix []string) ConfigOpt
- func WithJsResultSave(storage func(s string)) ConfigOpt
- func WithLeakless(leakless string) ConfigOpt
- func WithLocalStorage(storage map[string]string) ConfigOpt
- func WithMaxDepth(depth int) ConfigOpt
- func WithMaxUrl(maxUrl int) ConfigOpt
- func WithPageSizedWaitGroup(pageSizedWaitGroup *utils.SizedWaitGroup) ConfigOpt
- func WithPageTimeout(timeout int) ConfigOpt
- func WithPageVisitFilter(pageVisitFilter *tools.StringCountFilter) ConfigOpt
- func WithResponse(targetUrl string, response string) ConfigOpt
- func WithResultChannel(ch chan ReqInfo) ConfigOpt
- func WithResultSentFilter(resultSentFilter *tools.StringCountFilter) ConfigOpt
- func WithRuntimeID(id string) ConfigOpt
- func WithSaveToDB(b bool) ConfigOpt
- func WithScanRangeLevel(scanRange scanRangeLevel) ConfigOpt
- func WithScanRepeatLevel(scanRepeat repeatLevel) ConfigOpt
- func WithSensitiveWords(words []string) ConfigOpt
- func WithSessionStorage(storage map[string]string) ConfigOpt
- func WithSourceType(sourceType string) ConfigOpt
- func WithStartWaitGroup(waitGroup *utils.SizedWaitGroup) ConfigOpt
- func WithStealth(stealth bool) ConfigOpt
- func WithTargetUrl(targetUrl string) ConfigOpt
- func WithUChan(uChan *tools.UChan) ConfigOpt
- func WithUrlCheck(check bool) ConfigOpt
- func WithUrlTree(tree *tools.UrlTree) ConfigOpt
- func WithVue(vue bool) ConfigOpt
- func WithWhiteList(keywords ...string) ConfigOpt
- type CrawlerCore
- type CrawlerHijack
- type CrawlerHijackHandler
- type CrawlerHijackRequest
- func (hijack *CrawlerHijackRequest) Body() string
- func (hijack *CrawlerHijackRequest) Header(key string) string
- func (hijack *CrawlerHijackRequest) Headers() proto.NetworkHeaders
- func (hijack *CrawlerHijackRequest) IsNavigation() bool
- func (hijack *CrawlerHijackRequest) JSONBody() gson.JSON
- func (hijack *CrawlerHijackRequest) Method() string
- func (hijack *CrawlerHijackRequest) Req() *http.Request
- func (hijack *CrawlerHijackRequest) SetBody(obj interface{}) *CrawlerHijackRequest
- func (hijack *CrawlerHijackRequest) SetContext(ctx context.Context) *CrawlerHijackRequest
- func (hijack *CrawlerHijackRequest) Type() proto.NetworkResourceType
- func (hijack *CrawlerHijackRequest) URL() *url.URL
- type CrawlerHijackResponse
- func (hijack *CrawlerHijackResponse) Body() string
- func (hijack *CrawlerHijackResponse) Fail(reason proto.NetworkErrorReason) *CrawlerHijackResponse
- func (hijack *CrawlerHijackResponse) Headers() http.Header
- func (hijack *CrawlerHijackResponse) Payload() *proto.FetchFulfillRequest
- func (hijack *CrawlerHijackResponse) SetBody(obj interface{}) *CrawlerHijackResponse
- func (hijack *CrawlerHijackResponse) SetHeader(pairs ...string) *CrawlerHijackResponse
- type CrawlerRouter
- type HijackRequest
- type HijackResponse
- type HttpRequest
- func (request *HttpRequest) Do() error
- func (request *HttpRequest) GetRequest() error
- func (request *HttpRequest) GetUrl() string
- func (request *HttpRequest) MultiPartRequest() error
- func (request *HttpRequest) PostRequest() error
- func (request *HttpRequest) Request() error
- func (request *HttpRequest) Show() (string, error)
- type JSEval
- type JsResultSave
- type JsResults
- type OutputBody
- type OutputHeader
- type OutputRequest
- type OutputResponse
- type OutputResult
- type OutputResults
- type ReqInfo
- type RequestResult
- func (result *RequestResult) From() string
- func (result *RequestResult) Method() string
- func (result *RequestResult) RequestBody() string
- func (result *RequestResult) RequestHeaders() map[string]string
- func (result *RequestResult) RequestRaw() ([]byte, error)
- func (result *RequestResult) ResponseBody() string
- func (result *RequestResult) ResponseHeaders() map[string]string
- func (result *RequestResult) Screenshot() string
- func (result *RequestResult) StatusCode() int
- func (result *RequestResult) Type() string
- func (result *RequestResult) Url() string
- type SimpleResult
- func (simpleResult *SimpleResult) From() string
- func (simpleResult *SimpleResult) Method() string
- func (simpleResult *SimpleResult) RequestBody() string
- func (simpleResult *SimpleResult) RequestHeaders() map[string]string
- func (simpleResult *SimpleResult) RequestRaw() ([]byte, error)
- func (simpleResult *SimpleResult) ResponseBody() string
- func (simpleResult *SimpleResult) ResponseHeaders() map[string]string
- func (simpleResult *SimpleResult) Screenshot() string
- func (*SimpleResult) StatusCode() int
- func (simpleResult *SimpleResult) Type() string
- func (simpleResult *SimpleResult) Url() string
- type TestHijackRequest
- func (testHijackRequest *TestHijackRequest) Body() string
- func (testHijackRequest *TestHijackRequest) Header(key string) string
- func (testHijackRequest *TestHijackRequest) Headers() proto.NetworkHeaders
- func (testHijackRequest *TestHijackRequest) JSONBody() gson.JSON
- func (testHijackRequest *TestHijackRequest) Method() string
- func (testHijackRequest *TestHijackRequest) Req() *http.Request
- func (testHijackRequest *TestHijackRequest) Type() proto.NetworkResourceType
- func (testHijackRequest *TestHijackRequest) URL() *url.URL
Constants ¶
This section is empty.
Variables ¶
var CrawlerXExports = map[string]interface{}{ "StartCrawler": StartCrawler, "PageScreenShot": NewPageScreenShot, "browserInfo": WithBrowserInfo, "saveToDB": WithSaveToDB, "runtimeId": WithRuntimeID, "maxUrl": WithMaxUrl, "maxDepth": WithMaxDepth, "concurrent": WithConcurrent, "blacklist": WithBlackList, "whitelist": WithWhiteList, "pageTimeout": WithPageTimeout, "fullTimeout": WithFullTimeout, "extraWaitLoadTime": WithExtraWaitLoadTime, "formFill": WithFormFill, "fileInput": WithFileInput, "headers": WithHeaders, "rawHeaders": WithHeaderInfo, "cookies": WithCookies, "rawCookie": WithCookieInfo, "scanRangeLevel": WithScanRangeLevel, "scanRepeatLevel": WithScanRepeatLevel, "ignoreQueryName": WithIgnoreQueryName, "sensitiveWords": WithSensitiveWords, "leakless": WithLeakless, "localStorage": WithLocalStorage, "sessionStorage": WithSessionStorage, "invalidSuffix": WithInvalidSuffix, "stealth": WithStealth, "runtimeID": WithRuntimeID, "evalJs": WithEvalJs, "jsResultSend": WithJsResultSave, "vue": WithVue, "response": WithResponse, "sourceType": WithSourceType, "fromPlugin": WithFromPlugin, "urlCheck": WithUrlCheck, "aiInputUrl": WithAIInputUrl, "aiInputInfo": WithAIInputInf, "UnLimitRepeat": unlimited, "LowRepeatLevel": lowLevel, "MediumRepeatLevel": midLevel, "HighRepeatLevel": highLevel, "ExtremeRepeatLevel": extremeLevel, "AllDomainScan": mainDomain, "SubMenuScan": subDomain, "UnlimitedDomainScan": unlimitedDomain, }
var RepeatLevelMap = map[int]repeatLevel{
0: unlimited,
1: lowLevel,
2: midLevel,
3: highLevel,
4: extremeLevel,
}
var ScanRangeLevelMap = map[int]scanRangeLevel{
0: mainDomain,
1: subDomain,
2: unlimitedDomain,
}
Functions ¶
func BrowserEachEvent ¶
func BrowserEachEvent(browser *rod.Browser, sessionID proto.TargetSessionID, callbacks ...interface{}) func()
func EvalOnPage ¶
func GetSortedQuery ¶
func NewPageScreenShot ¶
func StartCrawler ¶
StartCrawler 开启一个无头浏览器模拟点击爬虫任务 第一个参数为目标url,后面可以添加零个或多个请求选项,用于对此次请求进行配置 返回值包括channel和错误,从channel中获取爬虫结果
Examples: ```
targetUrl = "http://testphp.vulnweb.com/" ch, err = crawlerx.StartCrawler(targetUrl, crawlerx.pageTimeout(30), crawlerx.concurrent(3)) for item = range ch { yakit.Info(item.Method() + " " + item.Url()) }
```
func StringArrayContains ¶
func StringPrefixList ¶
func StringSuffixList ¶
Types ¶
type AIInputResult ¶
type BaseConfig ¶
type BaseConfig struct {
// contains filtered or unexported fields
}
type BrowserConfig ¶
type BrowserConfig struct {
// contains filtered or unexported fields
}
func NewBrowserConfig ¶
func NewBrowserConfig(exePath, wsAddress string, proxyAddress *url.URL) *BrowserConfig
type BrowserInfo ¶
type BrowserManager ¶
type BrowserManager struct {
// contains filtered or unexported fields
}
func NewBrowserManager ¶
func NewBrowserManager(config *Config) *BrowserManager
func (*BrowserManager) CreateBrowserStarters ¶
func (manager *BrowserManager) CreateBrowserStarters()
func (*BrowserManager) Start ¶
func (manager *BrowserManager) Start()
func (*BrowserManager) Test ¶
func (manager *BrowserManager) Test()
type BrowserStarter ¶
type BrowserStarter struct {
// contains filtered or unexported fields
}
func NewBrowserStarter ¶
func NewBrowserStarter(browserConfig *BrowserConfig, baseConfig *BaseConfig) *BrowserStarter
func (*BrowserStarter) ActionOnPage ¶
func (starter *BrowserStarter) ActionOnPage(page *rod.Page) error
func (*BrowserStarter) GetFormFill ¶
func (starter *BrowserStarter) GetFormFill(element *rod.Element) string
func (*BrowserStarter) GetUploadFile ¶
func (starter *BrowserStarter) GetUploadFile(element *rod.Element) string
func (*BrowserStarter) HttpPostFile ¶
func (starter *BrowserStarter) HttpPostFile(element *rod.Element) error
func (*BrowserStarter) Start ¶
func (starter *BrowserStarter) Start()
func (*BrowserStarter) Test ¶
func (starter *BrowserStarter) Test()
type ConfigOpt ¶
type ConfigOpt func(*Config)
func WithAIInputInf ¶
func WithAIInputUrl ¶
func WithBlackList ¶
blacklist 是一个请求选项 用于设置不会被访问的url链接包含的关键词
Examples: ```
targetUrl = "http://testphp.vulnweb.com/" ch, err = crawlerx.StartCrawler(targetUrl, crawlerx.blacklist("logout", "exit", "delete")) // 设置遇到url中包含logout、exit和delete时不会访问 ...
```
func WithBrowserData ¶
func WithBrowserData(browserConfig *BrowserConfig) ConfigOpt
func WithBrowserInfo ¶
browserInfo 是一个请求选项 用于配置浏览器参数
Examples: ```
targetUrl = "http://testphp.vulnweb.com/" browserInfo = { "ws_address":"", // 浏览器websocket url "exe_path":"", // 浏览器可执行路径 "proxy_address":"", // 代理地址 "proxy_username":"", // 代理用户名 "proxy_password":"", // 代理密码 } browserInfoOpt = crawlerx.browserInfo(json.dumps(browserInfo)) ch, err = crawlerx.StartCrawler(targetUrl, browserInfoOpt) ...
```
func WithConcurrent ¶
concurrent 是一个请求选项 用于设置浏览器同时打开的最大页面数量
Examples: ```
targetUrl = "http://testphp.vulnweb.com/" ch, err = crawlerx.StartCrawler(targetUrl, crawlerx.concurrent(3)) // 设置浏览器同时打开的最大页面数量为3 ...
```
func WithContext ¶
func WithCookieInfo ¶
rawCookie 是一个请求选项 用于设置爬虫发送请求时的cookie
Examples: ```
targetUrl = "http://testphp.vulnweb.com/" cookie = `Apache=5651982500959.057.1731310579958; ULV=1731310579971:11:1:1:5651982500959.057.1731310579958:1727418057693; ALF=1735783078` ch, err = crawlerx.StartCrawler(targetUrl, crawlerx.rawCookie("testphp.vulnweb.com", cookie)) // 原生cookie输入 ...
```
func WithCookies ¶
cookies 是一个请求选项 用于设置爬虫发送请求时的cookie
Examples: ```
targetUrl = "http://testphp.vulnweb.com/" cookieMap = make(map[string]string, 0) cookieMap["Apache"] = "5651982500959.057.1731310579958" cookieMap["ULV"] = "1731310579971:11:1:1:5651982500959.057.1731310579958:1727418057693" ch, err = crawlerx.StartCrawler(targetUrl, crawlerx.cookies("testphp.vulnweb.com", cookieMap)) // cookie字典形式输入 ...
```
func WithEvalJs ¶
func WithExtraWaitLoadTime ¶
extraWaitLoadTime 是一个请求选项 用于设置页面加载的额外页面等待时间
防止加载vue网站页面时页面状态为加载完成 实际仍在加载中的情况
Examples: ```
targetUrl = "http://testphp.vulnweb.com/" ch, err = crawlerx.StartCrawler(targetUrl, crawlerx.extraWaitLoadTime(1000)) // 设置页面加载的额外页面等待时间为1000毫秒 ...
```
func WithFileInput ¶
fileInput 是一个请求选项 用于设置页面遇到文件上传输入框时默认上传的文件
Examples: ```
targetUrl = "http://testphp.vulnweb.com/" fileMap = make(map[string]string, 0) fileMap["default"] = "/path/to/file/test.txt" ch, err = crawlerx.StartCrawler(targetUrl, crawlerx.fileInput(fileMap)) // 设置页面遇到文件上传元素时默认上传的文件路径 ...
```
func WithFormFill ¶
formFill 是一个请求选项 用于设置页面输入框填写内容
Examples: ```
targetUrl = "http://testphp.vulnweb.com/" inputMap = make(map[string]string, 0) inputMap["username"] = "admin" inputMap["password"] = "123321" ch, err = crawlerx.StartCrawler(targetUrl, crawlerx.formFill(inputMap)) // 设置遇到输入框元素中存在对应关键词时输入对应内容 默认输入test ...
```
func WithFromPlugin ¶
func WithFullTimeout ¶
fullTimeout 是一个请求选项 用于设置爬虫任务总超时时间
Examples: ```
targetUrl = "http://testphp.vulnweb.com/" ch, err = crawlerx.StartCrawler(targetUrl, crawlerx.fullTimeout(1800)) // 设置爬虫任务总超时时间为1800秒 ...
```
func WithHeaderInfo ¶
rawHeaders 是一个请求选项 用于设置爬虫发送请求时的headers
Examples: ```
targetUrl = "http://testphp.vulnweb.com/" headers = `Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7 Accept-Encoding: gzip, deflate Accept-Language: zh-CN,zh;q=0.9,en;q=0.8,ja;q=0.7,zh-TW;q=0.6 Cache-Control: max-age=0 Connection: keep-alive Host: testphp.vulnweb.com Upgrade-Insecure-Requests: 1 User-Agent: Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36 ` ch, err = crawlerx.StartCrawler(targetUrl, crawlerx.rawHeaders(headers)) // 原生headers输入 ...
```
func WithHeaders ¶
headers 是一个请求选项 用于设置爬虫发送请求时的headers
Examples: ```
targetUrl = "http://testphp.vulnweb.com/" headerMap = make(map[string]string, 0) headerMap["Connection"] = "keep-alive" ch, err = crawlerx.StartCrawler(targetUrl, crawlerx.headers(headerMap)) // header以字典形式输入 ...
```
func WithIgnoreQueryName ¶
ignoreQueryName 是一个请求选项 用于设置url去重时忽略的query名称
Examples: ```
targetUrl = "http://testphp.vulnweb.com/" ch, err = crawlerx.StartCrawler(targetUrl, crawlerx.ignoreQueryName("sid", "tid")) // 设置检测url是否重复时无视sid和tid这两个query ...
```
func WithInvalidSuffix ¶
func WithJsResultSave ¶
func WithLeakless ¶
func WithLocalStorage ¶
func WithMaxDepth ¶
maxDepth 是一个请求选项 用于设置网站最大爬取深度
Examples: ```
targetUrl = "http://testphp.vulnweb.com/" ch, err = crawlerx.StartCrawler(targetUrl, crawlerx.maxDepth(3)) // 设置网站最大爬取深度为3 ...
```
func WithMaxUrl ¶
maxUrl 是一个请求选项 用于设置最大爬取url数量
Examples: ```
targetUrl = "http://testphp.vulnweb.com/" ch, err = crawlerx.StartCrawler(targetUrl, crawlerx.maxUrl(100)) // 设置最大爬取url数量为100 ...
```
func WithPageSizedWaitGroup ¶
func WithPageSizedWaitGroup(pageSizedWaitGroup *utils.SizedWaitGroup) ConfigOpt
func WithPageTimeout ¶
pageTimeout 是一个请求选项 用于设置单个页面超时时间
Examples: ```
targetUrl = "http://testphp.vulnweb.com/" ch, err = crawlerx.StartCrawler(targetUrl, crawlerx.pageTimeout(30)) // 设置单个页面超时时间为30秒 ...
```
func WithPageVisitFilter ¶
func WithPageVisitFilter(pageVisitFilter *tools.StringCountFilter) ConfigOpt
func WithResponse ¶
func WithResultChannel ¶
func WithResultSentFilter ¶
func WithResultSentFilter(resultSentFilter *tools.StringCountFilter) ConfigOpt
func WithRuntimeID ¶
func WithSaveToDB ¶
func WithScanRangeLevel ¶
func WithScanRangeLevel(scanRange scanRangeLevel) ConfigOpt
scanRangeLevel 是一个请求选项 用于设置爬虫扫描范围
Examples: ```
targetUrl = "http://testphp.vulnweb.com/" scanRangeOpt = crawlerx.scanRangeLevel(crawlerx.AllDomainScan) // 主域名扫描 // scanRangeOpt = crawlerx.scanRangeLevel(crawlerx.SubMenuScan) // 子域名扫描 // scanRangeOpt = crawlerx.scanRangeLevel(crawlerx.UnlimitedDomainScan) // 无限制扫描 ch, err = crawlerx.StartCrawler(targetUrl, scanRangeOpt) ...
```
func WithScanRepeatLevel ¶
func WithScanRepeatLevel(scanRepeat repeatLevel) ConfigOpt
scanRepeatLevel 是一个请求选项 用于设置爬虫去重强度
Examples: ```
targetUrl = "http://testphp.vulnweb.com/" scanRepeatOpt = crawlerx.scanRepeatLevel(crawlerx.UnLimitRepeat) // 对page,method,query-name,query-value和post-data敏感 // scanRepeatOpt = crawlerx.scanRepeatLevel(crawlerx.LowRepeatLevel) // 对page,method,query-name和query-value敏感(默认) // scanRepeatOpt = crawlerx.scanRepeatLevel(crawlerx.MediumRepeatLevel) // 对page,method和query-name敏感 // scanRepeatOpt = crawlerx.scanRepeatLevel(crawlerx.HighRepeatLevel) // 对page和method敏感 // scanRepeatOpt = crawlerx.scanRepeatLevel(crawlerx.ExtremeRepeatLevel) // 对page敏感 ch, err = crawlerx.StartCrawler(targetUrl, scanRepeatOpt) ...
```
func WithSensitiveWords ¶
sensitiveWords 是一个请求选项 用于设置页面按钮点击时的敏感词
Examples: ```
targetUrl = "http://testphp.vulnweb.com/" sensitiveWords = "logout,delete" ch, err = crawlerx.StartCrawler(targetUrl, crawlerx.sensitiveWords(sensitiveWords.Split(","))) // 当按钮所在元素中存在logout和delete关键词时不会点击 ...
```
func WithSessionStorage ¶
func WithSourceType ¶
func WithStartWaitGroup ¶
func WithStartWaitGroup(waitGroup *utils.SizedWaitGroup) ConfigOpt
func WithStealth ¶
func WithTargetUrl ¶
func WithUrlCheck ¶
urlCheck 是一个请求选项 用于设置是否在爬虫前进行url存活检测
Examples: ```
targetUrl = "http://testphp.vulnweb.com/" ch, err = crawlerx.StartCrawler(targetUrl, crawlerx.urlCheck(true)) ...
```
func WithUrlTree ¶
func WithWhiteList ¶
whitelist 是一个请求选项 用于设置只会被访问的url链接中包含的关键词
Examples: ```
targetUrl = "http://testphp.vulnweb.com/" ch, err = crawlerx.StartCrawler(targetUrl, crawlerx.whitelist("test", "click")) // 设置只会访问url中包含test和click的链接 ...
```
type CrawlerCore ¶
type CrawlerCore struct {
// contains filtered or unexported fields
}
func NewCrawlerCore ¶
func NewCrawlerCore(targetUrl string, opts ...ConfigOpt) (*CrawlerCore, error)
func (*CrawlerCore) Start ¶
func (core *CrawlerCore) Start()
func (*CrawlerCore) Test ¶
func (core *CrawlerCore) Test()
type CrawlerHijack ¶
type CrawlerHijack struct { Request *CrawlerHijackRequest Response *CrawlerHijackResponse OnError func(error) Skip bool CustomState interface{} // contains filtered or unexported fields }
func (*CrawlerHijack) ContinueRequest ¶
func (hijack *CrawlerHijack) ContinueRequest(cq *proto.FetchContinueRequest)
func (*CrawlerHijack) LoadResponse ¶
func (hijack *CrawlerHijack) LoadResponse(opts []lowhttp.LowhttpOpt, loadBody bool) error
type CrawlerHijackHandler ¶
type CrawlerHijackHandler struct {
// contains filtered or unexported fields
}
type CrawlerHijackRequest ¶
type CrawlerHijackRequest struct {
// contains filtered or unexported fields
}
func (*CrawlerHijackRequest) Body ¶
func (hijack *CrawlerHijackRequest) Body() string
func (*CrawlerHijackRequest) Header ¶
func (hijack *CrawlerHijackRequest) Header(key string) string
func (*CrawlerHijackRequest) Headers ¶
func (hijack *CrawlerHijackRequest) Headers() proto.NetworkHeaders
func (*CrawlerHijackRequest) IsNavigation ¶
func (hijack *CrawlerHijackRequest) IsNavigation() bool
func (*CrawlerHijackRequest) JSONBody ¶
func (hijack *CrawlerHijackRequest) JSONBody() gson.JSON
func (*CrawlerHijackRequest) Method ¶
func (hijack *CrawlerHijackRequest) Method() string
func (*CrawlerHijackRequest) Req ¶
func (hijack *CrawlerHijackRequest) Req() *http.Request
func (*CrawlerHijackRequest) SetBody ¶
func (hijack *CrawlerHijackRequest) SetBody(obj interface{}) *CrawlerHijackRequest
func (*CrawlerHijackRequest) SetContext ¶
func (hijack *CrawlerHijackRequest) SetContext(ctx context.Context) *CrawlerHijackRequest
func (*CrawlerHijackRequest) Type ¶
func (hijack *CrawlerHijackRequest) Type() proto.NetworkResourceType
func (*CrawlerHijackRequest) URL ¶
func (hijack *CrawlerHijackRequest) URL() *url.URL
type CrawlerHijackResponse ¶
type CrawlerHijackResponse struct {
// contains filtered or unexported fields
}
func (*CrawlerHijackResponse) Body ¶
func (hijack *CrawlerHijackResponse) Body() string
func (*CrawlerHijackResponse) Fail ¶
func (hijack *CrawlerHijackResponse) Fail(reason proto.NetworkErrorReason) *CrawlerHijackResponse
func (*CrawlerHijackResponse) Headers ¶
func (hijack *CrawlerHijackResponse) Headers() http.Header
func (*CrawlerHijackResponse) Payload ¶
func (hijack *CrawlerHijackResponse) Payload() *proto.FetchFulfillRequest
func (*CrawlerHijackResponse) SetBody ¶
func (hijack *CrawlerHijackResponse) SetBody(obj interface{}) *CrawlerHijackResponse
func (*CrawlerHijackResponse) SetHeader ¶
func (hijack *CrawlerHijackResponse) SetHeader(pairs ...string) *CrawlerHijackResponse
type CrawlerRouter ¶
type CrawlerRouter struct {
// contains filtered or unexported fields
}
func NewBrowserHijackRequests ¶
func NewBrowserHijackRequests(browser *rod.Browser) *CrawlerRouter
func NewPageHijackRequests ¶
func NewPageHijackRequests(page *rod.Page) *CrawlerRouter
func (*CrawlerRouter) Add ¶
func (router *CrawlerRouter) Add(pattern string, resourceType proto.NetworkResourceType, handler func(*CrawlerHijack)) error
func (*CrawlerRouter) Run ¶
func (router *CrawlerRouter) Run()
func (*CrawlerRouter) Stop ¶
func (router *CrawlerRouter) Stop() error
type HijackRequest ¶
type HijackResponse ¶
type HijackResponse interface { Payload() *proto.FetchFulfillRequest Body() string Headers() http.Header }
type HttpRequest ¶
type HttpRequest struct {
// contains filtered or unexported fields
}
func CreateFileRequest ¶
func CreateFileRequest(url, method string, params, files map[string]string) *HttpRequest
func CreateGetRequest ¶
func CreateGetRequest(url string) *HttpRequest
func CreateRequest ¶
func CreateRequest() *HttpRequest
func (*HttpRequest) Do ¶
func (request *HttpRequest) Do() error
func (*HttpRequest) GetRequest ¶
func (request *HttpRequest) GetRequest() error
func (*HttpRequest) GetUrl ¶
func (request *HttpRequest) GetUrl() string
func (*HttpRequest) MultiPartRequest ¶
func (request *HttpRequest) MultiPartRequest() error
func (*HttpRequest) PostRequest ¶
func (request *HttpRequest) PostRequest() error
func (*HttpRequest) Request ¶
func (request *HttpRequest) Request() error
func (*HttpRequest) Show ¶
func (request *HttpRequest) Show() (string, error)
type JSEval ¶
type JSEval struct {
// contains filtered or unexported fields
}
func CreateJsEval ¶
func CreateJsEval() *JSEval
type JsResultSave ¶
type OutputBody ¶
type OutputHeader ¶
type OutputRequest ¶
type OutputRequest struct { Url string `json:"url"` Method string `json:"method"` Headers []*OutputHeader `json:"headers"` Body OutputBody `json:"body"` HTTPRaw string `json:"http_raw"` }
type OutputResponse ¶
type OutputResponse struct { StatusCode int `json:"status_code"` Headers []*OutputHeader `json:"headers"` Body OutputBody `json:"body"` }
type OutputResult ¶
type OutputResult struct { Url string `json:"url"` Request OutputRequest `json:"request"` Response OutputResponse `json:"response"` }
func GeneratorOutput ¶
func GeneratorOutput(reqInfo ReqInfo) *OutputResult
type OutputResults ¶
type OutputResults struct {
// contains filtered or unexported fields
}
type RequestResult ¶
type RequestResult struct {
// contains filtered or unexported fields
}
func (*RequestResult) From ¶
func (result *RequestResult) From() string
func (*RequestResult) Method ¶
func (result *RequestResult) Method() string
func (*RequestResult) RequestBody ¶
func (result *RequestResult) RequestBody() string
func (*RequestResult) RequestHeaders ¶
func (result *RequestResult) RequestHeaders() map[string]string
func (*RequestResult) RequestRaw ¶
func (result *RequestResult) RequestRaw() ([]byte, error)
func (*RequestResult) ResponseBody ¶
func (result *RequestResult) ResponseBody() string
func (*RequestResult) ResponseHeaders ¶
func (result *RequestResult) ResponseHeaders() map[string]string
func (*RequestResult) Screenshot ¶
func (result *RequestResult) Screenshot() string
func (*RequestResult) StatusCode ¶
func (result *RequestResult) StatusCode() int
func (*RequestResult) Type ¶
func (result *RequestResult) Type() string
func (*RequestResult) Url ¶
func (result *RequestResult) Url() string
type SimpleResult ¶
type SimpleResult struct {
// contains filtered or unexported fields
}
func (*SimpleResult) From ¶
func (simpleResult *SimpleResult) From() string
func (*SimpleResult) Method ¶
func (simpleResult *SimpleResult) Method() string
func (*SimpleResult) RequestBody ¶
func (simpleResult *SimpleResult) RequestBody() string
func (*SimpleResult) RequestHeaders ¶
func (simpleResult *SimpleResult) RequestHeaders() map[string]string
func (*SimpleResult) RequestRaw ¶
func (simpleResult *SimpleResult) RequestRaw() ([]byte, error)
func (*SimpleResult) ResponseBody ¶
func (simpleResult *SimpleResult) ResponseBody() string
func (*SimpleResult) ResponseHeaders ¶
func (simpleResult *SimpleResult) ResponseHeaders() map[string]string
func (*SimpleResult) Screenshot ¶
func (simpleResult *SimpleResult) Screenshot() string
func (*SimpleResult) StatusCode ¶
func (*SimpleResult) StatusCode() int
func (*SimpleResult) Type ¶
func (simpleResult *SimpleResult) Type() string
func (*SimpleResult) Url ¶
func (simpleResult *SimpleResult) Url() string
type TestHijackRequest ¶
type TestHijackRequest struct {
// contains filtered or unexported fields
}
func (*TestHijackRequest) Body ¶
func (testHijackRequest *TestHijackRequest) Body() string
func (*TestHijackRequest) Header ¶
func (testHijackRequest *TestHijackRequest) Header(key string) string
func (*TestHijackRequest) Headers ¶
func (testHijackRequest *TestHijackRequest) Headers() proto.NetworkHeaders
func (*TestHijackRequest) JSONBody ¶
func (testHijackRequest *TestHijackRequest) JSONBody() gson.JSON
func (*TestHijackRequest) Method ¶
func (testHijackRequest *TestHijackRequest) Method() string
func (*TestHijackRequest) Req ¶
func (testHijackRequest *TestHijackRequest) Req() *http.Request
func (*TestHijackRequest) Type ¶
func (testHijackRequest *TestHijackRequest) Type() proto.NetworkResourceType
func (*TestHijackRequest) URL ¶
func (testHijackRequest *TestHijackRequest) URL() *url.URL
Source Files
¶
Directories
¶
Path | Synopsis
---|---
cmd | Package cmd @Author bcy2007 2023/7/14 11:11
(subdirectory — path not recoverable from this page) | Package crawlerx @Author bcy2007 2024/4/2 14:44
tools | Package tools @Author bcy2007 2023/7/12 16:40
config | Package config https://github.com/unknwon/goconfig