Documentation
¶
Overview ¶
Package crawlerx @Author bcy2007 2023/7/13 11:10
Package crawlerx @Author bcy2007 2023/7/12 17:31
Package crawlerx @Author bcy2007 2023/7/12 16:20
Package crawlerx @Author bcy2007 2023/7/12 16:31
Package crawlerx @Author bcy2007 2023/7/13 11:32
Package crawlerx @Author bcy2007 2023/7/12 17:42
Package crawlerx @Author bcy2007 2023/7/12 16:56
Package crawlerx @Author bcy2007 2023/8/1 11:09
Package crawlerx @Author bcy2007 2023/7/17 11:01
Package crawlerx @Author bcy2007 2023/7/14 10:44
Package crawlerx @Author bcy2007 2023/7/14 10:31
Package crawlerx @Author bcy2007 2023/7/14 11:07
Package crawlerx @Author bcy2007 2023/7/12 16:42
Package crawlerx @Author bcy2007 2023/11/1 10:16
Package crawlerx @Author bcy2007 2023/7/12 16:19
Package crawlerx @Author bcy2007 2023/7/13 11:57
Package crawlerx @Author bcy2007 2023/7/12 17:40
Index ¶
- Variables
- func BrowserEachEvent(browser *rod.Browser, sessionID proto.TargetSessionID, ...) func()
- func EvalOnPage(page *rod.Page, evalJs string) (*proto.RuntimeRemoteObject, error)
- func GetSortedQuery(rawQuery string) (query []string, err error)
- func NewPageScreenShot(targetUrl string, opts ...ConfigOpt) (code string, err error)
- func StartCrawler(url string, opts ...ConfigOpt) (chan ReqInfo, error)
- func StartCrawlerTest(url string, opts ...ConfigOpt) (chan ReqInfo, error)
- func StringArrayContains(array []string, element string) bool
- func StringArrayCover(array []string, element string) (bool, string)
- func StringPrefixList(origin string, prefixes []string) bool
- func StringSuffixList(s string, suffixes []string) bool
- func TargetUrlCheck(targetUrl string, proxy *url.URL) (string, error)
- type AIInput
- type AIInputResult
- type BaseConfig
- type BrowserConfig
- type BrowserInfo
- type BrowserManager
- type BrowserStarter
- func (starter *BrowserStarter) ActionOnPage(page *rod.Page) error
- func (starter *BrowserStarter) GetFormFill(element *rod.Element) string
- func (starter *BrowserStarter) GetUploadFile(element *rod.Element) string
- func (starter *BrowserStarter) HttpPostFile(element *rod.Element) error
- func (starter *BrowserStarter) Start()
- func (starter *BrowserStarter) Test()
- type Config
- type ConfigOpt
- func WithAIInputInf(info string) ConfigOpt
- func WithAIInputUrl(url string) ConfigOpt
- func WithBlackList(keywords ...string) ConfigOpt
- func WithBrowserData(browserConfig *BrowserConfig) ConfigOpt
- func WithBrowserInfo(data string) ConfigOpt
- func WithConcurrent(concurrent int) ConfigOpt
- func WithContext(ctx context.Context) ConfigOpt
- func WithCookieInfo(domain, cookieInfo string) ConfigOpt
- func WithCookies(domain string, cookiesInfo map[string]string) ConfigOpt
- func WithEvalJs(target string, evalJs string) ConfigOpt
- func WithExtraWaitLoadTime(extraWaitLoadTime int) ConfigOpt
- func WithFileInput(fileInput map[string]string) ConfigOpt
- func WithFormFill(formFills map[string]string) ConfigOpt
- func WithFromPlugin(fromPlugin string) ConfigOpt
- func WithFullTimeout(timeout int) ConfigOpt
- func WithHeaderInfo(headerInfo string) ConfigOpt
- func WithHeaders(headersInfo map[string]string) ConfigOpt
- func WithIgnoreQueryName(names ...string) ConfigOpt
- func WithInvalidSuffix(suffix []string) ConfigOpt
- func WithJsResultSave(storage func(s string)) ConfigOpt
- func WithLeakless(leakless string) ConfigOpt
- func WithLocalStorage(storage map[string]string) ConfigOpt
- func WithMaxDepth(depth int) ConfigOpt
- func WithMaxUrl(maxUrl int) ConfigOpt
- func WithPageSizedWaitGroup(pageSizedWaitGroup *utils.SizedWaitGroup) ConfigOpt
- func WithPageTimeout(timeout int) ConfigOpt
- func WithPageVisitFilter(pageVisitFilter *tools.StringCountFilter) ConfigOpt
- func WithResponse(targetUrl string, response string) ConfigOpt
- func WithResultChannel(ch chan ReqInfo) ConfigOpt
- func WithResultSentFilter(resultSentFilter *tools.StringCountFilter) ConfigOpt
- func WithRuntimeID(id string) ConfigOpt
- func WithSaveToDB(b bool) ConfigOpt
- func WithScanRangeLevel(scanRange scanRangeLevel) ConfigOpt
- func WithScanRepeatLevel(scanRepeat repeatLevel) ConfigOpt
- func WithSensitiveWords(words []string) ConfigOpt
- func WithSessionStorage(storage map[string]string) ConfigOpt
- func WithSourceType(sourceType string) ConfigOpt
- func WithStartWaitGroup(waitGroup *utils.SizedWaitGroup) ConfigOpt
- func WithStealth(stealth bool) ConfigOpt
- func WithTargetUrl(targetUrl string) ConfigOpt
- func WithUChan(uChan *tools.UChan) ConfigOpt
- func WithUrlCheck(check bool) ConfigOpt
- func WithUrlTree(tree *tools.UrlTree) ConfigOpt
- func WithVue(vue bool) ConfigOpt
- func WithWhiteList(keywords ...string) ConfigOpt
- type CrawlerCore
- type CrawlerHijack
- type CrawlerHijackHandler
- type CrawlerHijackRequest
- func (hijack *CrawlerHijackRequest) Body() string
- func (hijack *CrawlerHijackRequest) Header(key string) string
- func (hijack *CrawlerHijackRequest) Headers() proto.NetworkHeaders
- func (hijack *CrawlerHijackRequest) IsNavigation() bool
- func (hijack *CrawlerHijackRequest) JSONBody() gson.JSON
- func (hijack *CrawlerHijackRequest) Method() string
- func (hijack *CrawlerHijackRequest) Req() *http.Request
- func (hijack *CrawlerHijackRequest) SetBody(obj interface{}) *CrawlerHijackRequest
- func (hijack *CrawlerHijackRequest) SetContext(ctx context.Context) *CrawlerHijackRequest
- func (hijack *CrawlerHijackRequest) Type() proto.NetworkResourceType
- func (hijack *CrawlerHijackRequest) URL() *url.URL
- type CrawlerHijackResponse
- func (hijack *CrawlerHijackResponse) Body() string
- func (hijack *CrawlerHijackResponse) Fail(reason proto.NetworkErrorReason) *CrawlerHijackResponse
- func (hijack *CrawlerHijackResponse) Headers() http.Header
- func (hijack *CrawlerHijackResponse) Payload() *proto.FetchFulfillRequest
- func (hijack *CrawlerHijackResponse) SetBody(obj interface{}) *CrawlerHijackResponse
- func (hijack *CrawlerHijackResponse) SetHeader(pairs ...string) *CrawlerHijackResponse
- type CrawlerRouter
- type HijackRequest
- type HijackResponse
- type HttpRequest
- func (request *HttpRequest) Do() error
- func (request *HttpRequest) GetRequest() error
- func (request *HttpRequest) GetUrl() string
- func (request *HttpRequest) MultiPartRequest() error
- func (request *HttpRequest) PostRequest() error
- func (request *HttpRequest) Request() error
- func (request *HttpRequest) Show() (string, error)
- type JSEval
- type JsResultSave
- type JsResults
- type OutputBody
- type OutputHeader
- type OutputRequest
- type OutputResponse
- type OutputResult
- type OutputResults
- type ReqInfo
- type RequestResult
- func (result *RequestResult) From() string
- func (result *RequestResult) Method() string
- func (result *RequestResult) RequestBody() string
- func (result *RequestResult) RequestHeaders() map[string]string
- func (result *RequestResult) RequestRaw() ([]byte, error)
- func (result *RequestResult) ResponseBody() string
- func (result *RequestResult) ResponseHeaders() map[string]string
- func (result *RequestResult) Screenshot() string
- func (result *RequestResult) StatusCode() int
- func (result *RequestResult) Type() string
- func (result *RequestResult) Url() string
- type SimpleResult
- func (simpleResult *SimpleResult) From() string
- func (simpleResult *SimpleResult) Method() string
- func (simpleResult *SimpleResult) RequestBody() string
- func (simpleResult *SimpleResult) RequestHeaders() map[string]string
- func (simpleResult *SimpleResult) RequestRaw() ([]byte, error)
- func (simpleResult *SimpleResult) ResponseBody() string
- func (simpleResult *SimpleResult) ResponseHeaders() map[string]string
- func (simpleResult *SimpleResult) Screenshot() string
- func (*SimpleResult) StatusCode() int
- func (simpleResult *SimpleResult) Type() string
- func (simpleResult *SimpleResult) Url() string
- type TestHijackRequest
- func (testHijackRequest *TestHijackRequest) Body() string
- func (testHijackRequest *TestHijackRequest) Header(key string) string
- func (testHijackRequest *TestHijackRequest) Headers() proto.NetworkHeaders
- func (testHijackRequest *TestHijackRequest) JSONBody() gson.JSON
- func (testHijackRequest *TestHijackRequest) Method() string
- func (testHijackRequest *TestHijackRequest) Req() *http.Request
- func (testHijackRequest *TestHijackRequest) Type() proto.NetworkResourceType
- func (testHijackRequest *TestHijackRequest) URL() *url.URL
Constants ¶
This section is empty.
Variables ¶
var CrawlerXExports = map[string]interface{}{ "StartCrawler": StartCrawler, "PageScreenShot": NewPageScreenShot, "browserInfo": WithBrowserInfo, "saveToDB": WithSaveToDB, "runtimeId": WithRuntimeID, "maxUrl": WithMaxUrl, "maxDepth": WithMaxDepth, "concurrent": WithConcurrent, "blacklist": WithBlackList, "whitelist": WithWhiteList, "pageTimeout": WithPageTimeout, "fullTimeout": WithFullTimeout, "extraWaitLoadTime": WithExtraWaitLoadTime, "formFill": WithFormFill, "fileInput": WithFileInput, "headers": WithHeaders, "rawHeaders": WithHeaderInfo, "cookies": WithCookies, "rawCookie": WithCookieInfo, "scanRangeLevel": WithScanRangeLevel, "scanRepeatLevel": WithScanRepeatLevel, "ignoreQueryName": WithIgnoreQueryName, "sensitiveWords": WithSensitiveWords, "leakless": WithLeakless, "localStorage": WithLocalStorage, "sessionStorage": WithSessionStorage, "invalidSuffix": WithInvalidSuffix, "stealth": WithStealth, "runtimeID": WithRuntimeID, "evalJs": WithEvalJs, "jsResultSend": WithJsResultSave, "vue": WithVue, "response": WithResponse, "sourceType": WithSourceType, "fromPlugin": WithFromPlugin, "urlCheck": WithUrlCheck, "aiInputUrl": WithAIInputUrl, "aiInputInfo": WithAIInputInf, "UnLimitRepeat": unlimited, "LowRepeatLevel": lowLevel, "MediumRepeatLevel": midLevel, "HighRepeatLevel": highLevel, "ExtremeRepeatLevel": extremeLevel, "AllDomainScan": mainDomain, "SubMenuScan": subDomain, "UnlimitedDomainScan": unlimitedDomain, }
var RepeatLevelMap = map[int]repeatLevel{
0: unlimited,
1: lowLevel,
2: midLevel,
3: highLevel,
4: extremeLevel,
}
var ScanRangeLevelMap = map[int]scanRangeLevel{
0: mainDomain,
1: subDomain,
2: unlimitedDomain,
}
Functions ¶
func BrowserEachEvent ¶
func BrowserEachEvent(browser *rod.Browser, sessionID proto.TargetSessionID, callbacks ...interface{}) func()
func EvalOnPage ¶
func GetSortedQuery ¶
func NewPageScreenShot ¶
func StartCrawler ¶
StartCrawler 开启一个无头浏览器模拟点击爬虫任务 第一个参数为目标url,后面可以添加零个或多个请求选项,用于对此次请求进行配置 返回值包括channel和错误,从channel中获取爬虫结果
Examples: ```
targetUrl = "http://testphp.vulnweb.com/" ch, err = crawlerx.StartCrawler(targetUrl, crawlerx.pageTimeout(30), crawlerx.concurrent(3)) for item = range ch { yakit.Info(item.Method() + " " + item.Url()) }
```
func StringArrayContains ¶
func StringPrefixList ¶
func StringSuffixList ¶
Types ¶
type AIInputResult ¶
type BaseConfig ¶
type BaseConfig struct {
// contains filtered or unexported fields
}
type BrowserConfig ¶
type BrowserConfig struct {
// contains filtered or unexported fields
}
func NewBrowserConfig ¶
func NewBrowserConfig(exePath, wsAddress string, proxyAddress *url.URL) *BrowserConfig
type BrowserInfo ¶
type BrowserManager ¶
type BrowserManager struct {
// contains filtered or unexported fields
}
func NewBrowserManager ¶
func NewBrowserManager(config *Config) *BrowserManager
func (*BrowserManager) CreateBrowserStarters ¶
func (manager *BrowserManager) CreateBrowserStarters()
func (*BrowserManager) Start ¶
func (manager *BrowserManager) Start()
func (*BrowserManager) Test ¶
func (manager *BrowserManager) Test()
type BrowserStarter ¶
type BrowserStarter struct {
// contains filtered or unexported fields
}
func NewBrowserStarter ¶
func NewBrowserStarter(browserConfig *BrowserConfig, baseConfig *BaseConfig) *BrowserStarter
func (*BrowserStarter) ActionOnPage ¶
func (starter *BrowserStarter) ActionOnPage(page *rod.Page) error
func (*BrowserStarter) GetFormFill ¶
func (starter *BrowserStarter) GetFormFill(element *rod.Element) string
func (*BrowserStarter) GetUploadFile ¶
func (starter *BrowserStarter) GetUploadFile(element *rod.Element) string
func (*BrowserStarter) HttpPostFile ¶
func (starter *BrowserStarter) HttpPostFile(element *rod.Element) error
func (*BrowserStarter) Start ¶
func (starter *BrowserStarter) Start()
func (*BrowserStarter) Test ¶
func (starter *BrowserStarter) Test()
type ConfigOpt ¶
type ConfigOpt func(*Config)
func WithAIInputInf ¶
func WithAIInputUrl ¶
func WithBlackList ¶
blacklist 是一个请求选项 用于设置不会被访问的url链接包含的关键词
Examples: ```
targetUrl = "http://testphp.vulnweb.com/" ch, err = crawlerx.StartCrawler(targetUrl, crawlerx.blacklist("logout", "exit", "delete")) // 设置遇到url中包含logout、exit和delete时不会访问 ...
```
func WithBrowserData ¶
func WithBrowserData(browserConfig *BrowserConfig) ConfigOpt
func WithBrowserInfo ¶
browserInfo 是一个请求选项 用于配置浏览器参数
Examples: ```
targetUrl = "http://testphp.vulnweb.com/" browserInfo = { "ws_address":"", // 浏览器websocket url "exe_path":"", // 浏览器可执行路径 "proxy_address":"", // 代理地址 "proxy_username":"", // 代理用户名 "proxy_password":"", // 代理密码 } browserInfoOpt = crawlerx.browserInfo(json.dumps(browserInfo)) ch, err = crawlerx.StartCrawler(targetUrl, browserInfoOpt) ...
```
func WithConcurrent ¶
concurrent 是一个请求选项 用于设置浏览器同时打开的最大页面数量
Examples: ```
targetUrl = "http://testphp.vulnweb.com/" ch, err = crawlerx.StartCrawler(targetUrl, crawlerx.concurrent(3)) // 设置浏览器同时打开的最大页面数量为3 ...
```
func WithContext ¶
func WithCookieInfo ¶
rawCookie 是一个请求选项 用于设置爬虫发送请求时的cookie
Examples: ```
targetUrl = "http://testphp.vulnweb.com/" cookie = `Apache=5651982500959.057.1731310579958; ULV=1731310579971:11:1:1:5651982500959.057.1731310579958:1727418057693; ALF=1735783078` ch, err = crawlerx.StartCrawler(targetUrl, crawlerx.rawCookie("testphp.vulnweb.com", cookie)) // 原生cookie输入 ...
```
func WithCookies ¶
cookies 是一个请求选项 用于设置爬虫发送请求时的cookie
Examples: ```
targetUrl = "http://testphp.vulnweb.com/" cookieMap = make(map[string]string, 0) cookieMap["Apache"] = "5651982500959.057.1731310579958" cookieMap["ULV"] = "1731310579971:11:1:1:5651982500959.057.1731310579958:1727418057693" ch, err = crawlerx.StartCrawler(targetUrl, crawlerx.cookies("testphp.vulnweb.com", cookieMap)) // cookie字典形式输入 ...
```
func WithEvalJs ¶
func WithExtraWaitLoadTime ¶
extraWaitLoadTime 是一个请求选项 用于设置页面加载的额外页面等待时间
防止加载vue网站页面时页面状态为加载完成 实际仍在加载中的情况
Examples: ```
targetUrl = "http://testphp.vulnweb.com/" ch, err = crawlerx.StartCrawler(targetUrl, crawlerx.extraWaitLoadTime(1000)) // 设置页面加载的额外页面等待时间为1000毫秒 ...
```
func WithFileInput ¶
fileInput 是一个请求选项 用于设置页面遇到文件上传输入框时默认上传的文件
Examples: ```
targetUrl = "http://testphp.vulnweb.com/" fileMap = make(map[string]string, 0) fileMap["default"] = "/path/to/file/test.txt" ch, err = crawlerx.StartCrawler(targetUrl, crawlerx.fileInput(fileMap)) // 设置页面遇到文件上传元素时默认上传的文件路径 ...
```
func WithFormFill ¶
formFill 是一个请求选项 用于设置页面输入框填写内容
Examples: ```
targetUrl = "http://testphp.vulnweb.com/" inputMap = make(map[string]string, 0) inputMap["username"] = "admin" inputMap["password"] = "123321" ch, err = crawlerx.StartCrawler(targetUrl, crawlerx.formFill(inputMap)) // 设置遇到输入框元素中存在对应关键词时输入对应内容 默认输入test ...
```
func WithFromPlugin ¶
func WithFullTimeout ¶
fullTimeout 是一个请求选项 用于设置爬虫任务总超时时间
Examples: ```
targetUrl = "http://testphp.vulnweb.com/" ch, err = crawlerx.StartCrawler(targetUrl, crawlerx.fullTimeout(1800)) // 设置爬虫任务总超时时间为1800秒 ...
```
func WithHeaderInfo ¶
rawHeaders 是一个请求选项 用于设置爬虫发送请求时的headers
Examples: ```
targetUrl = "http://testphp.vulnweb.com/" headers = `Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7 Accept-Encoding: gzip, deflate Accept-Language: zh-CN,zh;q=0.9,en;q=0.8,ja;q=0.7,zh-TW;q=0.6 Cache-Control: max-age=0 Connection: keep-alive Host: testphp.vulnweb.com Upgrade-Insecure-Requests: 1 User-Agent: Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36 ` ch, err = crawlerx.StartCrawler(targetUrl, crawlerx.rawHeaders(headers)) // 原生headers输入 ...
```
func WithHeaders ¶
headers 是一个请求选项 用于设置爬虫发送请求时的headers
Examples: ```
targetUrl = "http://testphp.vulnweb.com/" headerMap = make(map[string]string, 0) headerMap["Connection"] = "keep-alive" ch, err = crawlerx.StartCrawler(targetUrl, crawlerx.headers(headerMap)) // header以字典形式输入 ...
```
func WithIgnoreQueryName ¶
ignoreQueryName 是一个请求选项 用于设置url去重时忽略的query名称
Examples: ```
targetUrl = "http://testphp.vulnweb.com/" ch, err = crawlerx.StartCrawler(targetUrl, crawlerx.ignoreQueryName("sid", "tid")) // 设置检测url是否重复时无视sid和tid这两个query ...
```
func WithInvalidSuffix ¶
func WithJsResultSave ¶
func WithLeakless ¶
func WithLocalStorage ¶
func WithMaxDepth ¶
maxDepth 是一个请求选项 用于设置网站最大爬取深度
Examples: ```
targetUrl = "http://testphp.vulnweb.com/" ch, err = crawlerx.StartCrawler(targetUrl, crawlerx.maxDepth(3)) // 设置网站最大爬取深度为3 ...
```
func WithMaxUrl ¶
maxUrl 是一个请求选项 用于设置最大爬取url数量
Examples: ```
targetUrl = "http://testphp.vulnweb.com/" ch, err = crawlerx.StartCrawler(targetUrl, crawlerx.maxUrl(100)) // 设置最大爬取url数量为100 ...
```
func WithPageSizedWaitGroup ¶
func WithPageSizedWaitGroup(pageSizedWaitGroup *utils.SizedWaitGroup) ConfigOpt
func WithPageTimeout ¶
pageTimeout 是一个请求选项 用于设置单个页面超时时间
Examples: ```
targetUrl = "http://testphp.vulnweb.com/" ch, err = crawlerx.StartCrawler(targetUrl, crawlerx.pageTimeout(30)) // 设置单个页面超时时间为30秒 ...
```
func WithPageVisitFilter ¶
func WithPageVisitFilter(pageVisitFilter *tools.StringCountFilter) ConfigOpt
func WithResponse ¶
func WithResultChannel ¶
func WithResultSentFilter ¶
func WithResultSentFilter(resultSentFilter *tools.StringCountFilter) ConfigOpt
func WithRuntimeID ¶
func WithSaveToDB ¶
func WithScanRangeLevel ¶
func WithScanRangeLevel(scanRange scanRangeLevel) ConfigOpt
scanRangeLevel 是一个请求选项 用于设置爬虫扫描范围
Examples: ```
targetUrl = "http://testphp.vulnweb.com/" scanRangeOpt = crawlerx.scanRangeLevel(crawlerx.AllDomainScan) // 主域名扫描 // scanRangeOpt = crawlerx.scanRangeLevel(crawlerx.SubMenuScan) // 子域名扫描 // scanRangeOpt = crawlerx.scanRangeLevel(crawlerx.UnlimitedDomainScan) // 无限制扫描 ch, err = crawlerx.StartCrawler(targetUrl, scanRangeOpt) ...
```
func WithScanRepeatLevel ¶
func WithScanRepeatLevel(scanRepeat repeatLevel) ConfigOpt
scanRepeatLevel 是一个请求选项 用于设置爬虫去重强度
Examples: ```
targetUrl = "http://testphp.vulnweb.com/" scanRepeatOpt = crawlerx.scanRepeatLevel(crawlerx.UnLimitRepeat) // 对page,method,query-name,query-value和post-data敏感 // scanRepeatOpt = crawlerx.scanRepeatLevel(crawlerx.LowRepeatLevel) // 对page,method,query-name和query-value敏感(默认) // scanRepeatOpt = crawlerx.scanRepeatLevel(crawlerx.MediumRepeatLevel) // 对page,method和query-name敏感 // scanRepeatOpt = crawlerx.scanRepeatLevel(crawlerx.HighRepeatLevel) // 对page和method敏感 // scanRepeatOpt = crawlerx.scanRepeatLevel(crawlerx.ExtremeRepeatLevel) // 对page敏感 ch, err = crawlerx.StartCrawler(targetUrl, scanRepeatOpt) ...
```
func WithSensitiveWords ¶
sensitiveWords 是一个请求选项 用于设置页面按钮点击时的敏感词
Examples: ```
targetUrl = "http://testphp.vulnweb.com/" sensitiveWords = "logout,delete" ch, err = crawlerx.StartCrawler(targetUrl, crawlerx.sensitiveWords(sensitiveWords.Split(","))) // 当按钮所在元素中存在logout和delete关键词时不会点击 ...
```
func WithSessionStorage ¶
func WithSourceType ¶
func WithStartWaitGroup ¶
func WithStartWaitGroup(waitGroup *utils.SizedWaitGroup) ConfigOpt
func WithStealth ¶
func WithTargetUrl ¶
func WithUrlCheck ¶
urlCheck 是一个请求选项 用于设置是否在爬虫前进行url存活检测
Examples: ```
targetUrl = "http://testphp.vulnweb.com/" ch, err = crawlerx.StartCrawler(targetUrl, crawlerx.urlCheck(true)) ...
```
func WithUrlTree ¶
func WithWhiteList ¶
whitelist 是一个请求选项 用于设置只会被访问的url链接中包含的关键词
Examples: ```
targetUrl = "http://testphp.vulnweb.com/" ch, err = crawlerx.StartCrawler(targetUrl, crawlerx.whitelist("test", "click")) // 设置只会访问url中包含test和click的链接 ...
```
type CrawlerCore ¶
type CrawlerCore struct {
// contains filtered or unexported fields
}
func NewCrawlerCore ¶
func NewCrawlerCore(targetUrl string, opts ...ConfigOpt) (*CrawlerCore, error)
func (*CrawlerCore) Start ¶
func (core *CrawlerCore) Start()
func (*CrawlerCore) Test ¶
func (core *CrawlerCore) Test()
type CrawlerHijack ¶
type CrawlerHijack struct { Request *CrawlerHijackRequest Response *CrawlerHijackResponse OnError func(error) Skip bool CustomState interface{} // contains filtered or unexported fields }
func (*CrawlerHijack) ContinueRequest ¶
func (hijack *CrawlerHijack) ContinueRequest(cq *proto.FetchContinueRequest)
func (*CrawlerHijack) LoadResponse ¶
func (hijack *CrawlerHijack) LoadResponse(opts []lowhttp.LowhttpOpt, loadBody bool) error
type CrawlerHijackHandler ¶
type CrawlerHijackHandler struct {
// contains filtered or unexported fields
}
type CrawlerHijackRequest ¶
type CrawlerHijackRequest struct {
// contains filtered or unexported fields
}
func (*CrawlerHijackRequest) Body ¶
func (hijack *CrawlerHijackRequest) Body() string
func (*CrawlerHijackRequest) Header ¶
func (hijack *CrawlerHijackRequest) Header(key string) string
func (*CrawlerHijackRequest) Headers ¶
func (hijack *CrawlerHijackRequest) Headers() proto.NetworkHeaders
func (*CrawlerHijackRequest) IsNavigation ¶
func (hijack *CrawlerHijackRequest) IsNavigation() bool
func (*CrawlerHijackRequest) JSONBody ¶
func (hijack *CrawlerHijackRequest) JSONBody() gson.JSON
func (*CrawlerHijackRequest) Method ¶
func (hijack *CrawlerHijackRequest) Method() string
func (*CrawlerHijackRequest) Req ¶
func (hijack *CrawlerHijackRequest) Req() *http.Request
func (*CrawlerHijackRequest) SetBody ¶
func (hijack *CrawlerHijackRequest) SetBody(obj interface{}) *CrawlerHijackRequest
func (*CrawlerHijackRequest) SetContext ¶
func (hijack *CrawlerHijackRequest) SetContext(ctx context.Context) *CrawlerHijackRequest
func (*CrawlerHijackRequest) Type ¶
func (hijack *CrawlerHijackRequest) Type() proto.NetworkResourceType
func (*CrawlerHijackRequest) URL ¶
func (hijack *CrawlerHijackRequest) URL() *url.URL
type CrawlerHijackResponse ¶
type CrawlerHijackResponse struct {
// contains filtered or unexported fields
}
func (*CrawlerHijackResponse) Body ¶
func (hijack *CrawlerHijackResponse) Body() string
func (*CrawlerHijackResponse) Fail ¶
func (hijack *CrawlerHijackResponse) Fail(reason proto.NetworkErrorReason) *CrawlerHijackResponse
func (*CrawlerHijackResponse) Headers ¶
func (hijack *CrawlerHijackResponse) Headers() http.Header
func (*CrawlerHijackResponse) Payload ¶
func (hijack *CrawlerHijackResponse) Payload() *proto.FetchFulfillRequest
func (*CrawlerHijackResponse) SetBody ¶
func (hijack *CrawlerHijackResponse) SetBody(obj interface{}) *CrawlerHijackResponse
func (*CrawlerHijackResponse) SetHeader ¶
func (hijack *CrawlerHijackResponse) SetHeader(pairs ...string) *CrawlerHijackResponse
type CrawlerRouter ¶
type CrawlerRouter struct {
// contains filtered or unexported fields
}
func NewBrowserHijackRequests ¶
func NewBrowserHijackRequests(browser *rod.Browser) *CrawlerRouter
func NewPageHijackRequests ¶
func NewPageHijackRequests(page *rod.Page) *CrawlerRouter
func (*CrawlerRouter) Add ¶
func (router *CrawlerRouter) Add(pattern string, resourceType proto.NetworkResourceType, handler func(*CrawlerHijack)) error
func (*CrawlerRouter) Run ¶
func (router *CrawlerRouter) Run()
func (*CrawlerRouter) Stop ¶
func (router *CrawlerRouter) Stop() error
type HijackRequest ¶
type HijackResponse ¶
type HijackResponse interface { Payload() *proto.FetchFulfillRequest Body() string Headers() http.Header }
type HttpRequest ¶
type HttpRequest struct {
// contains filtered or unexported fields
}
func CreateFileRequest ¶
func CreateFileRequest(url, method string, params, files map[string]string) *HttpRequest
func CreateGetRequest ¶
func CreateGetRequest(url string) *HttpRequest
func CreateRequest ¶
func CreateRequest() *HttpRequest
func (*HttpRequest) Do ¶
func (request *HttpRequest) Do() error
func (*HttpRequest) GetRequest ¶
func (request *HttpRequest) GetRequest() error
func (*HttpRequest) GetUrl ¶
func (request *HttpRequest) GetUrl() string
func (*HttpRequest) MultiPartRequest ¶
func (request *HttpRequest) MultiPartRequest() error
func (*HttpRequest) PostRequest ¶
func (request *HttpRequest) PostRequest() error
func (*HttpRequest) Request ¶
func (request *HttpRequest) Request() error
func (*HttpRequest) Show ¶
func (request *HttpRequest) Show() (string, error)
type JSEval ¶
type JSEval struct {
// contains filtered or unexported fields
}
func CreateJsEval ¶
func CreateJsEval() *JSEval
type JsResultSave ¶
type OutputBody ¶
type OutputHeader ¶
type OutputRequest ¶
type OutputRequest struct { Url string `json:"url"` Method string `json:"method"` Headers []*OutputHeader `json:"headers"` Body OutputBody `json:"body"` HTTPRaw string `json:"http_raw"` }
type OutputResponse ¶
type OutputResponse struct { StatusCode int `json:"status_code"` Headers []*OutputHeader `json:"headers"` Body OutputBody `json:"body"` }
type OutputResult ¶
type OutputResult struct { Url string `json:"url"` Request OutputRequest `json:"request"` Response OutputResponse `json:"response"` }
func GeneratorOutput ¶
func GeneratorOutput(reqInfo ReqInfo) *OutputResult
type OutputResults ¶
type OutputResults struct {
// contains filtered or unexported fields
}
type RequestResult ¶
type RequestResult struct {
// contains filtered or unexported fields
}
func (*RequestResult) From ¶
func (result *RequestResult) From() string
func (*RequestResult) Method ¶
func (result *RequestResult) Method() string
func (*RequestResult) RequestBody ¶
func (result *RequestResult) RequestBody() string
func (*RequestResult) RequestHeaders ¶
func (result *RequestResult) RequestHeaders() map[string]string
func (*RequestResult) RequestRaw ¶
func (result *RequestResult) RequestRaw() ([]byte, error)
func (*RequestResult) ResponseBody ¶
func (result *RequestResult) ResponseBody() string
func (*RequestResult) ResponseHeaders ¶
func (result *RequestResult) ResponseHeaders() map[string]string
func (*RequestResult) Screenshot ¶
func (result *RequestResult) Screenshot() string
func (*RequestResult) StatusCode ¶
func (result *RequestResult) StatusCode() int
func (*RequestResult) Type ¶
func (result *RequestResult) Type() string
func (*RequestResult) Url ¶
func (result *RequestResult) Url() string
type SimpleResult ¶
type SimpleResult struct {
// contains filtered or unexported fields
}
func (*SimpleResult) From ¶
func (simpleResult *SimpleResult) From() string
func (*SimpleResult) Method ¶
func (simpleResult *SimpleResult) Method() string
func (*SimpleResult) RequestBody ¶
func (simpleResult *SimpleResult) RequestBody() string
func (*SimpleResult) RequestHeaders ¶
func (simpleResult *SimpleResult) RequestHeaders() map[string]string
func (*SimpleResult) RequestRaw ¶
func (simpleResult *SimpleResult) RequestRaw() ([]byte, error)
func (*SimpleResult) ResponseBody ¶
func (simpleResult *SimpleResult) ResponseBody() string
func (*SimpleResult) ResponseHeaders ¶
func (simpleResult *SimpleResult) ResponseHeaders() map[string]string
func (*SimpleResult) Screenshot ¶
func (simpleResult *SimpleResult) Screenshot() string
func (*SimpleResult) StatusCode ¶
func (*SimpleResult) StatusCode() int
func (*SimpleResult) Type ¶
func (simpleResult *SimpleResult) Type() string
func (*SimpleResult) Url ¶
func (simpleResult *SimpleResult) Url() string
type TestHijackRequest ¶
type TestHijackRequest struct {
// contains filtered or unexported fields
}
func (*TestHijackRequest) Body ¶
func (testHijackRequest *TestHijackRequest) Body() string
func (*TestHijackRequest) Header ¶
func (testHijackRequest *TestHijackRequest) Header(key string) string
func (*TestHijackRequest) Headers ¶
func (testHijackRequest *TestHijackRequest) Headers() proto.NetworkHeaders
func (*TestHijackRequest) JSONBody ¶
func (testHijackRequest *TestHijackRequest) JSONBody() gson.JSON
func (*TestHijackRequest) Method ¶
func (testHijackRequest *TestHijackRequest) Method() string
func (*TestHijackRequest) Req ¶
func (testHijackRequest *TestHijackRequest) Req() *http.Request
func (*TestHijackRequest) Type ¶
func (testHijackRequest *TestHijackRequest) Type() proto.NetworkResourceType
func (*TestHijackRequest) URL ¶
func (testHijackRequest *TestHijackRequest) URL() *url.URL
Source Files
¶
Directories
¶
Path | Synopsis
---|---
cmd | Package cmd @Author bcy2007 2023/7/14 11:11
(subdirectory — path not recoverable from this page) | Package crawlerx @Author bcy2007 2024/4/2 14:44
tools | Package tools @Author bcy2007 2023/7/12 16:40
config | Package config https://github.com/unknwon/goconfig