update
This commit is contained in:
parent
336658850a
commit
420de10cb1
@ -19,7 +19,10 @@ func StartHTMLCollector(domainList []string, visitURL string, requestHandler IRe
|
||||
// 设置域名白名单, 不设置, 默认所有均可访问
|
||||
c.AllowedDomains = domainList
|
||||
c.OnRequest(requestHandler.OnRequest())
|
||||
c.OnError(requestHandler.OnError())
|
||||
// html处理
|
||||
c.OnHTML(requestHandler.OnHTML())
|
||||
c.OnResponse(requestHandler.OnResponse())
|
||||
c.OnScraped(requestHandler.OnScraped())
|
||||
return c.Visit(visitURL)
|
||||
}
|
||||
|
@ -20,7 +20,7 @@ import (
|
||||
//
|
||||
// Date : 5:59 PM 2021/12/20
|
||||
func TestStartCollector(t *testing.T) {
|
||||
if err := StartHTMLCollector([]string{}, "http://www.baidu.com", &testHandler{}); nil != err {
|
||||
if err := StartHTMLCollector([]string{}, "https://go.zhangdeman.cn", &testHandler{}); nil != err {
|
||||
panic("出现异常 : " + err.Error())
|
||||
}
|
||||
}
|
||||
@ -34,12 +34,16 @@ func (t *testHandler) OnRequest() colly.RequestCallback {
|
||||
}
|
||||
}
|
||||
|
||||
func (t *testHandler) OnError() {
|
||||
fmt.Println("请求异常 : ")
|
||||
func (t *testHandler) OnError() colly.ErrorCallback {
|
||||
return func(response *colly.Response, err error) {
|
||||
fmt.Println("请求异常 : " + err.Error())
|
||||
}
|
||||
}
|
||||
|
||||
func (t *testHandler) OnResponse() {
|
||||
|
||||
func (t *testHandler) OnResponse() colly.ResponseCallback {
|
||||
return func(response *colly.Response) {
|
||||
fmt.Println("响应数据 : ", response.StatusCode)
|
||||
}
|
||||
}
|
||||
|
||||
func (t *testHandler) OnHTML() (string, colly.HTMLCallback) {
|
||||
@ -53,6 +57,8 @@ func (t *testHandler) OnHTML() (string, colly.HTMLCallback) {
|
||||
}
|
||||
}
|
||||
|
||||
func (t *testHandler) OnScraped() {
|
||||
func (t *testHandler) OnScraped() colly.ScrapedCallback {
|
||||
return func(response *colly.Response) {
|
||||
|
||||
}
|
||||
}
|
||||
|
@ -20,11 +20,11 @@ type IRequestHandler interface {
|
||||
// OnRequest 在发起请求前被调用
|
||||
OnRequest() colly.RequestCallback
|
||||
// OnError 请求过程中如果发生错误被调用
|
||||
OnError()
|
||||
OnError() colly.ErrorCallback
|
||||
// OnResponse 收到回复后被调用
|
||||
OnResponse()
|
||||
OnResponse() colly.ResponseCallback
|
||||
// OnHTML 在OnResponse之后被调用,如果收到的内容是HTML
|
||||
OnHTML() (string, colly.HTMLCallback)
|
||||
// OnScraped 在OnHTML之后被调用
|
||||
OnScraped()
|
||||
OnScraped() colly.ScrapedCallback
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user