This commit is contained in:
白茶清欢 2021-12-20 19:03:04 +08:00
parent 336658850a
commit 420de10cb1
3 changed files with 18 additions and 9 deletions

View File

@@ -19,7 +19,10 @@ func StartHTMLCollector(domainList []string, visitURL string, requestHandler IRe
// 设置域名白名单, 不设置, 默认所有均可访问
c.AllowedDomains = domainList
c.OnRequest(requestHandler.OnRequest())
c.OnError(requestHandler.OnError())
// html处理
c.OnHTML(requestHandler.OnHTML())
c.OnResponse(requestHandler.OnResponse())
c.OnScraped(requestHandler.OnScraped())
return c.Visit(visitURL)
}

View File

@@ -20,7 +20,7 @@ import (
//
// Date : 5:59 PM 2021/12/20
func TestStartCollector(t *testing.T) {
if err := StartHTMLCollector([]string{}, "http://www.baidu.com", &testHandler{}); nil != err {
if err := StartHTMLCollector([]string{}, "https://go.zhangdeman.cn", &testHandler{}); nil != err {
panic("出现异常 : " + err.Error())
}
}
@@ -34,12 +34,16 @@ func (t *testHandler) OnRequest() colly.RequestCallback {
}
}
func (t *testHandler) OnError() {
fmt.Println("请求异常 : ")
func (t *testHandler) OnError() colly.ErrorCallback {
return func(response *colly.Response, err error) {
fmt.Println("请求异常 : " + err.Error())
}
}
func (t *testHandler) OnResponse() {
func (t *testHandler) OnResponse() colly.ResponseCallback {
return func(response *colly.Response) {
fmt.Println("响应数据 : ", response.StatusCode)
}
}
func (t *testHandler) OnHTML() (string, colly.HTMLCallback) {
@@ -53,6 +57,8 @@ func (t *testHandler) OnHTML() (string, colly.HTMLCallback) {
}
}
func (t *testHandler) OnScraped() {
func (t *testHandler) OnScraped() colly.ScrapedCallback {
return func(response *colly.Response) {
}
}

View File

@@ -20,11 +20,11 @@ type IRequestHandler interface {
// OnRequest 在发起请求前被调用
OnRequest() colly.RequestCallback
// OnError 请求过程中如果发生错误被调用
OnError()
OnError() colly.ErrorCallback
// OnResponse 收到回复后被调用
OnResponse()
OnResponse() colly.ResponseCallback
// OnHTML 在OnResponse之后被调用如果收到的内容是HTML
OnHTML() (string, colly.HTMLCallback)
// OnScraped 在OnHTML之后被调用
OnScraped()
OnScraped() colly.ScrapedCallback
}