update
This commit is contained in:
parent
336658850a
commit
420de10cb1
@ -19,7 +19,10 @@ func StartHTMLCollector(domainList []string, visitURL string, requestHandler IRe
|
|||||||
// 设置域名白名单, 不设置, 默认所有均可访问
|
// 设置域名白名单, 不设置, 默认所有均可访问
|
||||||
c.AllowedDomains = domainList
|
c.AllowedDomains = domainList
|
||||||
c.OnRequest(requestHandler.OnRequest())
|
c.OnRequest(requestHandler.OnRequest())
|
||||||
|
c.OnError(requestHandler.OnError())
|
||||||
// html处理
|
// html处理
|
||||||
c.OnHTML(requestHandler.OnHTML())
|
c.OnHTML(requestHandler.OnHTML())
|
||||||
|
c.OnResponse(requestHandler.OnResponse())
|
||||||
|
c.OnScraped(requestHandler.OnScraped())
|
||||||
return c.Visit(visitURL)
|
return c.Visit(visitURL)
|
||||||
}
|
}
|
||||||
|
@ -20,7 +20,7 @@ import (
|
|||||||
//
|
//
|
||||||
// Date : 5:59 PM 2021/12/20
|
// Date : 5:59 PM 2021/12/20
|
||||||
func TestStartCollector(t *testing.T) {
|
func TestStartCollector(t *testing.T) {
|
||||||
if err := StartHTMLCollector([]string{}, "http://www.baidu.com", &testHandler{}); nil != err {
|
if err := StartHTMLCollector([]string{}, "https://go.zhangdeman.cn", &testHandler{}); nil != err {
|
||||||
panic("出现异常 : " + err.Error())
|
panic("出现异常 : " + err.Error())
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -34,12 +34,16 @@ func (t *testHandler) OnRequest() colly.RequestCallback {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func (t *testHandler) OnError() {
|
func (t *testHandler) OnError() colly.ErrorCallback {
|
||||||
fmt.Println("请求异常 : ")
|
return func(response *colly.Response, err error) {
|
||||||
|
fmt.Println("请求异常 : " + err.Error())
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func (t *testHandler) OnResponse() {
|
func (t *testHandler) OnResponse() colly.ResponseCallback {
|
||||||
|
return func(response *colly.Response) {
|
||||||
|
fmt.Println("响应数据 : ", response.StatusCode)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func (t *testHandler) OnHTML() (string, colly.HTMLCallback) {
|
func (t *testHandler) OnHTML() (string, colly.HTMLCallback) {
|
||||||
@ -53,6 +57,8 @@ func (t *testHandler) OnHTML() (string, colly.HTMLCallback) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func (t *testHandler) OnScraped() {
|
func (t *testHandler) OnScraped() colly.ScrapedCallback {
|
||||||
|
return func(response *colly.Response) {
|
||||||
|
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
@ -20,11 +20,11 @@ type IRequestHandler interface {
|
|||||||
// OnRequest 在发起请求前被调用
|
// OnRequest 在发起请求前被调用
|
||||||
OnRequest() colly.RequestCallback
|
OnRequest() colly.RequestCallback
|
||||||
// OnError 请求过程中如果发生错误被调用
|
// OnError 请求过程中如果发生错误被调用
|
||||||
OnError()
|
OnError() colly.ErrorCallback
|
||||||
// OnResponse 收到回复后被调用
|
// OnResponse 收到回复后被调用
|
||||||
OnResponse()
|
OnResponse() colly.ResponseCallback
|
||||||
// OnHTML 在OnResponse之后被调用,如果收到的内容是HTML
|
// OnHTML 在OnResponse之后被调用,如果收到的内容是HTML
|
||||||
OnHTML() (string, colly.HTMLCallback)
|
OnHTML() (string, colly.HTMLCallback)
|
||||||
// OnScraped 在OnHTML之后被调用
|
// OnScraped 在OnHTML之后被调用
|
||||||
OnScraped()
|
OnScraped() colly.ScrapedCallback
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user