29 lines
781 B
Go
29 lines
781 B
Go
// Package crawler provides helpers for running colly-based HTML page crawls.
|
|
//
|
|
// Description : crawler ...
|
|
//
|
|
// Author : go_developer@163.com<白茶清欢>
|
|
//
|
|
// Date : 2021-12-20 4:46 PM
|
|
package crawler
|
|
|
|
import (
	"errors"

	"github.com/gocolly/colly"
)
|
|
|
|
// StartHTMLCollector 获取页面爬虫实例
|
|
//
|
|
// Author : go_developer@163.com<白茶清欢>
|
|
//
|
|
// Date : 4:47 PM 2021/12/20
|
|
func StartHTMLCollector(domainList []string, visitURL string, requestHandler IRequestHandler) error {
|
|
c := colly.NewCollector()
|
|
// 设置域名白名单, 不设置, 默认所有均可访问
|
|
c.AllowedDomains = domainList
|
|
c.OnRequest(requestHandler.OnRequest())
|
|
c.OnError(requestHandler.OnError())
|
|
// html处理
|
|
c.OnHTML(requestHandler.OnHTML())
|
|
c.OnResponse(requestHandler.OnResponse())
|
|
c.OnScraped(requestHandler.OnScraped())
|
|
return c.Visit(visitURL)
|
|
}
|