gopkg/crawler/collector.go
2021-12-20 19:03:04 +08:00

29 lines
781 B
Go

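// Package crawler provides helpers for running web crawls on top of gocolly/colly.
//
// Description : wires caller-supplied request handler callbacks into a colly collector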
//
// Author : go_developer@163.com<白茶清欢>
//
// Date : 2021-12-20 4:46 PM
package crawler
import (
	"errors"

	"github.com/gocolly/colly"
)
// StartHTMLCollector builds a colly collector, registers every callback
// supplied by requestHandler on it, and starts the crawl at visitURL.
//
// Parameters:
//   - domainList: domain whitelist assigned to the collector's AllowedDomains;
//     when left empty, all domains may be visited.
//   - visitURL: the URL handed to the collector's Visit call.
//   - requestHandler: source of the OnRequest, OnError, OnHTML, OnResponse and
//     OnScraped registrations; must not be nil.
//
// It returns an error when requestHandler is nil; otherwise it returns
// whatever Visit reports, unwrapped.
//
// Author : go_developer@163.com<白茶清欢>
//
// Date : 4:47 PM 2021/12/20
func StartHTMLCollector(domainList []string, visitURL string, requestHandler IRequestHandler) error {
	// A nil interface would panic on the first method call below; report it
	// as an error instead of crashing the caller. Note that a typed nil
	// pointer stored in the interface is not caught by this comparison.
	if requestHandler == nil {
		return errors.New("crawler: requestHandler is nil")
	}

	c := colly.NewCollector()
	// Domain whitelist; when not set, all domains may be visited by default.
	c.AllowedDomains = domainList

	c.OnRequest(requestHandler.OnRequest())
	c.OnError(requestHandler.OnError())
	// HTML handling: the values returned by OnHTML() are forwarded as-is.
	c.OnHTML(requestHandler.OnHTML())
	c.OnResponse(requestHandler.OnResponse())
	c.OnScraped(requestHandler.OnScraped())

	return c.Visit(visitURL)
}