gopkg/crawler/collector.go

26 lines
660 B
Go
Raw Normal View History

2021-12-20 18:48:35 +08:00
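// Package crawler provides helpers for starting colly-based HTML collectors.
//
// Description : creates a collector, registers caller-supplied request and HTML callbacks, and visits a start URL.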
//
// Author : go_developer@163.com<白茶清欢>
//
// Date : 2021-12-20 4:46 PM
package crawler
import "github.com/gocolly/colly"
// StartHTMLCollector creates a colly collector, registers the callbacks
// supplied by requestHandler, and visits visitURL.
//
// domainList is assigned to the collector's AllowedDomains whitelist.
// requestHandler provides the request callback (OnRequest) and the
// selector/callback pair used for HTML handling (OnHTML).
// The returned error is whatever the collector's Visit call reports.
//
// Author : go_developer@163.com<白茶清欢>
//
// Date : 4:47 PM 2021/12/20
func StartHTMLCollector(domainList []string, visitURL string, requestHandler IRequestHandler) error {
	collector := colly.NewCollector()

	// Domain whitelist; when left unset, all domains are accessible by default.
	collector.AllowedDomains = domainList

	// Register the request callback supplied by the handler.
	onRequest := requestHandler.OnRequest()
	collector.OnRequest(onRequest)

	// HTML handling: the handler supplies both the selector and its callback.
	selector, onHTML := requestHandler.OnHTML()
	collector.OnHTML(selector, onHTML)

	return collector.Visit(visitURL)
}