26 lines
660 B
Go
26 lines
660 B
Go
|
// Package crawler provides helpers for starting colly-based HTML collectors.
|
||
|
//
|
||
|
// Description : helpers for starting colly-based HTML collectors
|
||
|
//
|
||
|
// Author : go_developer@163.com<白茶清欢>
|
||
|
//
|
||
|
// Date : 2021-12-20 4:46 PM
|
||
|
package crawler
|
||
|
|
||
|
import (
	"errors"

	"github.com/gocolly/colly"
)
|
||
|
|
||
|
// StartHTMLCollector 获取页面爬虫实例
|
||
|
//
|
||
|
// Author : go_developer@163.com<白茶清欢>
|
||
|
//
|
||
|
// Date : 4:47 PM 2021/12/20
|
||
|
func StartHTMLCollector(domainList []string, visitURL string, requestHandler IRequestHandler) error {
|
||
|
c := colly.NewCollector()
|
||
|
// 设置域名白名单, 不设置, 默认所有均可访问
|
||
|
c.AllowedDomains = domainList
|
||
|
c.OnRequest(requestHandler.OnRequest())
|
||
|
// html处理
|
||
|
c.OnHTML(requestHandler.OnHTML())
|
||
|
return c.Visit(visitURL)
|
||
|
}
|