gopkg/crawler/collector_test.go
2021-12-20 19:03:04 +08:00

65 lines
1.3 KiB
Go

// Package crawler ...
//
// Description : crawler ...
//
// Author : go_developer@163.com<白茶清欢>
//
// Date : 2021-12-20 5:58 PM
package crawler
import (
"fmt"
"testing"
"github.com/gocolly/colly"
)
// TestStartCollector runs StartHTMLCollector against https://go.zhangdeman.cn
// using a testHandler and fails the test if the collector returns an error.
//
// NOTE(review): the target is a real URL, so this presumably needs network
// access to pass — confirm before relying on it in CI.
//
// Author : go_developer@163.com<白茶清欢>
//
// Date : 5:59 PM 2021/12/20
func TestStartCollector(t *testing.T) {
	err := StartHTMLCollector([]string{}, "https://go.zhangdeman.cn", &testHandler{})
	if err != nil {
		// Fail through the testing framework rather than panicking: a panic
		// aborts the whole test binary, while Fatalf only fails this test.
		t.Fatalf("出现异常 : %s", err.Error())
	}
}
// testHandler is a stateless handler handed to StartHTMLCollector by
// TestStartCollector; its methods supply the colly callbacks used during the
// crawl.
type testHandler struct{}
// OnRequest returns a request callback that prints the URL of the request it
// is given to standard output.
func (*testHandler) OnRequest() colly.RequestCallback {
	logRequest := func(req *colly.Request) {
		fmt.Println("开始请求 : ", req.URL)
	}
	return logRequest
}
// OnError returns an error callback that prints the error's message to
// standard output. The response argument is ignored.
func (*testHandler) OnError() colly.ErrorCallback {
	return func(_ *colly.Response, err error) {
		msg := "请求异常 : " + err.Error()
		fmt.Println(msg)
	}
}
// OnResponse returns a response callback that prints the response's status
// code to standard output.
func (*testHandler) OnResponse() colly.ResponseCallback {
	return func(resp *colly.Response) {
		status := resp.StatusCode
		fmt.Println("响应数据 : ", status)
	}
}
// OnHTML returns the selector "a[href]" together with a callback that prints
// the matched element's text and its href attribute to standard output.
func (*testHandler) OnHTML() (string, colly.HTMLCallback) {
	const selector = "a[href]"

	printLink := func(el *colly.HTMLElement) {
		// Text is printed quoted (%q), followed by the raw href value.
		fmt.Printf("Link found: %q -> %s\n", el.Text, el.Attr("href"))
	}
	return selector, printLink
}
// OnScraped returns a scraped callback that intentionally does nothing.
func (*testHandler) OnScraped() colly.ScrapedCallback {
	return func(*colly.Response) {}
}