65 lines
1.3 KiB
Go
65 lines
1.3 KiB
Go
// Package crawler ...
|
|
//
|
|
// Description : crawler ...
|
|
//
|
|
// Author : go_developer@163.com<白茶清欢>
|
|
//
|
|
// Date : 2021-12-20 5:58 PM
|
|
package crawler
|
|
|
|
import (
|
|
"fmt"
|
|
"testing"
|
|
|
|
"github.com/gocolly/colly"
|
|
)
|
|
|
|
// TestStartCollector ...
|
|
//
|
|
// Author : go_developer@163.com<白茶清欢>
|
|
//
|
|
// Date : 5:59 PM 2021/12/20
|
|
func TestStartCollector(t *testing.T) {
|
|
if err := StartHTMLCollector([]string{}, "https://go.zhangdeman.cn", &testHandler{}); nil != err {
|
|
panic("出现异常 : " + err.Error())
|
|
}
|
|
}
|
|
|
|
// testHandler is a field-less handler type used by TestStartCollector. Its
// methods return the colly callbacks handed to StartHTMLCollector.
type testHandler struct{}
|
|
|
|
func (t *testHandler) OnRequest() colly.RequestCallback {
|
|
return func(r *colly.Request) {
|
|
fmt.Println("开始请求 : ", r.URL)
|
|
}
|
|
}
|
|
|
|
func (t *testHandler) OnError() colly.ErrorCallback {
|
|
return func(response *colly.Response, err error) {
|
|
fmt.Println("请求异常 : " + err.Error())
|
|
}
|
|
}
|
|
|
|
func (t *testHandler) OnResponse() colly.ResponseCallback {
|
|
return func(response *colly.Response) {
|
|
fmt.Println("响应数据 : ", response.StatusCode)
|
|
}
|
|
}
|
|
|
|
func (t *testHandler) OnHTML() (string, colly.HTMLCallback) {
|
|
return "a[href]", func(e *colly.HTMLElement) {
|
|
link := e.Attr("href")
|
|
|
|
// Print link
|
|
|
|
fmt.Printf("Link found: %q -> %s\n", e.Text, link)
|
|
|
|
}
|
|
}
|
|
|
|
func (t *testHandler) OnScraped() colly.ScrapedCallback {
|
|
return func(response *colly.Response) {
|
|
|
|
}
|
|
}
|