59 lines
1.0 KiB
Go
59 lines
1.0 KiB
Go
|
// Package crawler ...
|
||
|
//
|
||
|
// Description : crawler ...
|
||
|
//
|
||
|
// Author : go_developer@163.com<白茶清欢>
|
||
|
//
|
||
|
// Date : 2021-12-20 5:58 PM
|
||
|
package crawler
|
||
|
|
||
|
import (
|
||
|
"fmt"
|
||
|
"testing"
|
||
|
|
||
|
"github.com/gocolly/colly"
|
||
|
)
|
||
|
|
||
|
// TestStartCollector ...
|
||
|
//
|
||
|
// Author : go_developer@163.com<白茶清欢>
|
||
|
//
|
||
|
// Date : 5:59 PM 2021/12/20
|
||
|
func TestStartCollector(t *testing.T) {
|
||
|
if err := StartHTMLCollector([]string{}, "http://www.baidu.com", &testHandler{}); nil != err {
|
||
|
panic("出现异常 : " + err.Error())
|
||
|
}
|
||
|
}
|
||
|
|
||
|
// testHandler is a stateless handler used by the tests in this file; its
// methods supply the callbacks passed to StartHTMLCollector.
type testHandler struct{}
|
||
|
|
||
|
func (t *testHandler) OnRequest() colly.RequestCallback {
|
||
|
return func(r *colly.Request) {
|
||
|
fmt.Println("开始请求 : ", r.URL)
|
||
|
}
|
||
|
}
|
||
|
|
||
|
func (t *testHandler) OnError() {
|
||
|
fmt.Println("请求异常 : ")
|
||
|
}
|
||
|
|
||
|
func (t *testHandler) OnResponse() {
|
||
|
|
||
|
}
|
||
|
|
||
|
func (t *testHandler) OnHTML() (string, colly.HTMLCallback) {
|
||
|
return "a[href]", func(e *colly.HTMLElement) {
|
||
|
link := e.Attr("href")
|
||
|
|
||
|
// Print link
|
||
|
|
||
|
fmt.Printf("Link found: %q -> %s\n", e.Text, link)
|
||
|
|
||
|
}
|
||
|
}
|
||
|
|
||
|
func (t *testHandler) OnScraped() {
|
||
|
|
||
|
}
|