generated from templates/go-generic
173 lines
3.2 KiB
Go
173 lines
3.2 KiB
Go
|
package crawler_test
|
||
|
|
||
|
import (
|
||
|
"fmt"
|
||
|
"slices"
|
||
|
"testing"
|
||
|
|
||
|
"codeflow.dananglin.me.uk/apollo/web-crawler/internal/crawler"
|
||
|
"codeflow.dananglin.me.uk/apollo/web-crawler/internal/util"
|
||
|
)
|
||
|
|
||
|
func TestCrawler(t *testing.T) {
|
||
|
testBaseURL := "https://example.com"
|
||
|
|
||
|
testCrawler, err := crawler.NewCrawler(testBaseURL)
|
||
|
if err != nil {
|
||
|
t.Fatalf("Test 'TestCrawler' FAILED: unexpected error creating the crawler: %v", err)
|
||
|
}
|
||
|
|
||
|
testCasesForEqualDomains := []struct {
|
||
|
name string
|
||
|
rawURL string
|
||
|
want bool
|
||
|
}{
|
||
|
{
|
||
|
name: "Same domain",
|
||
|
rawURL: "https://example.com",
|
||
|
want: true,
|
||
|
},
|
||
|
{
|
||
|
name: "Same domain, different path",
|
||
|
rawURL: "https://example.com/about/contact",
|
||
|
want: true,
|
||
|
},
|
||
|
{
|
||
|
name: "Same domain, different protocol",
|
||
|
rawURL: "http://example.com",
|
||
|
want: true,
|
||
|
},
|
||
|
{
|
||
|
name: "Different domain",
|
||
|
rawURL: "https://blog.person.me.uk",
|
||
|
want: false,
|
||
|
},
|
||
|
{
|
||
|
name: "Different domain, same path",
|
||
|
rawURL: "https://example.org/blog",
|
||
|
want: false,
|
||
|
},
|
||
|
}
|
||
|
|
||
|
for ind, tc := range slices.All(testCasesForEqualDomains) {
|
||
|
t.Run(tc.name, testHasEqualDomains(
|
||
|
testCrawler,
|
||
|
ind+1,
|
||
|
tc.name,
|
||
|
tc.rawURL,
|
||
|
tc.want,
|
||
|
))
|
||
|
}
|
||
|
|
||
|
testCasesForPages := []struct {
|
||
|
rawURL string
|
||
|
wantVisited bool
|
||
|
}{
|
||
|
{
|
||
|
rawURL: "https://example.com/tags/linux",
|
||
|
wantVisited: false,
|
||
|
},
|
||
|
{
|
||
|
rawURL: "https://example.com/blog",
|
||
|
wantVisited: false,
|
||
|
},
|
||
|
{
|
||
|
rawURL: "https://example.com/about/contact.html",
|
||
|
wantVisited: false,
|
||
|
},
|
||
|
{
|
||
|
rawURL: "https://example.com/blog",
|
||
|
wantVisited: true,
|
||
|
},
|
||
|
}
|
||
|
|
||
|
for ind, tc := range slices.All(testCasesForPages) {
|
||
|
name := fmt.Sprintf("Adding %s to the pages map", tc.rawURL)
|
||
|
t.Run(name, testAddPageVisit(
|
||
|
testCrawler,
|
||
|
ind+1,
|
||
|
name,
|
||
|
tc.rawURL,
|
||
|
tc.wantVisited,
|
||
|
))
|
||
|
}
|
||
|
}
|
||
|
|
||
|
func testHasEqualDomains(
|
||
|
testCrawler *crawler.Crawler,
|
||
|
testNum int,
|
||
|
testName string,
|
||
|
rawURL string,
|
||
|
want bool,
|
||
|
) func(t *testing.T) {
|
||
|
return func(t *testing.T) {
|
||
|
t.Parallel()
|
||
|
|
||
|
got, err := testCrawler.HasEqualDomain(rawURL)
|
||
|
if err != nil {
|
||
|
t.Fatalf(
|
||
|
"Test %d - '%s' FAILED: unexpected error: %v",
|
||
|
testNum,
|
||
|
testName,
|
||
|
err,
|
||
|
)
|
||
|
}
|
||
|
|
||
|
if got != want {
|
||
|
t.Errorf(
|
||
|
"Test %d - '%s' FAILED: unexpected domain comparison received: want %t, got %t",
|
||
|
testNum,
|
||
|
testName,
|
||
|
want,
|
||
|
got,
|
||
|
)
|
||
|
} else {
|
||
|
t.Logf(
|
||
|
"Test %d - '%s' PASSED: expected domain comparison received: got %t",
|
||
|
testNum,
|
||
|
testName,
|
||
|
got,
|
||
|
)
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
|
||
|
func testAddPageVisit(
|
||
|
testCrawler *crawler.Crawler,
|
||
|
testNum int,
|
||
|
testName string,
|
||
|
rawURL string,
|
||
|
wantVisited bool,
|
||
|
) func(t *testing.T) {
|
||
|
return func(t *testing.T) {
|
||
|
normalisedURL, err := util.NormaliseURL(rawURL)
|
||
|
if err != nil {
|
||
|
t.Fatalf(
|
||
|
"Test %d - '%s' FAILED: unexpected error: %v",
|
||
|
testNum,
|
||
|
testName,
|
||
|
err,
|
||
|
)
|
||
|
}
|
||
|
|
||
|
gotVisited := testCrawler.AddPageVisit(normalisedURL)
|
||
|
|
||
|
if gotVisited != wantVisited {
|
||
|
t.Errorf(
|
||
|
"Test %d - '%s' FAILED: unexpected bool returned after updated pages record: want %t, got %t",
|
||
|
testNum,
|
||
|
testName,
|
||
|
wantVisited,
|
||
|
gotVisited,
|
||
|
)
|
||
|
} else {
|
||
|
t.Logf(
|
||
|
"Test %d - '%s' PASSED: expected bool returned after updated pages record: got %t",
|
||
|
testNum,
|
||
|
testName,
|
||
|
gotVisited,
|
||
|
)
|
||
|
}
|
||
|
}
|
||
|
}
|