2024-08-28 07:39:24 +01:00
|
|
|
package crawler
|
|
|
|
|
|
|
|
import (
|
|
|
|
"cmp"
|
|
|
|
"maps"
|
|
|
|
"slices"
|
|
|
|
"strconv"
|
|
|
|
"strings"
|
|
|
|
)
|
|
|
|
|
|
|
|
type report struct {
|
2024-08-28 17:59:56 +01:00
|
|
|
Format string `json:"-"`
|
|
|
|
BaseURL string `json:"baseUrl"`
|
|
|
|
Records []record `json:"records"`
|
2024-08-28 07:39:24 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
// record is a single row of a report.
type record struct {
	// Link is the link the row describes.
	Link string `json:"link"`

	// Count is the number of times the link was found.
	Count int `json:"count"`

	// LinkType is "internal" or "external" for records built by newReport.
	LinkType string `json:"linkType"`
}
|
|
|
|
|
|
|
|
func newReport(format, baseURL string, pages map[string]pageStat) report {
|
2024-08-28 07:39:24 +01:00
|
|
|
records := make([]record, 0)
|
|
|
|
|
|
|
|
for link, stats := range maps.All(pages) {
|
2024-08-28 14:51:34 +01:00
|
|
|
linkType := "internal"
|
|
|
|
if !stats.internal {
|
|
|
|
linkType = "external"
|
|
|
|
}
|
|
|
|
|
2024-08-28 07:39:24 +01:00
|
|
|
record := record{
|
2024-08-28 17:59:56 +01:00
|
|
|
Link: link,
|
|
|
|
Count: stats.count,
|
|
|
|
LinkType: linkType,
|
2024-08-28 07:39:24 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
records = append(records, record)
|
|
|
|
}
|
|
|
|
|
|
|
|
report := report{
|
2024-08-28 17:59:56 +01:00
|
|
|
Format: format,
|
|
|
|
BaseURL: baseURL,
|
|
|
|
Records: records,
|
2024-08-28 07:39:24 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
report.sortRecords()
|
|
|
|
|
|
|
|
return report
|
|
|
|
}
|
|
|
|
|
|
|
|
func (r *report) sortRecords() {
|
|
|
|
// First sort records by count (in reverse order hopefully)
|
|
|
|
// Then sort records by name if two elements have the same count.
|
2024-08-28 17:59:56 +01:00
|
|
|
slices.SortFunc(r.Records, func(a, b record) int {
|
|
|
|
if n := cmp.Compare(a.Count, b.Count); n != 0 {
|
2024-08-28 07:39:24 +01:00
|
|
|
return -1 * n
|
|
|
|
}
|
|
|
|
|
2024-08-28 17:59:56 +01:00
|
|
|
return strings.Compare(a.Link, b.Link)
|
2024-08-28 07:39:24 +01:00
|
|
|
})
|
|
|
|
}
|
|
|
|
|
|
|
|
func (r report) String() string {
|
2024-08-28 17:59:56 +01:00
|
|
|
switch r.Format {
|
2024-08-28 12:00:25 +01:00
|
|
|
case "csv":
|
|
|
|
return r.csv()
|
|
|
|
default:
|
|
|
|
return r.text()
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
func (r report) text() string {
|
2024-08-28 07:39:24 +01:00
|
|
|
var builder strings.Builder
|
|
|
|
|
|
|
|
titlebar := strings.Repeat("\u2500", 80)
|
|
|
|
|
|
|
|
builder.WriteString("\n" + titlebar)
|
2024-08-28 17:59:56 +01:00
|
|
|
builder.WriteString("\n" + "REPORT for " + r.BaseURL)
|
2024-08-28 07:39:24 +01:00
|
|
|
builder.WriteString("\n" + titlebar)
|
|
|
|
|
2024-08-28 17:59:56 +01:00
|
|
|
for ind := range slices.All(r.Records) {
|
2024-08-28 07:39:24 +01:00
|
|
|
links := "links"
|
2024-08-28 17:59:56 +01:00
|
|
|
if r.Records[ind].Count == 1 {
|
2024-08-28 07:39:24 +01:00
|
|
|
links = "link"
|
|
|
|
}
|
|
|
|
|
2024-08-28 17:59:56 +01:00
|
|
|
builder.WriteString("\nFound " + strconv.Itoa(r.Records[ind].Count) + " " + r.Records[ind].LinkType + " " + links + " to " + r.Records[ind].Link)
|
2024-08-28 07:39:24 +01:00
|
|
|
}
|
|
|
|
|
2024-08-28 12:00:25 +01:00
|
|
|
return builder.String()
|
|
|
|
}
|
|
|
|
|
|
|
|
func (r report) csv() string {
|
|
|
|
var builder strings.Builder
|
|
|
|
|
|
|
|
builder.WriteString("LINK,TYPE,COUNT")
|
|
|
|
|
2024-08-28 17:59:56 +01:00
|
|
|
for ind := range slices.All(r.Records) {
|
|
|
|
builder.WriteString("\n" + r.Records[ind].Link + "," + r.Records[ind].LinkType + "," + strconv.Itoa(r.Records[ind].Count))
|
2024-08-28 12:00:25 +01:00
|
|
|
}
|
2024-08-28 07:39:24 +01:00
|
|
|
|
|
|
|
return builder.String()
|
|
|
|
}
|