diff --git a/README.md b/README.md
index 49c082e..3f01953 100644
--- a/README.md
+++ b/README.md
@@ -49,9 +49,9 @@ Run the application specifying the website that you want to crawl.
```
./crawler --max-workers 3 --max-pages 100 https://crawler-test.com
```
-- Crawl the site and print out a CSV report.
+- Crawl the site and print out a JSON report.
```
- ./crawler --max-workers 3 --max-pages 100 --format csv https://crawler-test.com
+ ./crawler --max-workers 3 --max-pages 100 --format json https://crawler-test.com
```
- Crawl the site and save the report to a CSV file.
```
@@ -67,5 +67,5 @@ You can configure the application with the following flags.
|------|-------------|---------|
| `max-workers` | The maximum number of concurrent workers. | 2 |
| `max-pages` | The maximum number of pages the crawler can discover before stopping the crawl. | 10 |
-| `format` | The format of the generated report. Currently supports `text` and `csv`. | text |
+| `format` | The format of the generated report. Currently supports `text`, `csv` or `json`. | text |
| `file` | The file to save the generated report to. Leave this empty to print to the screen instead. | |
diff --git a/internal/crawler/crawler.go b/internal/crawler/crawler.go
index 1590d1f..dd6a104 100644
--- a/internal/crawler/crawler.go
+++ b/internal/crawler/crawler.go
@@ -1,7 +1,9 @@
package crawler
import (
+ "encoding/json"
"fmt"
+ "io"
"net/url"
"os"
"sync"
@@ -174,6 +176,12 @@ func (c *Crawler) GenerateReport() error {
report := newReport(c.reportFormat, c.baseURL.String(), c.pages)
+ if c.reportFormat == "json" {
+ return c.generateJSONReport(report)
+ }
+
+ var writer io.Writer
+
if c.filepath != "" {
file, err := os.Create(c.filepath)
if err != nil {
@@ -181,16 +189,53 @@ func (c *Crawler) GenerateReport() error {
}
defer file.Close()
-	fmt.Fprintln(file, report)
+	writer = file
-	fmt.Println("\nSuccessfully saved the report to", c.filepath)
} else {
- fmt.Fprintln(os.Stdout, report)
+ writer = os.Stdout
+ }
+
+ fmt.Fprintln(writer, report)
+
+ if c.filepath != "" {
+ fmt.Println("\nSuccessfully saved the report to", c.filepath)
}
return nil
}
+func (c *Crawler) generateJSONReport(report report) error {
+ var writer io.Writer
+
+ if c.filepath != "" {
+ file, err := os.Create(c.filepath)
+ if err != nil {
+ return fmt.Errorf("error creating %s: %w", c.filepath, err)
+ }
+ defer file.Close()
+
+ writer = file
+ } else {
+ writer = os.Stdout
+ }
+
+ encoder := json.NewEncoder(writer)
+ encoder.SetIndent("", " ")
+
+ if err := encoder.Encode(report); err != nil {
+ return fmt.Errorf("error marshalling the report to JSON: %w", err)
+ }
+
+ if c.filepath != "" {
+ fmt.Println("\nSuccessfully saved the report to", c.filepath)
+ }
+
+ return nil
+}
+
+// reachedMaxPages evaluates to true if the map has reached the
+// maximum number of entries.
func (c *Crawler) reachedMaxPages() bool {
c.mu.Lock()
defer c.mu.Unlock()
diff --git a/internal/crawler/report.go b/internal/crawler/report.go
index c6e947d..7fcc895 100644
--- a/internal/crawler/report.go
+++ b/internal/crawler/report.go
@@ -9,15 +9,15 @@ import (
)
type report struct {
- format string
- baseURL string
- records []record
+ Format string `json:"-"`
+ BaseURL string `json:"baseUrl"`
+ Records []record `json:"records"`
}
type record struct {
- link string
- count int
- linkType string
+ Link string `json:"link"`
+ Count int `json:"count"`
+ LinkType string `json:"linkType"`
}
func newReport(format, baseURL string, pages map[string]pageStat) report {
@@ -30,18 +30,18 @@ func newReport(format, baseURL string, pages map[string]pageStat) report {
}
record := record{
- link: link,
- count: stats.count,
- linkType: linkType,
+ Link: link,
+ Count: stats.count,
+ LinkType: linkType,
}
records = append(records, record)
}
report := report{
- format: format,
- baseURL: baseURL,
- records: records,
+ Format: format,
+ BaseURL: baseURL,
+ Records: records,
}
report.sortRecords()
@@ -52,17 +52,17 @@ func newReport(format, baseURL string, pages map[string]pageStat) report {
func (r *report) sortRecords() {
// First sort records by count (in reverse order hopefully)
// Then sort records by name if two elements have the same count.
- slices.SortFunc(r.records, func(a, b record) int {
- if n := cmp.Compare(a.count, b.count); n != 0 {
+ slices.SortFunc(r.Records, func(a, b record) int {
+ if n := cmp.Compare(a.Count, b.Count); n != 0 {
return -1 * n
}
- return strings.Compare(a.link, b.link)
+ return strings.Compare(a.Link, b.Link)
})
}
func (r report) String() string {
- switch r.format {
+ switch r.Format {
case "csv":
return r.csv()
default:
@@ -76,16 +76,16 @@ func (r report) text() string {
titlebar := strings.Repeat("\u2500", 80)
builder.WriteString("\n" + titlebar)
- builder.WriteString("\n" + "REPORT for " + r.baseURL)
+ builder.WriteString("\n" + "REPORT for " + r.BaseURL)
builder.WriteString("\n" + titlebar)
- for ind := range slices.All(r.records) {
+ for ind := range slices.All(r.Records) {
links := "links"
- if r.records[ind].count == 1 {
+ if r.Records[ind].Count == 1 {
links = "link"
}
- builder.WriteString("\nFound " + strconv.Itoa(r.records[ind].count) + " " + r.records[ind].linkType + " " + links + " to " + r.records[ind].link)
+ builder.WriteString("\nFound " + strconv.Itoa(r.Records[ind].Count) + " " + r.Records[ind].LinkType + " " + links + " to " + r.Records[ind].Link)
}
return builder.String()
@@ -96,8 +96,8 @@ func (r report) csv() string {
builder.WriteString("LINK,TYPE,COUNT")
- for ind := range slices.All(r.records) {
- builder.WriteString("\n" + r.records[ind].link + "," + r.records[ind].linkType + "," + strconv.Itoa(r.records[ind].count))
+ for ind := range slices.All(r.Records) {
+ builder.WriteString("\n" + r.Records[ind].Link + "," + r.Records[ind].LinkType + "," + strconv.Itoa(r.Records[ind].Count))
}
return builder.String()
diff --git a/internal/crawler/report_test.go b/internal/crawler/report_test.go
index 33fc6b6..3da87cd 100644
--- a/internal/crawler/report_test.go
+++ b/internal/crawler/report_test.go
@@ -24,19 +24,19 @@ func TestReport(t *testing.T) {
}
want := report{
- format: "text",
- baseURL: "https://example.org",
- records: []record{
- {link: "example.org", count: 45, linkType: "internal"},
- {link: "example.org/about/contact", count: 10, linkType: "internal"},
- {link: "example.org/posts", count: 4, linkType: "internal"},
- {link: "example.org/tags", count: 4, linkType: "internal"},
- {link: "mastodon.example.social/@benbarlett", count: 4, linkType: "external"},
- {link: "example.org/tags/golang", count: 2, linkType: "internal"},
- {link: "ben-barlett.dev", count: 1, linkType: "external"},
- {link: "example.org/posts/yet-another-web-crawler-has-emerged", count: 1, linkType: "internal"},
- {link: "github.com/benbarlettdotdev", count: 1, linkType: "external"},
- {link: "github.com/dananglin/web-crawler", count: 1, linkType: "external"},
+ Format: "text",
+ BaseURL: "https://example.org",
+ Records: []record{
+ {Link: "example.org", Count: 45, LinkType: "internal"},
+ {Link: "example.org/about/contact", Count: 10, LinkType: "internal"},
+ {Link: "example.org/posts", Count: 4, LinkType: "internal"},
+ {Link: "example.org/tags", Count: 4, LinkType: "internal"},
+ {Link: "mastodon.example.social/@benbarlett", Count: 4, LinkType: "external"},
+ {Link: "example.org/tags/golang", Count: 2, LinkType: "internal"},
+ {Link: "ben-barlett.dev", Count: 1, LinkType: "external"},
+ {Link: "example.org/posts/yet-another-web-crawler-has-emerged", Count: 1, LinkType: "internal"},
+ {Link: "github.com/benbarlettdotdev", Count: 1, LinkType: "external"},
+ {Link: "github.com/dananglin/web-crawler", Count: 1, LinkType: "external"},
},
}
diff --git a/main.go b/main.go
index 645e4b7..d6c349f 100644
--- a/main.go
+++ b/main.go
@@ -29,7 +29,7 @@ func run() error {
flag.IntVar(&maxWorkers, "max-workers", 2, "The maximum number of concurrent workers")
flag.IntVar(&maxPages, "max-pages", 10, "The maximum number of pages to discover before stopping the crawl")
- flag.StringVar(&format, "format", "text", "The format of the report. Can be 'text' or 'csv'")
+ flag.StringVar(&format, "format", "text", "The format of the report. Valid formats are 'text', 'json' and 'csv'")
flag.StringVar(&file, "file", "", "The file to save the report to")
flag.Parse()