feat: add support for generating JSON reports

2024-08-28 17:59:56 +01:00 · 2024-08-28 17:59:56 +01:00 · 8e5acbe7c7
commit 8e5acbe7c7
parent 0022d7650c
5 changed files with 87 additions and 42 deletions
--- a/README.md
+++ b/README.md
@ -49,9 +49,9 @@ Run the application specifying the website that you want to crawl.
   ```
   ./crawler --max-workers 3 --max-pages 100 https://crawler-test.com
   ```
- Crawl the site and print out a CSV report.
+- Crawl the site and print out a JSON report.
   ```
-   ./crawler --max-workers 3 --max-pages 100 --format csv https://crawler-test.com
+   ./crawler --max-workers 3 --max-pages 100 --format json https://crawler-test.com
   ```
 - Crawl the site and save the report to a CSV file.
   ```
@ -67,5 +67,5 @@ You can configure the application with the following flags.
 |------|-------------|---------|
 | `max-workers` | The maximum number of concurrent workers. | 2 |
 | `max-pages` | The maximum number of pages the crawler can discover before stopping the crawl. | 10 |
-| `format` | The format of the generated report.<br>Currently supports `text` and `csv`. | text |
+| `format` | The format of the generated report.<br>Currently supports `text`, `csv` or `json`. | text |
 | `file` | The file to save the generated report to.<br>Leave this empty to print to the screen instead. | |
--- a/internal/crawler/crawler.go
+++ b/internal/crawler/crawler.go
@ -1,7 +1,9 @@
 package crawler

 import (
+	"encoding/json"
 	"fmt"
+	"io"
 	"net/url"
 	"os"
 	"sync"
@ -174,6 +176,12 @@ func (c *Crawler) GenerateReport() error {

 	report := newReport(c.reportFormat, c.baseURL.String(), c.pages)

+	if c.reportFormat == "json" {
+		return c.generateJSONReport(report)
+	}
+
+	var writer io.Writer
+
 	if c.filepath != "" {
 		file, err := os.Create(c.filepath)
 		if err != nil {
@ -181,16 +189,53 @@ func (c *Crawler) GenerateReport() error {
 		}
 		defer file.Close()

-		fmt.Fprintln(file, report)
+		writer = file

-		fmt.Println("\nSuccessfully saved the report to", c.filepath)
+		fmt.Fprintln(file, report)
 	} else {
-		fmt.Fprintln(os.Stdout, report)
+		writer = os.Stdout
+	}
+
+	fmt.Fprintln(writer, report)
+
+	if c.filepath != "" {
+		fmt.Println("\nSuccessfully saved the report to", c.filepath)
 	}

 	return nil
 }

+func (c *Crawler) generateJSONReport(report report) error {
+	var writer io.Writer
+
+	if c.filepath != "" {
+		file, err := os.Create(c.filepath)
+		if err != nil {
+			return fmt.Errorf("error creating %s: %w", c.filepath, err)
+		}
+		defer file.Close()
+
+		writer = file
+	} else {
+		writer = os.Stdout
+	}
+
+	encoder := json.NewEncoder(writer)
+	encoder.SetIndent("", "    ")
+
+	if err := encoder.Encode(report); err != nil {
+		return fmt.Errorf("error marshalling the report to JSON: %w", err)
+	}
+
+	if c.filepath != "" {
+		fmt.Println("\nSuccessfully saved the report to", c.filepath)
+	}
+
+	return nil
+}
+
+// reachedMaxPages evaluates to true if the map has reached the
+// maximum number of entries.
 func (c *Crawler) reachedMaxPages() bool {
 	c.mu.Lock()
 	defer c.mu.Unlock()
--- a/internal/crawler/report.go
+++ b/internal/crawler/report.go
@ -9,15 +9,15 @@ import (
 )

 type report struct {
-	format  string
-	baseURL string
-	records []record
+	Format  string   `json:"-"`
+	BaseURL string   `json:"baseUrl"`
+	Records []record `json:"records"`
 }

 type record struct {
-	link     string
-	count    int
-	linkType string
+	Link     string `json:"link"`
+	Count    int    `json:"count"`
+	LinkType string `json:"linkType"`
 }

 func newReport(format, baseURL string, pages map[string]pageStat) report {
@ -30,18 +30,18 @@ func newReport(format, baseURL string, pages map[string]pageStat) report {
 		}

 		record := record{
-			link:     link,
-			count:    stats.count,
-			linkType: linkType,
+			Link:     link,
+			Count:    stats.count,
+			LinkType: linkType,
 		}

 		records = append(records, record)
 	}

 	report := report{
-		format:  format,
-		baseURL: baseURL,
-		records: records,
+		Format:  format,
+		BaseURL: baseURL,
+		Records: records,
 	}

 	report.sortRecords()
@ -52,17 +52,17 @@ func newReport(format, baseURL string, pages map[string]pageStat) report {
 func (r *report) sortRecords() {
 	// Sort records by count in descending order (the comparison result is negated).
 	// Records with the same count are then ordered by link in ascending order.
-	slices.SortFunc(r.records, func(a, b record) int {
-		if n := cmp.Compare(a.count, b.count); n != 0 {
+	slices.SortFunc(r.Records, func(a, b record) int {
+		if n := cmp.Compare(a.Count, b.Count); n != 0 {
 			return -1 * n
 		}

-		return strings.Compare(a.link, b.link)
+		return strings.Compare(a.Link, b.Link)
 	})
 }

 func (r report) String() string {
-	switch r.format {
+	switch r.Format {
 	case "csv":
 		return r.csv()
 	default:
@ -76,16 +76,16 @@ func (r report) text() string {
 	titlebar := strings.Repeat("\u2500", 80)

 	builder.WriteString("\n" + titlebar)
-	builder.WriteString("\n" + "REPORT for " + r.baseURL)
+	builder.WriteString("\n" + "REPORT for " + r.BaseURL)
 	builder.WriteString("\n" + titlebar)

-	for ind := range slices.All(r.records) {
+	for ind := range slices.All(r.Records) {
 		links := "links"
-		if r.records[ind].count == 1 {
+		if r.Records[ind].Count == 1 {
 			links = "link"
 		}

-		builder.WriteString("\nFound " + strconv.Itoa(r.records[ind].count) + " " + r.records[ind].linkType + " " + links + " to " + r.records[ind].link)
+		builder.WriteString("\nFound " + strconv.Itoa(r.Records[ind].Count) + " " + r.Records[ind].LinkType + " " + links + " to " + r.Records[ind].Link)
 	}

 	return builder.String()
@ -96,8 +96,8 @@ func (r report) csv() string {

 	builder.WriteString("LINK,TYPE,COUNT")

-	for ind := range slices.All(r.records) {
-		builder.WriteString("\n" + r.records[ind].link + "," + r.records[ind].linkType + "," + strconv.Itoa(r.records[ind].count))
+	for ind := range slices.All(r.Records) {
+		builder.WriteString("\n" + r.Records[ind].Link + "," + r.Records[ind].LinkType + "," + strconv.Itoa(r.Records[ind].Count))
 	}

 	return builder.String()
--- a/internal/crawler/report_test.go
+++ b/internal/crawler/report_test.go
@ -24,19 +24,19 @@ func TestReport(t *testing.T) {
 	}

 	want := report{
-		format:  "text",
-		baseURL: "https://example.org",
-		records: []record{
-			{link: "example.org", count: 45, linkType: "internal"},
-			{link: "example.org/about/contact", count: 10, linkType: "internal"},
-			{link: "example.org/posts", count: 4, linkType: "internal"},
-			{link: "example.org/tags", count: 4, linkType: "internal"},
-			{link: "mastodon.example.social/@benbarlett", count: 4, linkType: "external"},
-			{link: "example.org/tags/golang", count: 2, linkType: "internal"},
-			{link: "ben-barlett.dev", count: 1, linkType: "external"},
-			{link: "example.org/posts/yet-another-web-crawler-has-emerged", count: 1, linkType: "internal"},
-			{link: "github.com/benbarlettdotdev", count: 1, linkType: "external"},
-			{link: "github.com/dananglin/web-crawler", count: 1, linkType: "external"},
+		Format:  "text",
+		BaseURL: "https://example.org",
+		Records: []record{
+			{Link: "example.org", Count: 45, LinkType: "internal"},
+			{Link: "example.org/about/contact", Count: 10, LinkType: "internal"},
+			{Link: "example.org/posts", Count: 4, LinkType: "internal"},
+			{Link: "example.org/tags", Count: 4, LinkType: "internal"},
+			{Link: "mastodon.example.social/@benbarlett", Count: 4, LinkType: "external"},
+			{Link: "example.org/tags/golang", Count: 2, LinkType: "internal"},
+			{Link: "ben-barlett.dev", Count: 1, LinkType: "external"},
+			{Link: "example.org/posts/yet-another-web-crawler-has-emerged", Count: 1, LinkType: "internal"},
+			{Link: "github.com/benbarlettdotdev", Count: 1, LinkType: "external"},
+			{Link: "github.com/dananglin/web-crawler", Count: 1, LinkType: "external"},
 		},
 	}

--- a/main.go
+++ b/main.go
@ -29,7 +29,7 @@ func run() error {

 	flag.IntVar(&maxWorkers, "max-workers", 2, "The maximum number of concurrent workers")
 	flag.IntVar(&maxPages, "max-pages", 10, "The maximum number of pages to discover before stopping the crawl")
-	flag.StringVar(&format, "format", "text", "The format of the report. Can be 'text' or 'csv'")
+	flag.StringVar(&format, "format", "text", "The format of the report. Valid formats are 'text', 'json' and 'csv'")
 	flag.StringVar(&file, "file", "", "The file to save the report to")

 	flag.Parse()