web-crawler/main.go

package main

import (
	"errors"
	"flag"
	"fmt"
	"os"

	"codeflow.dananglin.me.uk/apollo/web-crawler/internal/crawler"
)

func main() {
	if err := run(); err != nil {
		os.Stderr.WriteString("ERROR: " + err.Error() + "\n")
		os.Exit(1)
	}
}

var errNoURLProvided = errors.New("no URL was provided")

func run() error {
	var (
		maxWorkers int
		maxPages   int
		format     string
		file       string
	)

	flag.IntVar(&maxWorkers, "max-workers", 2, "The maximum number of concurrent workers")
	flag.IntVar(&maxPages, "max-pages", 10, "The maximum number of pages to discover before stopping the crawl")
	flag.StringVar(&format, "format", "text", "The format of the report. Can be 'text' or 'csv'")
	flag.StringVar(&file, "file", "", "The file to save the report to")
	flag.Parse()

	// The base URL to crawl is the first positional argument after the flags.
	if flag.NArg() < 1 {
		return errNoURLProvided
	}

	baseURL := flag.Arg(0)

	c, err := crawler.NewCrawler(baseURL, maxWorkers, maxPages, format, file)
	if err != nil {
		return fmt.Errorf("unable to create the crawler: %w", err)
	}

	// Start the crawl from the base URL in its own goroutine, then block
	// until the crawler signals that all of its workers have finished.
	go c.Crawl(baseURL)
	c.Wait()

	if err := c.GenerateReport(); err != nil {
		return fmt.Errorf("unable to generate the report: %w", err)
	}

	return nil
}
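
// Example invocation (a sketch: the binary name and target URL below are
// illustrative assumptions, not taken from this repository):
//
//	go build -o web-crawler .
//	./web-crawler --max-workers 4 --max-pages 50 --format csv --file report.csv https://example.com
//
// Note that the flags must appear before the positional URL argument, since
// the standard library's flag package stops parsing at the first non-flag
// argument.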