2024-08-26 10:30:14 +01:00
|
|
|
package main
|
|
|
|
|
|
|
|
import (
|
|
|
|
"fmt"
|
|
|
|
"os"
|
2024-08-27 13:45:18 +01:00
|
|
|
"strconv"
|
2024-08-27 13:11:16 +01:00
|
|
|
|
|
|
|
"codeflow.dananglin.me.uk/apollo/web-crawler/internal/crawler"
|
2024-08-26 10:30:14 +01:00
|
|
|
)
|
|
|
|
|
|
|
|
func main() {
|
|
|
|
if err := run(); err != nil {
|
2024-08-27 07:38:20 +01:00
|
|
|
os.Stderr.WriteString("ERROR: " + err.Error() + "\n")
|
|
|
|
|
2024-08-26 10:30:14 +01:00
|
|
|
os.Exit(1)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
func run() error {
|
2024-08-26 18:37:45 +01:00
|
|
|
args := os.Args[1:]
|
|
|
|
|
2024-08-27 13:45:18 +01:00
|
|
|
if len(args) != 3 {
|
|
|
|
return fmt.Errorf("unexpected number of arguments received: want 3, got %d", len(args))
|
2024-08-26 18:37:45 +01:00
|
|
|
}
|
|
|
|
|
2024-08-27 13:45:18 +01:00
|
|
|
baseURL := args[0]
|
|
|
|
|
|
|
|
maxConcurrency, err := strconv.Atoi(args[1])
|
|
|
|
if err != nil {
|
|
|
|
return fmt.Errorf("unable to convert the max concurrency (%s) to an integer: %w", args[1], err)
|
2024-08-26 18:37:45 +01:00
|
|
|
}
|
|
|
|
|
2024-08-27 13:45:18 +01:00
|
|
|
maxPages, err := strconv.Atoi(args[2])
|
|
|
|
if err != nil {
|
|
|
|
return fmt.Errorf("unable to convert the max pages (%s) to an integer: %w", args[2], err)
|
|
|
|
}
|
2024-08-27 07:38:20 +01:00
|
|
|
|
2024-08-27 13:45:18 +01:00
|
|
|
c, err := crawler.NewCrawler(baseURL, maxConcurrency, maxPages)
|
2024-08-27 13:11:16 +01:00
|
|
|
if err != nil {
|
|
|
|
return fmt.Errorf("unable to create the crawler: %w", err)
|
|
|
|
}
|
2024-08-27 07:38:20 +01:00
|
|
|
|
2024-08-27 13:11:16 +01:00
|
|
|
go c.Crawl(baseURL)
|
2024-08-26 19:00:44 +01:00
|
|
|
|
2024-08-27 13:11:16 +01:00
|
|
|
c.Wait()
|
2024-08-27 07:38:20 +01:00
|
|
|
|
2024-08-27 13:11:16 +01:00
|
|
|
c.PrintReport()
|
2024-08-26 18:37:45 +01:00
|
|
|
|
2024-08-26 10:30:14 +01:00
|
|
|
return nil
|
|
|
|
}
|