// web-crawler/main.go
package main

import (
	"errors"
	"fmt"
	"io"
	"net/http"
	"os"
	"strings"
	"time"
)
// Build metadata, expected to be injected at link time via
// -ldflags "-X main.binaryVersion=..." etc.; zero values otherwise.
// NOTE(review): none of these are referenced in this file — presumably
// read elsewhere (e.g. a --version flag); confirm before removing.
var (
	binaryVersion string
	buildTime     string
	goVersion     string
	gitCommit     string
)
func main() {
if err := run(); err != nil {
2024-08-26 18:37:45 +01:00
fmt.Println(err)
2024-08-26 10:30:14 +01:00
os.Exit(1)
}
}
func run() error {
2024-08-26 18:37:45 +01:00
args := os.Args[1:]
if len(args) == 0 {
return errors.New("no website provided")
}
if len(args) > 1 {
return errors.New("too many arguments provided")
}
baseURL := args[0]
2024-08-26 19:00:44 +01:00
htmlBody, err := getHTML(baseURL)
if err != nil {
return err
}
fmt.Println(htmlBody)
2024-08-26 18:37:45 +01:00
2024-08-26 10:30:14 +01:00
return nil
}
2024-08-26 19:00:44 +01:00
func getHTML(rawURL string) (string, error) {
req, err := http.NewRequest(http.MethodGet, rawURL, nil)
if err != nil {
return "", fmt.Errorf("error creating the request: %w", err)
}
client := http.Client{
Timeout: time.Duration(10 * time.Second),
}
resp, err := client.Do(req)
if err != nil {
return "", fmt.Errorf("error getting the response: %w", err)
}
defer resp.Body.Close()
if resp.StatusCode >= 400 {
return "", fmt.Errorf(
"received a bad status from %s: (%d) %s",
rawURL,
resp.StatusCode,
resp.Status,
)
}
contentType := resp.Header.Get("content-type")
if !strings.Contains(contentType, "text/html") {
return "", fmt.Errorf("unexpected content type received: want text/html, got %s", contentType)
}
data, err := io.ReadAll(resp.Body)
if err != nil {
return "", fmt.Errorf("error reading the data from the response: %w", err)
}
return string(data), nil
}