const (
	httpScheme  = "http://"
	httpsScheme = "https://"
)

// Sentinel errors returned by ValidateProfileURL. Callers should compare
// against them with errors.Is.
var (
	ErrMissingHostname       = errors.New("the hostname is missing from the URL")
	ErrHostIsIPAddress       = errors.New("the hostname is an IP address")
	ErrInvalidURLScheme      = errors.New("invalid URL scheme")
	ErrURLContainsFragment   = errors.New("the URL contains a fragment")
	ErrURLContainsPort       = errors.New("the URL contains a port")
	ErrURLContainsUserInfo   = errors.New("the URL contains a username or password")
	ErrURLContainsDotSegment = errors.New("the URL path contains a single-dot or double-dot segment")
)

// allowedSchemePattern matches a leading "http://" or "https://" prefix.
// URL schemes are case-insensitive, hence the (?i) flag.
var allowedSchemePattern = regexp.MustCompile(
	`(?i)^(?:` + regexp.QuoteMeta(httpsScheme) + `|` + regexp.QuoteMeta(httpScheme) + `)`,
)

// anySchemePattern matches a leading scheme of any kind: a letter followed by
// letters, digits, "+", "." or "-", terminated by the first ":". The match
// never extends past that first colon, so a ":" or "://" that appears later in
// the path or query string is not mistaken for part of the scheme.
var anySchemePattern = regexp.MustCompile(`^[A-Za-z][A-Za-z0-9+.-]*:`)

// dotSegmentPattern matches a path segment that is exactly "." or "..".
var dotSegmentPattern = regexp.MustCompile(`(?:^|/)\.{1,2}(?:/|$)`)

// ValidateProfileURL validates the given profile URL according to the
// IndieAuth specification and returns its canonicalised form.
//
// Canonicalisation adds the "https" scheme when the input has no scheme and
// sets the path to "/" when the input has no path.
//
// The following sentinel errors are returned for invalid input:
//   - ErrInvalidURLScheme when the URL begins with a scheme that is not
//     followed by "//" or is neither http nor https. A scheme-less input of
//     the form "host:port" is indistinguishable from "scheme:opaque" and is
//     therefore also reported as ErrInvalidURLScheme.
//   - ErrMissingHostname when the URL has no host.
//   - ErrHostIsIPAddress when the host is an IPv4 or IPv6 address.
//   - ErrURLContainsFragment when the URL has a fragment.
//   - ErrURLContainsPort when the URL has a port.
//   - ErrURLContainsUserInfo when the URL has a username or password.
//   - ErrURLContainsDotSegment when the path has a "." or ".." segment.
//
// Any error from url.Parse is wrapped and returned. The returned string is
// empty whenever the error is non-nil.
func ValidateProfileURL(profileURL string) (string, error) {
	switch {
	case allowedSchemePattern.MatchString(profileURL):
		// The URL already starts with http:// or https://.
	case anySchemePattern.MatchString(profileURL):
		return "", ErrInvalidURLScheme
	default:
		// No scheme at all: default to https.
		profileURL = httpsScheme + profileURL
	}

	parsedProfileURL, err := url.Parse(profileURL)
	if err != nil {
		return "", fmt.Errorf("unable to parse the URL %q: %w", profileURL, err)
	}

	hostname := parsedProfileURL.Hostname()
	if hostname == "" {
		return "", ErrMissingHostname
	}

	if net.ParseIP(hostname) != nil {
		return "", ErrHostIsIPAddress
	}

	if parsedProfileURL.Fragment != "" {
		return "", ErrURLContainsFragment
	}

	if parsedProfileURL.Port() != "" {
		return "", ErrURLContainsPort
	}

	if parsedProfileURL.User != nil {
		return "", ErrURLContainsUserInfo
	}

	// Path is the decoded form, so percent-encoded dots are caught as well.
	if dotSegmentPattern.MatchString(parsedProfileURL.Path) {
		return "", ErrURLContainsDotSegment
	}

	if parsedProfileURL.Path == "" {
		parsedProfileURL.Path = "/"
	}

	return parsedProfileURL.String(), nil
}
want: "https://barry.example.org/", + }, + { + name: "Non-canonicalised URL with missing path", + url: "http://barry.example.org", + want: "http://barry.example.org/", + }, + } + + for _, ta := range slices.All(validProfileURLTestCases) { + t.Run(ta.name, testValidProfileURLs(ta.name, ta.url, ta.want)) + } + + invalidProfileURLTestCases := []struct { + name string + url string + wantError error + }{ + { + name: "URL using the mailto scheme", + url: "mailto:barry@example.org", + wantError: utilities.ErrInvalidURLScheme, + }, + { + name: "URL using a non-http scheme", + url: "postgres://db_user:db_password@some_db_server:5432/db", + wantError: utilities.ErrInvalidURLScheme, + }, + { + name: "URL containing a port", + url: "http://barry.example.org:80/", + wantError: utilities.ErrURLContainsPort, + }, + { + name: "URL containing a fragment", + url: "https://barry.example.org/#fragment", + wantError: utilities.ErrURLContainsFragment, + }, + { + name: "URL host is an IP address", + url: "https://192.168.82.56/", + wantError: utilities.ErrHostIsIPAddress, + }, + { + name: "URL with a missing host", + url: "https:///", + wantError: utilities.ErrMissingHostname, + }, + } + + for _, tb := range slices.All(invalidProfileURLTestCases) { + t.Run(tb.name, testInvalidProfileURL(tb.name, tb.url, tb.wantError)) + } +} + +func testValidProfileURLs(testName, url, wantURL string) func(t *testing.T) { + return func(t *testing.T) { + canonicalisedURL, err := utilities.ValidateProfileURL(url) + if err != nil { + t.Fatalf("FAILED test %q: %v", testName, err) + } + + if canonicalisedURL != wantURL { + t.Errorf("FAILED test %q: want %s, got %s", testName, wantURL, canonicalisedURL) + } else { + t.Logf("PASSED test %q: got %s", testName, canonicalisedURL) + } + } +} + +func testInvalidProfileURL(testName, url string, wantError error) func(t *testing.T) { + return func(t *testing.T) { + if _, err := utilities.ValidateProfileURL(url); err == nil { + t.Errorf( + "FAILED test %q: The expected 
error was not received using invalid profile URL %q", + testName, + url, + ) + } else { + if !errors.Is(err, wantError) { + t.Errorf( + "FAILED test %q: Unexpected error received using profile URL %q: got %q", + testName, + url, + err.Error(), + ) + } else { + t.Logf( + "PASSED test %q: Expected error received using profile URL %q: got %q", + testName, + url, + err.Error(), + ) + } + } + } +}