checkpoint: URL canonicalisation

This commit is contained in:
Dan Anglin 2024-10-17 23:17:18 +01:00
parent 9cb1f8ed4b
commit 8bc4f94c20
Signed by: dananglin
GPG key ID: 0C1D44CFBEE68638
2 changed files with 200 additions and 0 deletions

View file

@ -0,0 +1,69 @@
package utilities
import (
"errors"
"fmt"
"net"
"net/url"
"regexp"
)
const (
	// httpScheme is the scheme prefix of a plain HTTP URL.
	httpScheme = "http://"
	// httpsScheme is the scheme prefix of an HTTPS URL. It is also the prefix
	// given to profile URLs that are supplied without a scheme.
	httpsScheme = "https://"
)

// Sentinel errors returned by ValidateProfileURL.
var (
	ErrMissingHostname       = errors.New("the hostname is missing from the URL")
	ErrHostIsIPAddress       = errors.New("the hostname is an IP address")
	ErrInvalidURLScheme      = errors.New("invalid URL scheme")
	ErrURLContainsFragment   = errors.New("the URL contains a fragment")
	ErrURLContainsPort       = errors.New("the URL contains a port")
	ErrURLContainsUserInfo   = errors.New("the URL contains a username or password")
	ErrURLContainsDotSegment = errors.New("the URL path contains a single-dot or double-dot segment")
)

// Patterns used by ValidateProfileURL. They are compiled once at package
// initialisation rather than on every call.
var (
	// profileURLSchemePattern matches a leading scheme name followed by its
	// colon. The character class cannot cross a '/', '?' or '#', so a colon
	// (or a second "://") that appears later in the path or query string is
	// never mistaken for part of the scheme.
	profileURLSchemePattern = regexp.MustCompile(`^[A-Za-z][A-Za-z0-9+.-]*:`)

	// allowedProfileURLSchemePattern matches the http:// and https:// prefixes
	// in any letter case.
	allowedProfileURLSchemePattern = regexp.MustCompile(
		`(?i)^(?:` + regexp.QuoteMeta(httpScheme) + `|` + regexp.QuoteMeta(httpsScheme) + `)`,
	)

	// profileURLDotSegmentPattern matches a path segment that is exactly "."
	// or "..".
	profileURLDotSegmentPattern = regexp.MustCompile(`(?:^|/)\.\.?(?:/|$)`)
)

// ValidateProfileURL validates the given profile URL according to the indieauth
// specification. ValidateProfileURL returns the canonicalised profile URL after
// validation checks.
//
// Canonicalisation adds the https scheme when profileURL has no scheme and
// sets the path to "/" when the path is empty. An input of the form
// "host:port" without a scheme is read as having the scheme "host" and is
// therefore rejected with ErrInvalidURLScheme.
//
// The returned error is one of:
//   - ErrInvalidURLScheme when a scheme other than http:// or https:// is used;
//   - an error wrapping the net/url parse error when the URL cannot be parsed;
//   - ErrMissingHostname when the URL has no hostname;
//   - ErrHostIsIPAddress when the hostname is an IPv4 or IPv6 address;
//   - ErrURLContainsFragment when the URL has a fragment;
//   - ErrURLContainsPort when the URL has a port;
//   - ErrURLContainsUserInfo when the URL has a username or password;
//   - ErrURLContainsDotSegment when the path has a "." or ".." segment.
func ValidateProfileURL(profileURL string) (string, error) {
	switch {
	case !profileURLSchemePattern.MatchString(profileURL):
		// If the scheme is missing then default to https.
		profileURL = httpsScheme + profileURL
	case !allowedProfileURLSchemePattern.MatchString(profileURL):
		return "", ErrInvalidURLScheme
	}

	parsed, err := url.Parse(profileURL)
	if err != nil {
		return "", fmt.Errorf("unable to parse the URL %q: %w", profileURL, err)
	}

	hostname := parsed.Hostname()

	switch {
	case hostname == "":
		return "", ErrMissingHostname
	case net.ParseIP(hostname) != nil:
		return "", ErrHostIsIPAddress
	case parsed.Fragment != "":
		return "", ErrURLContainsFragment
	case parsed.Port() != "":
		return "", ErrURLContainsPort
	case parsed.User != nil:
		return "", ErrURLContainsUserInfo
	case profileURLDotSegmentPattern.MatchString(parsed.Path):
		// The decoded path is checked so that percent-encoded dots are
		// caught as well.
		return "", ErrURLContainsDotSegment
	}

	if parsed.Path == "" {
		parsed.Path = "/"
	}

	return parsed.String(), nil
}

View file

@ -0,0 +1,131 @@
package utilities_test
import (
"errors"
"slices"
"testing"
"codeflow.dananglin.me.uk/apollo/indieauth-server/internal/utilities"
)
func TestValidateProfileURL(t *testing.T) {
validProfileURLTestCases := []struct {
name string
url string
want string
}{
{
name: "Canonicalised URL",
url: "https://barry.example.org/",
want: "https://barry.example.org/",
},
{
name: "Canonicalised URL with path",
url: "https://example.org/username/barry",
want: "https://example.org/username/barry",
},
{
name: "Canonicalised URL with query string",
url: "http://example.org/users?id=1001",
want: "http://example.org/users?id=1001",
},
{
name: "Non-canonicalised URL with missing scheme",
url: "barry.example.org/",
want: "https://barry.example.org/",
},
{
name: "Non-canonicalised URL with missing path",
url: "http://barry.example.org",
want: "http://barry.example.org/",
},
}
for _, ta := range slices.All(validProfileURLTestCases) {
t.Run(ta.name, testValidProfileURLs(ta.name, ta.url, ta.want))
}
invalidProfileURLTestCases := []struct {
name string
url string
wantError error
}{
{
name: "URL using the mailto scheme",
url: "mailto:barry@example.org",
wantError: utilities.ErrInvalidURLScheme,
},
{
name: "URL using a non-http scheme",
url: "postgres://db_user:db_password@some_db_server:5432/db",
wantError: utilities.ErrInvalidURLScheme,
},
{
name: "URL containing a port",
url: "http://barry.example.org:80/",
wantError: utilities.ErrURLContainsPort,
},
{
name: "URL containing a fragment",
url: "https://barry.example.org/#fragment",
wantError: utilities.ErrURLContainsFragment,
},
{
name: "URL host is an IP address",
url: "https://192.168.82.56/",
wantError: utilities.ErrHostIsIPAddress,
},
{
name: "URL with a missing host",
url: "https:///",
wantError: utilities.ErrMissingHostname,
},
}
for _, tb := range slices.All(invalidProfileURLTestCases) {
t.Run(tb.name, testInvalidProfileURL(tb.name, tb.url, tb.wantError))
}
}
// testValidProfileURLs returns a subtest function that passes url to
// utilities.ValidateProfileURL and expects wantURL back with no error.
// testName is only used to label the failure and success messages.
func testValidProfileURLs(testName, url, wantURL string) func(t *testing.T) {
	return func(t *testing.T) {
		got, err := utilities.ValidateProfileURL(url)

		switch {
		case err != nil:
			// Any error at all is fatal for a URL that should be accepted.
			t.Fatalf("FAILED test %q: %v", testName, err)
		case got != wantURL:
			t.Errorf("FAILED test %q: want %s, got %s", testName, wantURL, got)
		default:
			t.Logf("PASSED test %q: got %s", testName, got)
		}
	}
}
// testInvalidProfileURL returns a subtest function that passes url to
// utilities.ValidateProfileURL and expects an error matching wantError
// (checked with errors.Is). testName is only used to label the failure and
// success messages.
func testInvalidProfileURL(testName, url string, wantError error) func(t *testing.T) {
	return func(t *testing.T) {
		_, err := utilities.ValidateProfileURL(url)

		if err == nil {
			t.Errorf(
				"FAILED test %q: The expected error was not received using invalid profile URL %q",
				testName,
				url,
			)

			return
		}

		if !errors.Is(err, wantError) {
			t.Errorf(
				"FAILED test %q: Unexpected error received using profile URL %q: got %q",
				testName,
				url,
				err.Error(),
			)

			return
		}

		t.Logf(
			"PASSED test %q: Expected error received using profile URL %q: got %q",
			testName,
			url,
			err.Error(),
		)
	}
}