fix: parse lists within statuses #20

Manually merged
dananglin merged 1 commit from trnasform-html-unordered-lists into main 2024-06-01 14:08:18 +01:00
4 changed files with 47 additions and 14 deletions

View file

@ -88,7 +88,7 @@ func (a Account) Display(noColor bool) string {
metadata += fmt.Sprintf( metadata += fmt.Sprintf(
"\n %s: %s", "\n %s: %s",
utilities.FieldFormat(noColor, field.Name), utilities.FieldFormat(noColor, field.Name),
utilities.StripHTMLTags(field.Value), utilities.ConvertHTMLToText(field.Value),
) )
} }
@ -105,7 +105,7 @@ func (a Account) Display(noColor bool) string {
utilities.FieldFormat(noColor, "Following:"), a.FollowingCount, utilities.FieldFormat(noColor, "Following:"), a.FollowingCount,
utilities.FieldFormat(noColor, "Statuses:"), a.StatusCount, utilities.FieldFormat(noColor, "Statuses:"), a.StatusCount,
utilities.HeaderFormat(noColor, "BIOGRAPHY:"), utilities.HeaderFormat(noColor, "BIOGRAPHY:"),
utilities.WrapLines(utilities.StripHTMLTags(a.Note), "\n ", 80), utilities.WrapLines(utilities.ConvertHTMLToText(a.Note), "\n ", 80),
utilities.HeaderFormat(noColor, "METADATA:"), utilities.HeaderFormat(noColor, "METADATA:"),
metadata, metadata,
utilities.HeaderFormat(noColor, "ACCOUNT URL:"), utilities.HeaderFormat(noColor, "ACCOUNT URL:"),

View file

@ -180,7 +180,7 @@ func (s Status) Display(noColor bool) string {
format, format,
utilities.DisplayNameFormat(noColor, s.Account.DisplayName), s.Account.Username, utilities.DisplayNameFormat(noColor, s.Account.DisplayName), s.Account.Username,
utilities.HeaderFormat(noColor, "CONTENT:"), utilities.HeaderFormat(noColor, "CONTENT:"),
utilities.WrapLines(utilities.StripHTMLTags(s.Content), "\n ", 80), utilities.WrapLines(utilities.ConvertHTMLToText(s.Content), "\n ", 80),
utilities.HeaderFormat(noColor, "STATUS ID:"), utilities.HeaderFormat(noColor, "STATUS ID:"),
s.ID, s.ID,
utilities.HeaderFormat(noColor, "CREATED AT:"), utilities.HeaderFormat(noColor, "CREATED AT:"),

View file

@ -30,7 +30,7 @@ func (t Timeline) Display(noColor bool) string {
createdAt = status.Reblog.CreatedAt createdAt = status.Reblog.CreatedAt
} }
builder.WriteString(utilities.WrapLines(utilities.StripHTMLTags(status.Content), "\n", 80) + "\n\n") builder.WriteString(utilities.WrapLines(utilities.ConvertHTMLToText(status.Content), "\n", 80) + "\n\n")
builder.WriteString(utilities.FieldFormat(noColor, "ID:") + " " + statusID + "\t" + utilities.FieldFormat(noColor, "Created at:") + " " + utilities.FormatTime(createdAt) + "\n") builder.WriteString(utilities.FieldFormat(noColor, "ID:") + " " + statusID + "\t" + utilities.FieldFormat(noColor, "Created at:") + " " + utilities.FormatTime(createdAt) + "\n")
builder.WriteString(separator + "\n") builder.WriteString(separator + "\n")
} }

View file

@ -1,16 +1,34 @@
package utilities package utilities
import ( import (
"io"
"strconv"
"strings" "strings"
"golang.org/x/net/html" "golang.org/x/net/html"
) )
func StripHTMLTags(text string) string { const (
token := html.NewTokenizer(strings.NewReader(text)) htmlNoList int = iota
htmlOrderedList
htmlUnorderedList
)
type htmlConvertState struct {
htmlListType int
orderedListIndex int
}
func ConvertHTMLToText(text string) string {
var builder strings.Builder var builder strings.Builder
state := htmlConvertState{
htmlListType: htmlNoList,
orderedListIndex: 1,
}
token := html.NewTokenizer(strings.NewReader(text))
for { for {
tt := token.Next() tt := token.Next()
switch tt { switch tt {
@ -21,18 +39,33 @@ func StripHTMLTags(text string) string {
builder.WriteString(text) builder.WriteString(text)
case html.StartTagToken, html.EndTagToken: case html.StartTagToken, html.EndTagToken:
tag := token.Token().String() tag := token.Token().String()
builder.WriteString(transformTag(tag)) processTagToken(&state, &builder, tag)
} }
} }
} }
func transformTag(tag string) string { func processTagToken(state *htmlConvertState, writer io.StringWriter, tag string) {
switch tag { switch tag {
case "<br>": case "<br>", "<p>", "</p>", "</li>":
return "\n" _, _ = writer.WriteString("\n")
case "<p>", "</p>": case "<ul>":
return "\n" state.htmlListType = htmlUnorderedList
_, _ = writer.WriteString("\n")
case "<ol>":
state.htmlListType = htmlOrderedList
_, _ = writer.WriteString("\n")
case "</ul>":
state.htmlListType = htmlNoList
case "</ol>":
state.htmlListType = htmlNoList
state.orderedListIndex = 1
case "<li>":
switch state.htmlListType {
case htmlUnorderedList:
_, _ = writer.WriteString("• ")
case htmlOrderedList:
_, _ = writer.WriteString(strconv.Itoa(state.orderedListIndex) + ". ")
state.orderedListIndex++
}
} }
return ""
} }