// Source: enbas/internal/utilities/html.go

package utilities
import (
2024-06-01 01:08:43 +01:00
"io"
2024-06-01 02:57:48 +01:00
"strconv"
"strings"
"golang.org/x/net/html"
)
2024-06-01 02:57:48 +01:00
// List contexts recorded in htmlConvertState.htmlListType while HTML is
// being converted to text.
const (
	// htmlNoList means no list is currently open.
	htmlNoList int = iota
	// htmlOrderedList means the converter is inside an <ol> element.
	htmlOrderedList
	// htmlUnorderedList means the converter is inside a <ul> element.
	htmlUnorderedList
)
// htmlConvertState carries the list context from one tag token to the next
// while an HTML fragment is converted to text.
type htmlConvertState struct {
	// htmlListType is the kind of list currently open: one of htmlNoList,
	// htmlOrderedList or htmlUnorderedList.
	htmlListType int

	// orderedListNumber is the number written in front of the next item of
	// an ordered list.
	orderedListNumber int
}
2024-06-01 01:08:43 +01:00
func ConvertHTMLToText(text string) string {
var builder strings.Builder
state := htmlConvertState{
htmlListType: htmlNoList,
orderedListNumber: 1,
}
2024-06-01 01:08:43 +01:00
token := html.NewTokenizer(strings.NewReader(text))
for {
tt := token.Next()
switch tt {
case html.ErrorToken:
return builder.String()
case html.TextToken:
text := token.Token().Data
builder.WriteString(text)
case html.StartTagToken, html.EndTagToken:
tag := token.Token().String()
processTagToken(&state, &builder, tag)
}
}
}
func processTagToken(state *htmlConvertState, writer io.StringWriter, tag string) {
switch tag {
2024-06-01 01:08:43 +01:00
case "<br>", "<p>", "</p>", "</li>":
_, _ = writer.WriteString("\n")
case "<ul>":
state.htmlListType = htmlUnorderedList
2024-06-01 02:57:48 +01:00
_, _ = writer.WriteString("\n")
case "<ol>":
state.htmlListType = htmlOrderedList
2024-06-01 02:57:48 +01:00
_, _ = writer.WriteString("\n")
2024-06-01 01:08:43 +01:00
case "</ul>":
state.htmlListType = htmlNoList
2024-06-01 02:57:48 +01:00
case "</ol>":
state.htmlListType = htmlNoList
state.orderedListNumber = 1
2024-06-01 01:08:43 +01:00
case "<li>":
switch state.htmlListType {
2024-06-01 02:57:48 +01:00
case htmlUnorderedList:
2024-06-01 01:08:43 +01:00
_, _ = writer.WriteString("• ")
2024-06-01 02:57:48 +01:00
case htmlOrderedList:
_, _ = writer.WriteString(strconv.Itoa(state.orderedListNumber) + ". ")
state.orderedListNumber++
2024-06-01 01:08:43 +01:00
}
}
}