checkpoint: parse ordered list

This commit is contained in:
Dan Anglin 2024-06-01 02:57:48 +01:00
parent ffc40d5165
commit a1f5651b4c
Signed by: dananglin
GPG key ID: 0C1D44CFBEE68638

View file

@ -2,15 +2,23 @@ package utilities
import ( import (
"io" "io"
"strconv"
"strings" "strings"
"golang.org/x/net/html" "golang.org/x/net/html"
) )
// List contexts tracked while converting HTML to plain text. The values are
// plain ints because they are threaded through processTagToken as int.
const (
	// htmlNoList is the starting state and the state restored when a
	// </ul> or </ol> tag is seen.
	htmlNoList int = 0
	// htmlOrderedList is entered on <ol>; list items are numbered.
	htmlOrderedList int = 1
	// htmlUnorderedList is entered on <ul>; list items are bulleted.
	htmlUnorderedList int = 2
)
func ConvertHTMLToText(text string) string { func ConvertHTMLToText(text string) string {
var builder strings.Builder var builder strings.Builder
var unorderedList bool htmlList := htmlNoList
orderedListNumber := 1
token := html.NewTokenizer(strings.NewReader(text)) token := html.NewTokenizer(strings.NewReader(text))
@ -24,24 +32,35 @@ func ConvertHTMLToText(text string) string {
builder.WriteString(text) builder.WriteString(text)
case html.StartTagToken, html.EndTagToken: case html.StartTagToken, html.EndTagToken:
tag := token.Token().String() tag := token.Token().String()
unorderedList = processTag(&builder, tag, unorderedList) htmlList, orderedListNumber = processTagToken(&builder, tag, htmlList, orderedListNumber)
} }
} }
} }
func processTag(writer io.StringWriter, tag string, unorderedList bool) bool { func processTagToken(writer io.StringWriter, tag string, htmlList, orderedListNumber int) (int, int) {
switch tag { switch tag {
case "<br>", "<p>", "</p>", "</li>": case "<br>", "<p>", "</p>", "</li>":
_, _ = writer.WriteString("\n") _, _ = writer.WriteString("\n")
case "<ul>": case "<ul>":
unorderedList = true htmlList = htmlUnorderedList
_, _ = writer.WriteString("\n")
case "<ol>":
htmlList = htmlOrderedList
_, _ = writer.WriteString("\n")
case "</ul>": case "</ul>":
unorderedList = false htmlList = htmlNoList
case "</ol>":
htmlList = htmlNoList
orderedListNumber = 1
case "<li>": case "<li>":
if unorderedList { switch htmlList {
case htmlUnorderedList:
_, _ = writer.WriteString("• ") _, _ = writer.WriteString("• ")
case htmlOrderedList:
_, _ = writer.WriteString(strconv.Itoa(orderedListNumber) + ". ")
orderedListNumber++
} }
} }
return unorderedList return htmlList, orderedListNumber
} }