Added a type to preserve the state of the conversion process where necessary

This commit is contained in:
Dan Anglin 2024-06-01 07:13:14 +01:00
parent a1f5651b4c
commit d681404c78
Signed by: dananglin
GPG key ID: 0C1D44CFBEE68638

View file

@ -14,11 +14,18 @@ const (
htmlUnorderedList htmlUnorderedList
) )
// htmlConvertState holds the list-tracking state that ConvertHTMLToText
// creates and processTagToken reads and updates through a pointer while the
// HTML tokens are processed.
type htmlConvertState struct {
// htmlListType records which kind of list the tags seen so far have
// opened: it is set to htmlUnorderedList on "<ul>", to htmlOrderedList on
// "<ol>", and back to htmlNoList on "</ul>" or "</ol>".
htmlListType int
// orderedListNumber is the number written before the next "<li>" inside
// an ordered list. It starts at 1, is incremented after each such item,
// and is reset to 1 on "</ol>".
orderedListNumber int
}
func ConvertHTMLToText(text string) string { func ConvertHTMLToText(text string) string {
var builder strings.Builder var builder strings.Builder
htmlList := htmlNoList state := htmlConvertState{
orderedListNumber := 1 htmlListType: htmlNoList,
orderedListNumber: 1,
}
token := html.NewTokenizer(strings.NewReader(text)) token := html.NewTokenizer(strings.NewReader(text))
@ -32,35 +39,33 @@ func ConvertHTMLToText(text string) string {
builder.WriteString(text) builder.WriteString(text)
case html.StartTagToken, html.EndTagToken: case html.StartTagToken, html.EndTagToken:
tag := token.Token().String() tag := token.Token().String()
htmlList, orderedListNumber = processTagToken(&builder, tag, htmlList, orderedListNumber) processTagToken(&state, &builder, tag)
} }
} }
} }
func processTagToken(writer io.StringWriter, tag string, htmlList, orderedListNumber int) (int, int) { func processTagToken(state *htmlConvertState, writer io.StringWriter, tag string) {
switch tag { switch tag {
case "<br>", "<p>", "</p>", "</li>": case "<br>", "<p>", "</p>", "</li>":
_, _ = writer.WriteString("\n") _, _ = writer.WriteString("\n")
case "<ul>": case "<ul>":
htmlList = htmlUnorderedList state.htmlListType = htmlUnorderedList
_, _ = writer.WriteString("\n") _, _ = writer.WriteString("\n")
case "<ol>": case "<ol>":
htmlList = htmlOrderedList state.htmlListType = htmlOrderedList
_, _ = writer.WriteString("\n") _, _ = writer.WriteString("\n")
case "</ul>": case "</ul>":
htmlList = htmlNoList state.htmlListType = htmlNoList
case "</ol>": case "</ol>":
htmlList = htmlNoList state.htmlListType = htmlNoList
orderedListNumber = 1 state.orderedListNumber = 1
case "<li>": case "<li>":
switch htmlList { switch state.htmlListType {
case htmlUnorderedList: case htmlUnorderedList:
_, _ = writer.WriteString("• ") _, _ = writer.WriteString("• ")
case htmlOrderedList: case htmlOrderedList:
_, _ = writer.WriteString(strconv.Itoa(orderedListNumber) + ". ") _, _ = writer.WriteString(strconv.Itoa(state.orderedListNumber) + ". ")
orderedListNumber++ state.orderedListNumber++
} }
} }
return htmlList, orderedListNumber
} }