checkpoint: parse ordered list
This commit is contained in:
parent
ffc40d5165
commit
a1f5651b4c
1 changed file with 26 additions and 7 deletions
|
@ -2,15 +2,23 @@ package utilities
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"io"
|
"io"
|
||||||
|
"strconv"
|
||||||
"strings"
|
"strings"
|
||||||
|
|
||||||
"golang.org/x/net/html"
|
"golang.org/x/net/html"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
// List contexts tracked while converting HTML to plain text. The value is
// threaded through processTagToken and decides which marker, if any, is
// written in front of a list item.
const (
	// htmlNoList means no list is currently open; "<li>" writes no marker.
	htmlNoList int = iota
	// htmlOrderedList means an "<ol>" is open; items are prefixed with "N. ".
	htmlOrderedList
	// htmlUnorderedList means a "<ul>" is open; items are prefixed with "• ".
	htmlUnorderedList
)
|
||||||
|
|
||||||
func ConvertHTMLToText(text string) string {
|
func ConvertHTMLToText(text string) string {
|
||||||
var builder strings.Builder
|
var builder strings.Builder
|
||||||
|
|
||||||
var unorderedList bool
|
htmlList := htmlNoList
|
||||||
|
orderedListNumber := 1
|
||||||
|
|
||||||
token := html.NewTokenizer(strings.NewReader(text))
|
token := html.NewTokenizer(strings.NewReader(text))
|
||||||
|
|
||||||
|
@ -24,24 +32,35 @@ func ConvertHTMLToText(text string) string {
|
||||||
builder.WriteString(text)
|
builder.WriteString(text)
|
||||||
case html.StartTagToken, html.EndTagToken:
|
case html.StartTagToken, html.EndTagToken:
|
||||||
tag := token.Token().String()
|
tag := token.Token().String()
|
||||||
unorderedList = processTag(&builder, tag, unorderedList)
|
htmlList, orderedListNumber = processTagToken(&builder, tag, htmlList, orderedListNumber)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func processTag(writer io.StringWriter, tag string, unorderedList bool) bool {
|
func processTagToken(writer io.StringWriter, tag string, htmlList, orderedListNumber int) (int, int) {
|
||||||
switch tag {
|
switch tag {
|
||||||
case "<br>", "<p>", "</p>", "</li>":
|
case "<br>", "<p>", "</p>", "</li>":
|
||||||
_, _ = writer.WriteString("\n")
|
_, _ = writer.WriteString("\n")
|
||||||
case "<ul>":
|
case "<ul>":
|
||||||
unorderedList = true
|
htmlList = htmlUnorderedList
|
||||||
|
_, _ = writer.WriteString("\n")
|
||||||
|
case "<ol>":
|
||||||
|
htmlList = htmlOrderedList
|
||||||
|
_, _ = writer.WriteString("\n")
|
||||||
case "</ul>":
|
case "</ul>":
|
||||||
unorderedList = false
|
htmlList = htmlNoList
|
||||||
|
case "</ol>":
|
||||||
|
htmlList = htmlNoList
|
||||||
|
orderedListNumber = 1
|
||||||
case "<li>":
|
case "<li>":
|
||||||
if unorderedList {
|
switch htmlList {
|
||||||
|
case htmlUnorderedList:
|
||||||
_, _ = writer.WriteString("• ")
|
_, _ = writer.WriteString("• ")
|
||||||
|
case htmlOrderedList:
|
||||||
|
_, _ = writer.WriteString(strconv.Itoa(orderedListNumber) + ". ")
|
||||||
|
orderedListNumber++
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return unorderedList
|
return htmlList, orderedListNumber
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in a new issue