diff options
Diffstat (limited to 'lib/parse/parse.go')
-rw-r--r-- | lib/parse/parse.go | 118 |
1 files changed, 100 insertions, 18 deletions
// Contents of lib/parse/parse.go after the change (package parse).
// Required imports: "bytes", "errors", "fmt", "io".

// Tag is one markup tag: its name and its attributes. For a closing tag the
// name keeps its leading slash (for example "/a").
type Tag struct {
	name       string
	attributes map[string]string
}

// missingSentinelError reports that input ended, or a read failed, before any
// of the requested sentinel bytes was seen. Its text is kept as
// "Missing '<sentinels>'" so callers that compare the message keep working;
// the underlying read error is available through errors.Is / errors.As.
type missingSentinelError struct {
	sentinels string
	cause     error
}

func (e *missingSentinelError) Error() string { return fmt.Sprintf("Missing '%s'", e.sentinels) }

func (e *missingSentinelError) Unwrap() error { return e.cause }

// readUntil consumes bytes from r until one of sentinels is read. It returns
// the bytes seen before the sentinel and the sentinel itself. On a read error
// it returns the text consumed so far together with the reader's own error,
// so the caller can tell a clean end of input (io.EOF) from a real failure.
func readUntil(r io.Reader, sentinels []byte) (string, byte, error) {
	var (
		b    [1]byte
		text []byte
	)
	for {
		// io.ReadFull retries (0, nil) reads and keeps a byte delivered
		// together with io.EOF, both of which a bare r.Read would mishandle.
		if _, err := io.ReadFull(r, b[:]); err != nil {
			return string(text), 0, err
		}
		if bytes.IndexByte(sentinels, b[0]) >= 0 {
			return string(text), b[0], nil
		}
		text = append(text, b[0])
	}
}

// readByte reads exactly one byte from r. It is only used inside a tag, where
// running out of input is always an error, so io.EOF is reported as
// io.ErrUnexpectedEOF.
func readByte(r io.Reader) (byte, error) {
	var b [1]byte
	if _, err := io.ReadFull(r, b[:]); err != nil {
		if errors.Is(err, io.EOF) {
			err = io.ErrUnexpectedEOF
		}
		return 0, err
	}
	return b[0], nil
}

// ReadUntil reads from r one byte at a time until it meets any byte in
// sentinels. It returns the text that preceded the sentinel and the sentinel
// that was found; the sentinel is consumed but not included in the text.
//
// If the input ends or a read fails first, it returns an error whose message
// is "Missing '<sentinels>'" and which wraps the reader's error.
func ReadUntil(r io.Reader, sentinels []byte) (string, byte, error) {
	text, found, err := readUntil(r, sentinels)
	if err != nil {
		return "", 0, &missingSentinelError{sentinels: string(sentinels), cause: err}
	}
	return text, found, nil
}

// readAttrValue reads an attribute value, starting at the byte after '='.
// It returns the value and the byte that followed it (the byte after the
// closing quote for quoted values, otherwise the ' ' or '>' that ended it).
func readAttrValue(r io.Reader) (string, byte, error) {
	first, err := readByte(r)
	if err != nil {
		return "", 0, err
	}

	switch first {
	case '\'', '"':
		// Quoted: everything up to the matching quote is the value.
		value, _, err := ReadUntil(r, []byte{first})
		if err != nil {
			return "", 0, err
		}
		next, err := readByte(r)
		if err != nil {
			return "", 0, err
		}
		return value, next, nil
	case ' ', '>':
		// Empty unquoted value: the byte just read is already the terminator.
		return "", first, nil
	default:
		rest, end, err := ReadUntil(r, []byte{' ', '>'})
		if err != nil {
			return "", 0, err
		}
		// string([]byte{...}) keeps the raw byte; string(first) would
		// re-encode bytes >= 0x80 as a rune and corrupt UTF-8 input.
		return string([]byte{first}) + rest, end, nil
	}
}

// ReadTag parses one tag from r. The opening '<' must already have been
// consumed; ReadTag reads up to and including the closing '>'.
//
// Attributes are written key=value, with the value optionally wrapped in
// single or double quotes. A bare token with no '=' is stored as an attribute
// with an empty value, and runs of spaces between attributes are skipped.
// Any read failure or premature end of input is returned as an error.
func ReadTag(r io.Reader) (*Tag, error) {
	name, sentinel, err := ReadUntil(r, []byte{' ', '>'})
	if err != nil {
		return nil, err
	}
	tag := &Tag{name: name, attributes: make(map[string]string)}

	for sentinel != '>' {
		// Stop the key at ' ' or '>' as well as '=', so a tag such as
		// "<p >" never scans past its own end looking for '='.
		key, keyEnd, err := ReadUntil(r, []byte{'=', ' ', '>'})
		if err != nil {
			return nil, err
		}
		if keyEnd != '=' {
			if key != "" {
				tag.attributes[key] = ""
			}
			sentinel = keyEnd
			continue
		}

		value, next, err := readAttrValue(r)
		if err != nil {
			return nil, fmt.Errorf("reading value of attribute %q: %w", key, err)
		}
		tag.attributes[key] = value
		sentinel = next
	}

	return tag, nil
}

// Some elements are empty. Here's a list taken from
// https://www.geeksforgeeks.org/html/what-are-empty-elements-in-html/
//
//	var empty = []string{"area", "base", "br", "col", "embed", "hr", "img", "input", "link", "meta", "param", "source", "track", "wbr"}

// Parse reads markup from r and returns it as a flat sequence in document
// order: each run of text is a string and each tag is a *Tag. Empty text runs
// are omitted. Text after the final tag is included.
//
// Parse returns a nil error when the input ends between tags. It returns an
// error if the input ends inside a tag or if r reports a read error.
func Parse(r io.Reader) ([]any, error) {
	var document []any
	for {
		text, _, err := readUntil(r, []byte{'<'})
		if text != "" {
			document = append(document, text)
		}
		if errors.Is(err, io.EOF) {
			// Clean end of input outside any tag.
			return document, nil
		}
		if err != nil {
			return nil, err
		}

		tag, err := ReadTag(r)
		if err != nil {
			return nil, err
		}
		document = append(document, tag)
	}
}