diff options
-rw-r--r-- | lib/parse/parse.go | 118 | ||||
-rw-r--r-- | lib/parse/parse_test.go | 98 |
2 files changed, 198 insertions, 18 deletions
// Post-patch contents of lib/parse/parse.go: a small byte-oriented reader that
// splits markup into text runs and tags.

// Tag is one markup tag as it appeared between '<' and '>'.
//
// name is everything up to the first whitespace or '>', so a closing tag keeps
// its leading slash (for example "/p"). attributes maps each attribute name to
// its value with surrounding quotes removed; an attribute written without a
// value maps to the empty string.
type Tag struct {
	name       string
	attributes map[string]string
}

// MissingSentinelError is returned by ReadUntil when the input ends, or fails,
// before any of the requested sentinel bytes has been seen.
type MissingSentinelError struct {
	// Sentinels are the bytes ReadUntil was looking for.
	Sentinels []byte
	// Partial is the text consumed before the read failed.
	Partial string
	// Err is the underlying read error; io.EOF at a normal end of input.
	Err error
}

// Error keeps the historical message (capital letter included) so callers that
// compare the text keep working.
func (e *MissingSentinelError) Error() string {
	return fmt.Sprintf("Missing '%s'", string(e.Sentinels))
}

// Unwrap exposes the underlying read error to errors.Is and errors.As.
func (e *MissingSentinelError) Unwrap() error { return e.Err }

// ReadUntil consumes r one byte at a time until it meets one of sentinels.
//
// It returns the text that preceded the sentinel (the sentinel itself is
// consumed but not included) and the sentinel that was found. If reading fails
// first, it returns "", 0 and a *MissingSentinelError that wraps the read error
// and carries the text consumed so far.
func ReadUntil(r io.Reader, sentinels []byte) (string, byte, error) {
	var (
		one [1]byte
		buf []byte
	)
	for {
		// io.ReadFull retries reads that return (0, nil) and does not drop a
		// byte that is delivered together with io.EOF.
		if _, err := io.ReadFull(r, one[:]); err != nil {
			return "", 0, &MissingSentinelError{
				Sentinels: append([]byte(nil), sentinels...),
				Partial:   string(buf),
				Err:       err,
			}
		}
		for _, s := range sentinels {
			if one[0] == s {
				return string(buf), s, nil
			}
		}
		buf = append(buf, one[0])
	}
}

// isTagSpace reports whether b separates the parts of a tag.
func isTagSpace(b byte) bool {
	switch b {
	case ' ', '\t', '\n', '\r':
		return true
	}
	return false
}

// readAttributeValue reads the attribute value that follows '='.
//
// It returns the value, the byte that ended it (whitespace or '>'), and a
// carry string. The carry is non-empty only when a closing quote is directly
// followed by another character: that character has already been consumed and
// belongs to the next attribute name, and the separator is reported as ' '.
func readAttributeValue(r io.Reader) (string, byte, string, error) {
	var b [1]byte
	if _, err := io.ReadFull(r, b[:]); err != nil {
		return "", 0, "", err
	}

	switch first := b[0]; {
	case first == '\'' || first == '"':
		// Single and double quotation marks are significant: the value runs
		// to the matching quote and may contain whitespace and '>'.
		value, _, err := ReadUntil(r, []byte{first})
		if err != nil {
			return "", 0, "", err
		}
		if _, err := io.ReadFull(r, b[:]); err != nil {
			return "", 0, "", err
		}
		if next := b[0]; next == '>' || isTagSpace(next) {
			return value, next, "", nil
		}
		return value, ' ', string(b[:]), nil

	case first == '>' || isTagSpace(first):
		// "key=" with nothing after it: the value is empty.
		return "", first, "", nil

	default:
		rest, sep, err := ReadUntil(r, []byte{' ', '\t', '\n', '\r', '>'})
		if err != nil {
			return "", 0, "", err
		}
		// string(b[:]) rather than string(first): converting a single byte
		// would re-encode it as a rune and corrupt non-ASCII input.
		return string(b[:]) + rest, sep, "", nil
	}
}

// ReadTag reads one tag from r, which must be positioned just after the
// opening '<'. It consumes input up to and including the closing '>'.
//
// Attributes may be unquoted, single-quoted or double-quoted. An attribute
// without "=value" is stored with an empty value, and repeated whitespace
// between attributes is ignored. '/' gets no special treatment.
//
// On failure the returned *Tag is nil. The error wraps the underlying read
// error, so errors.Is(err, io.EOF) identifies a tag cut short by end of input.
func ReadTag(r io.Reader) (*Tag, error) {
	name, sep, err := ReadUntil(r, []byte{' ', '\t', '\n', '\r', '>'})
	if err != nil {
		return nil, err
	}
	tag := &Tag{name: name, attributes: make(map[string]string)}

	carry := ""
	for sep != '>' {
		var key string
		// Stop at whitespace and '>' as well as '=' so that a value-less
		// attribute or a trailing space cannot make us read past the tag.
		key, sep, err = ReadUntil(r, []byte{'=', ' ', '\t', '\n', '\r', '>'})
		if err != nil {
			return nil, err
		}
		key, carry = carry+key, ""

		if sep != '=' {
			if key != "" {
				tag.attributes[key] = ""
			}
			continue
		}

		var value string
		value, sep, carry, err = readAttributeValue(r)
		if err != nil {
			return nil, fmt.Errorf("reading value of attribute %q: %w", key, err)
		}
		tag.attributes[key] = value
	}

	return tag, nil
}

/*
	Some elements are empty. Here's a list taken
	from https://www.geeksforgeeks.org/html/what-are-empty-elements-in-html/
	var empty = []string{"area", "base", "br", "col", "embed", "hr", "img", "input", "link", "meta", "param", "source", "track", "wbr"}
*/

// Parse reads markup from r until end of input and returns its parts in
// document order. Each element of the result is either a string (a non-empty
// run of text between tags) or a *Tag.
//
// Text after the last tag is included. A read error other than io.EOF, or a
// tag cut short by end of input, is returned as an error with a nil result.
func Parse(r io.Reader) ([]any, error) {
	var document []any
	for {
		text, _, err := ReadUntil(r, []byte{'<'})
		if err != nil {
			var missing *MissingSentinelError
			if !errors.As(err, &missing) || !errors.Is(missing.Err, io.EOF) {
				return nil, err
			}
			// Clean end of input: keep whatever text followed the last tag.
			if missing.Partial != "" {
				document = append(document, missing.Partial)
			}
			return document, nil
		}
		if text != "" {
			document = append(document, text)
		}

		tag, err := ReadTag(r)
		if err != nil {
			return nil, err
		}
		document = append(document, tag)
	}
}