package translate import ( "io" "pgset/lib/parse" "errors" "strings" "regexp" ) type State struct { // Which type of anchor was last begun: href or id? lastAnchor string // Consume input until further notice? consume bool // Stay on the same line of output until further notice? sameLine bool // Stat on the same line of output at least for this input? sameLineOnce bool // metadata title string language string creator string subject string } func chapter(t *parse.Tag, language string) (string, error) { } func div(t *parse.Tag, language string, state *State) (string, error) { // Divs specify sections of the text, via the "class" attribute // class: // - chapter: new chapter // - project-gutenberg-license: license text // - secthead: sections of license text // - fig: image // - container: meaningless // id: // - pg-start-separator: start of actual text // - pg-end-separator: end of actual text // - pg-header-authlist: list of authors types := []string{"href", "id"} // Least bad expression of this idea in this language classes := map[string]func (*parse.Tag, string) (string, error) { "chapter": chapter, "project-gutenberg-license": project-gutenberg-license, "secthead": secthead, "fig": fig, "container": container, } ids := map[string]func (*parse.Tag, string) (string, error) { "pg-start-separator": pg-start-separator, "pg-end-separator": pg-end-separator, "pg-header-authlist": pg-header-authlist, } if _, ok := t.Attributes["class"]; ok { } else if _, ok := t.Attributes["id"]; ok { } else { // junk } } func style(state *State) { state.consume = true } func slashStyle(state *State) { state.consume = false } func href(t *parse.Tag, language string) (string, error) { switch language { case "Neatroff": // .post.url #chapter23\ntext\n\n return ".post.url " + t.Attributes["href"], nil default: return "", errors.New("Unimplemented") } } func id(t *parse.Tag, language string) (string, error) { switch language { case "Neatroff": // .post.name chapter23 return ".post.name " + strings.Replace(t.Attributes["id"], "#", "", 1), nil default: return "", errors.New("Unimplemented") } } func a(t *parse.Tag, language string, state *State) (string, error) { // There are two kinds of anchors. // href: refer to a resource, internal or external // id: serve as an anchor to be referred to types := []string{"href", "id"} // Least bad expression of this idea in this language m := map[string]func (*parse.Tag, string) (string, error) { "href": href, "id": id, } for _, i := range types { if _, ok := t.Attributes[i]; ok { state.lastAnchor = i s, err := m[i](t, language) if err != nil { return "", err } return s, nil } } return "", errors.New("Broken anchor. " + t.String()) } func slashA(t *parse.Tag, language string, state *State) (string, error) { var s string switch state.lastAnchor { case "href": fallthrough case "id": fallthrough default: s = "\n" } state.lastAnchor = "" return s, nil } func meta(t *parse.Tag, language string, state *State) (string, error) { // Multiple types of this tag. Only those with "name" attribute are interesting s := "" if _, ok := t.Attributes["name"]; ok { s += "name" } return s, nil } func i(state *State) (string, error) { state.sameLine = true return `\fI`, nil } func slashI(state *State) (string, error) { state.sameLine = false state.sameLineOnce = true return `\fP`, nil } func strong(state *State) (string, error) { state.sameLine = true return `\fB`, nil } func slashStrong(state *State) (string, error) { return slashI(state) } var FilterMap = map[string]string{ // HTML Entities be gone "\\&[^;]*;": "", // hi—there → hi – there "([A-Za-z])—([A-Za-z])": "\\1 – \\2", } func Filter(in []byte) ([]byte, error) { for k, v := range FilterMap { regexp, err := regexp.Compile(k) if err != nil { return []byte{}, err } in = regexp.ReplaceAll(in, []byte(v)) } return in, nil } // Among all languages, these are defined: // - Prefix: What should come before a string of body text // - Postfix: What should come after a string of body text // - Header: Start of document // - Footer: End of document // // Maps for these tags, beginning and end if non-empty: // - a: anchors to bookmarks and urls, switch on attribute // - img: images // - p: paragraphs // - meta, link: metadata // - h{1,2,3,4,5}: headings // - strong, i: emphasis // - table, tbody, tr, td: Table of Contents // - div: chapter separations // - br: line breaks // - blockquote: block quotations // - hr: horizontal rule // // All other tags are ignored. // - span: not meaningful, essentially empty // - body: not meaningful // - head: not meaningful // - Non-standard and atypical extensions var Definitions = map[string]map[string]any{ "ConTeXt": { // Must generate. "a": a, "/a": ``, // Standardized metadata. // charset: character set // "meta": func(t *parse.Tag) (string, error) { var s string for k, v := range t.Attributes { s = k + v } return s, nil }, "/meta": ``, "div": func(t *parse.Tag) (string, error) { var s string for k, v := range t.Attributes { s = k + v } return s, nil }, "/div": ``, "Header": func(t *parse.Tag) (string, error) { var s string for k, v := range t.Attributes { s = k + v } return s, nil }, "Footer": func(t *parse.Tag) (string, error) { var s string for k, v := range t.Attributes { s = k + v } return s, nil }, // link tags are empty "link": func(t *parse.Tag) (string, error) { var s string for k, v := range t.Attributes { s = k + v } return s, nil }, // img tags are empty "img": func(t *parse.Tag) (string, error) { var s string for k, v := range t.Attributes { s = k + v } return s, nil }, // Constants. "p": ``, "/p": ``, "i": ``, "/i": ``, "strong": ``, "/strong": ``, "table": ``, "/table": ``, "tr": ``, "/tr": ``, "tbody": ``, "/tbody": ``, "td": ``, "/td": ``, "h1": ``, "/h1": ``, "h2": ``, "/h2": ``, "h3": ``, "/h3": ``, "h4": ``, "/h4": ``, "h5": ``, "/h5": ``, "blockquote": ``, "/blockquote": ``, // br tags are empty "br": ``, // hr tags are empty "hr": ``, "Prefix": ``, "Postfix": ``, }, "kerTeX": {}, "XeTeX": {}, "LaTeX memoir": {}, "Neatroff": { "Header": `.ad pb .pmll 20 999999 .ss 12 0 .ssh 15 .kn 1 .hlm 2 .ds margin 1.25i .po \*[margin] .de header ' sp \*[margin] .. .de footer ' bp .. .de pg . ti +1m .. .de empty .. .blm empty .wh 0 header .wh -\*[margin] footer .ds measure 6i .ll \*[measure] .ds body_size 11 .ds body_spacing 14 .ps \*[body_size] .vs \*[body_spacing] .de reset . ps \*[body_size] . vs \*[body_spacing] .. .de p . ti +1m .. .de /p .. .de i .. .de /i .. .de div . sp .. .de /div . sp .. .\" As the file is traversed, record .sy to a temporary file. .\" Format using sed, source using .so. Idea taken from utmac. .\" Then, at the document's end, write the TOC from the recording. .de a .. .de /a .. .de strong . ft B .. .de /strong . ft .. .de span .. .de /span .. .de h1 . ft \*[FONT_DISPLAY] . ps 60 . vs 80 .. .de /h1 . reset .. .de h2 . ft \*[FONT_SUBHEAD] . ps 40 . vs 60 .. .de /h2 . reset .. .de h3 . h2 . ps 30 . vs 20 .. .de /h3 . /h2 .. .ds page_width 8.5i .ds page_height 11i .ds top_margin 1i .ds side_margin 2i .ds bottom_margin \*[side_margin] .\" .po \*[side_margin] .ll \*[page_width]-(\*[side_margin]*2u) .de head ' sp \*[top_margin] .. .wh 0 head .de foot ' bp .. .wh -\*[bottom_margin] foot`, "Footer": "\n.bp\n", "Prefix": "", "Postfix": "", "NAE": ``, "a": a, "/a": slashA, // Constants. "p": `.pg`, "/p": ``, "i": i, "/i": slashI, "strong": strong, "/strong": slashStrong, "table": ``, "/table": ``, "tr": ``, "/tr": ``, "tbody": ``, "/tbody": ``, "td": ``, "/td": ``, "h1": ``, "/h1": ``, "h2": ``, "/h2": ``, "h3": ``, "/h3": ``, "h4": ``, "/h4": ``, "h5": ``, "/h5": ``, "blockquote": ``, "/blockquote": ``, // br tags are empty "br": ``, // hr tags are empty "hr": ``, "head": ``, "/head": ``, // Unnecessary copy of title "title": style, "/title": slashStyle, "style": style, "/style": slashStyle, "html": ``, "/html": ``, "link": ``, "meta": ``, "body": ``, "/body": ``, "div": ``, "/div": ``, "section": ``, "/section": ``, "span": ``, "/span": ``, "li": ``, "/li": ``, "ul": ``, "/ul": ``, "img": ``, }, "Utmac": {}, "groff": {}, "Heirloom Troff": {}, "SILE": {}, "Typst": {}, } // These keys defined: // - "paper size" // - "type family" // - "columns" // - "justification" // - "margins" // - "headers" // - "page numbers" var Styles = map[string]map[string]map[string]any{ "Margins": { "Neatroff": { "Wide": ``, "Thin": ``, }, "ConTeXt": {}, }, } var debug bool = false func Translate(document []any, language string, output io.Writer) error { output.Write([]byte(Definitions[language]["Header"].(string) + "\n")) state := new(State) var err error for _, i := range document { var out string switch t := i.(type) { case string: if debug { output.Write([]byte("|" + t + "|"+ "\n")) } out = Definitions[language]["Prefix"].(string) + t + Definitions[language]["Postfix"].(string) case *parse.Tag: if debug { output.Write([]byte(t.String() + "\n")) } switch u := Definitions[language][t.Name].(type) { case string: out = u case func(*parse.Tag) (string, error): out, err = u(t) case func(*parse.Tag, string, *State) (string, error): out, err = u(t, language, state) case func(*State) (string, error): out, err = u(state) case func(*State): u(state) out = "" case nil: out = "dne" + t.String() + "\n" } } if err != nil { return err } if out != "" && out != "\n" && !state.consume { if !state.sameLine && !state.sameLineOnce { out += "\n" } if state.sameLineOnce { state.sameLineOnce = false } output.Write([]byte(out)) } else { if debug { output.Write([]byte("empty" + "\n")) } } } output.Write([]byte(Definitions[language]["Footer"].(string) + "\n")) return nil }