// Generate CSV from Google Books queries.
// Columns: title, authors, publishedDate, ISBN-13, ISBN-10, URL.
package main

import (
	"bufio"
	"encoding/json"
	"fmt"
	"io"
	"log"
	"os"
	"strings"
)

// readFile returns the full contents of the named file, aborting the
// program on any error (this tool is a one-shot batch job, so fatal
// exit is the intended error policy throughout).
func readFile(name string) []byte {
	in, err := os.Open(name)
	if err != nil {
		log.Fatal(err)
	}
	defer in.Close()

	body, err := io.ReadAll(in)
	if err != nil {
		log.Fatal(err)
	}
	return body
}

// write sends contents to writer, aborting the program on any error.
func write(writer *bufio.Writer, contents string) {
	if _, err := writer.WriteString(contents); err != nil {
		log.Fatal(err)
	}
}

// bookRecord extracts one CSV row from a single element of the "items"
// array of a Google Books API volumes response. ok is false when the
// element does not have the expected object shape; missing optional
// fields simply produce empty columns.
func bookRecord(item interface{}) (row string, ok bool) {
	obj, ok := item.(map[string]interface{})
	if !ok {
		return "", false
	}
	vol, ok := obj["volumeInfo"].(map[string]interface{})
	if !ok {
		return "", false
	}

	// Fields are per-record so values never leak from a previous book.
	var title, authors, publishedDate, isbn13, isbn10, url string

	// ISBN-10 is sometimes 9 digits; ISBN-13 is always 13 digits.
	if ids, idsOK := vol["industryIdentifiers"].([]interface{}); idsOK {
		for _, v := range ids {
			id, idOK := v.(map[string]interface{})
			if !idOK {
				continue
			}
			text, _ := id["identifier"].(string)
			if len(text) == 13 {
				isbn13 = text
			} else {
				isbn10 = text
			}
		}
	}

	if names, namesOK := vol["authors"].([]interface{}); namesOK {
		parts := make([]string, 0, len(names))
		for _, a := range names {
			if s, sOK := a.(string); sOK {
				parts = append(parts, s)
			}
		}
		authors = strings.Join(parts, " & ")
	}

	if s, sOK := vol["title"].(string); sOK {
		title = s
	}
	if s, sOK := vol["publishedDate"].(string); sOK {
		publishedDate = s
	}
	if s, sOK := vol["infoLink"].(string); sOK {
		url = s
	}

	// Commas would corrupt the unquoted CSV output; strip them from
	// every free-form field, not just the title.
	title = strings.ReplaceAll(title, ",", "")
	authors = strings.ReplaceAll(authors, ",", "")

	return fmt.Sprintf("%s,%s,%s,%s,%s,%s\n",
		title, authors, publishedDate, isbn13, isbn10, url), true
}

// processTitle writes <title>.csv from the JSON query results stored in
// the file named title. Being a separate function makes the deferred
// Close/Flush fire once per title instead of piling up until exit.
func processTitle(title string) {
	// Always create (truncate) the output: the previous read-only
	// os.Open of an existing file made every write fail.
	out, err := os.Create(title + ".csv")
	if err != nil {
		log.Fatal(err)
	}
	defer out.Close()

	w := bufio.NewWriter(out)
	defer w.Flush()

	// Print CSV header.
	write(w, "Title, Authors, Published Date, ISBN-13, ISBN-10, URL\n")

	// Parse the JSON query results into a generic tree.
	var data interface{}
	if err := json.Unmarshal(readFile(title), &data); err != nil {
		log.Fatal(err)
	}
	root, ok := data.(map[string]interface{})
	if !ok {
		log.Fatalf("%s: unexpected JSON shape", title)
	}
	items, ok := root["items"].([]interface{})
	if !ok {
		// A query can legitimately return no items; emit header only.
		log.Printf("%s: no items in response", title)
		return
	}
	for _, item := range items {
		if row, rowOK := bookRecord(item); rowOK {
			write(w, row)
		}
	}
}

func main() {
	// "list" names one results file per Google Books query, one per line.
	for _, title := range strings.Split(string(readFile("list")), "\n") {
		if len(title) < 3 {
			continue // skip blank and junk lines
		}
		fmt.Println(title)
		processTitle(title)
	}
}