diff options
author | kaa <kaa@disroot.org> | 2023-11-10 11:38:45 -0800 |
---|---|---|
committer | kaa <kaa@disroot.org> | 2023-11-10 11:38:45 -0800 |
commit | f7532ab403f59514e7a4e6ce6de0c4ba2af3e7a9 (patch) | |
tree | be6f6ea26a14c62f930b3d44323bf48fad1905ad /win/hyp.c | |
parent | 3b85d827dfef9c1a0f58ee23657baa7682537f18 (diff) |
Diffstat (limited to 'win/hyp.c')
-rw-r--r-- | win/hyp.c | 262 |
1 files changed, 0 insertions, 262 deletions
diff --git a/win/hyp.c b/win/hyp.c deleted file mode 100644 index 697fed5..0000000 --- a/win/hyp.c +++ /dev/null @@ -1,262 +0,0 @@ -#include <stdio.h> -#include <string.h> -#include <stdlib.h> -#include <hyphen.h> -#include "config.h" - -/* Read a tag into a character array and return its length. */ -int -readtag(char *tag, FILE *in, FILE *out) -{ - char ch; - int i = 0; - - /* Data after a space in a tag is irrelevant. */ - while ((ch = fgetc(in)) != '>' && ch != ' ') { - fputc(ch, out); - tag[i] = ch; - ++i; - } - - if (ch == ' ') { - /* Seek to the end of the tag. */ - while (ch != '>' - && ch != EOF) { - fputc(ch, out); - ch = fgetc(in); - } - if (ch == EOF) { - return -1; - } - fputc(ch, out); - } - else - fputc(ch, out); - - tag[i] = '\0'; - return i; -} - -int -cmptag(char *tag2, FILE *in, FILE *out) -{ - char tag1[MAXWLEN]; - readtag(tag1, in, out); - - return strcmp(tag1, tag2); -} - -int -checktag(char *tag, int tagamt, FILE *in, FILE *out) -{ - int i; - for (i = 0; i < tagamt; ++i) - if (strcmp(tag, taglist[i]) == 0) - return i; - return -1; -} - -/* Check if a character should be skipped. */ -int -checkskip(char ch, int skiplen) -{ - int i; - for (i = 0; i < skiplen; ++i) - if (ch == skip[i]) - return 1; - - return 0; -} - -const char *punct = "';.,\"!?:"; -/* Check if a character is punctuation. */ -int -checkpunct(char ch) -{ - int i; - for (i = 0; punct[i] != '\0'; ++i) - if (ch == punct[i]) - return 1; - - return 0; -} - -const char *blank = " \n\r\t"; -/* Check if a character is a blank. */ -int -checkblank(char ch) -{ - int i; - for (i = 0; blank[i] != '\0'; ++i) - if (ch == blank[i]) - return 1; - - return 0; -} - -/* Loop until the body is found. */ -int -findbody(FILE *in, FILE *out) -{ - char ch; - while ((ch = fgetc(in)) != EOF) { - fputc(ch, out); - if (ch == '<' - && cmptag("body", in, out) == 0) - return 1; - } - - return 0; -} - -/* Hyphenate a word, by means of hyphen library. -This is done so as to leverage sufficient hyphenation -patterns, with the ones used here having been taken -from those developed for TeX. */ -void -hypword(char *word, int len, FILE *in, FILE *out, HyphenDict *dict) -{ - if (len < MINWLEN) { - fprintf(out, "%s", word); - return; - } - - char *hyphens = calloc(len + 6, sizeof(char)); - char *hyphword = calloc(len << 1, sizeof(char)); - char **rep = NULL; - int *pos = NULL, *cut = NULL; - hnj_hyphen_hyphenate2(dict, word, len, hyphens, - hyphword, &rep, &pos, &cut); - -/* fprintf(stderr, "%s\n%s\n%s\n", word, hyphens, hyphword); */ - - /* Process the given hyphenation. */ - int i; - char oldch = ' ', oldoldch = '\0'; - for (i = 0; i < strlen(hyphword); ++i) { - if (hyphword[i] == '=' && oldch != ' ' - && oldoldch != ' ') - fputs("­", out); - else if (hyphword[i] != '=') - fputc(hyphword[i], out); - - oldoldch = oldch; - oldch = hyphword[i]; - } - - free(hyphens); - free(hyphword); -} - -/* Hyphenate the words within a tag. */ -void -hyptag(FILE *in, FILE *out, int skiplen, char *tag, HyphenDict *dict) -{ - char ch, word[MAXWLEN], term[MAXWLEN] = "/"; - int i = 0; - - strcat(term, tag); - - while ((ch = fgetc(in)) != EOF) { - if (i < 0) { - fputc(ch, out); - ++i; - if (checkskip(ch, skiplen)) i -= 3; - continue; - } - if (checkblank(ch)) { - word[i] = '\0'; - hypword(word, i, in, out, dict); - i = 0; - } - if (checkpunct(ch)) { - word[i] = '\0'; - hypword(word, i, in, out, dict); - fputc(ch, out); - i = 0; - } - else if (checkskip(ch, skiplen)) { - word[i] = '\0'; - fputs(word, out); - fputc(ch, out); - /* A simple way of working around - HTML character codes. Each is 5 ( epsiv ) - or 6 ( hellip ) characters long, plus '&' and ';'. */ - i = -3; - } - /* Check for closing tag. */ - else if (ch == '<') { - word[i] = ch; - ++i; - word[i] = '\0'; - hypword(word, i, in, out, dict); - i = 0; - readtag(word, in, out); - if (strcmp(word, term) == 0) break; - } - else { - word[i] = ch; - ++i; - } - if (i == MAXWLEN - 1) { - word[i] = '\0'; - hypword(word, i, in, out, dict); - i = 0; - } - } -} - -/* Hyphenate HTML input via `­'. -hyp [in] [out] */ -int -main(int argc, char **argv) -{ - FILE *in; - if (argc < 2) - in = stdin; - else { - in = fopen(argv[1], "r"); - if (in == NULL) { - printf("%s %s\n", argv[1], "inaccessible."); - return 1; - } - } - - FILE *out; - if (argc < 3) - out = stdout; - else { - out = fopen(argv[2], "w"); - if (out == NULL) { - printf("%s %s\n", argv[2], "inaccessible."); - return 2; - } - } - - if (findbody(in, out) == 0) { - puts("There is no body."); - return 3; - } - - HyphenDict *dict = hnj_hyphen_load(dictfile); - if (dict == NULL) { - puts("Dict not readable."); - return 4; - } - dict->utf8 = 1; - - int tagamt = 0; - while (taglist[tagamt][0] != '\0') - ++tagamt; - int skiplen = strlen(skip); - - char ch, tag[MAXWLEN]; - int len; - while ((ch = fgetc(in)) != EOF) { - fputc(ch, out); - if (ch == '<' && (len = readtag(tag, in, out)) > 0 - && checktag(tag, tagamt, in, out) != -1) - hyptag(in, out, skiplen, tag, dict); - } - return 0; -} |