summaryrefslogtreecommitdiff
path: root/win
diff options
context:
space:
mode:
Diffstat (limited to 'win')
-rw-r--r--win/hyp.c262
1 files changed, 0 insertions, 262 deletions
diff --git a/win/hyp.c b/win/hyp.c
deleted file mode 100644
index 697fed5..0000000
--- a/win/hyp.c
+++ /dev/null
@@ -1,262 +0,0 @@
-#include <stdio.h>
-#include <string.h>
-#include <stdlib.h>
-#include <hyphen.h>
-#include "config.h"
-
/* Read a tag name from `in` into `tag`, echoing everything consumed to `out`.
 *
 * Reading starts just after the opening '<'.  The name ends at the first
 * space or '>'; data after a space is echoed but not stored.  The closing
 * '>' is consumed and echoed as well.
 *
 * `tag` is always NUL-terminated on return.  No buffer size is passed in,
 * so the caller must supply a buffer large enough for the longest tag name
 * in the input.
 *
 * Returns the length of the tag name, or -1 if the input ends before the
 * closing '>' is seen. */
int
readtag(char *tag, FILE *in, FILE *out)
{
	/* fgetc() returns an int so that EOF stays distinguishable from
	   every valid byte; a char cannot hold that distinction. */
	int ch;
	int i = 0;

	/* Data after a space in a tag is irrelevant. */
	while ((ch = fgetc(in)) != EOF && ch != '>' && ch != ' ') {
		fputc(ch, out);
		tag[i] = (char)ch;
		++i;
	}
	/* Terminate before any early return so callers never see garbage. */
	tag[i] = '\0';

	/* Seek to the end of the tag, echoing what is skipped. */
	while (ch != '>' && ch != EOF) {
		fputc(ch, out);
		ch = fgetc(in);
	}
	if (ch == EOF)
		return -1;

	fputc(ch, out);
	return i;
}
-
-int
-cmptag(char *tag2, FILE *in, FILE *out)
-{
- char tag1[MAXWLEN];
- readtag(tag1, in, out);
-
- return strcmp(tag1, tag2);
-}
-
-int
-checktag(char *tag, int tagamt, FILE *in, FILE *out)
-{
- int i;
- for (i = 0; i < tagamt; ++i)
- if (strcmp(tag, taglist[i]) == 0)
- return i;
- return -1;
-}
-
-/* Check if a character should be skipped. */
-int
-checkskip(char ch, int skiplen)
-{
- int i;
- for (i = 0; i < skiplen; ++i)
- if (ch == skip[i])
- return 1;
-
- return 0;
-}
-
const char *punct = "';.,\"!?:";
/* Tell whether `ch` is one of the characters listed in `punct`.
 * Returns 1 if it is, 0 if not (the terminating NUL never matches). */
int
checkpunct(char ch)
{
	const char *p;

	for (p = punct; *p; p++) {
		if (*p == ch)
			return 1;
	}

	return 0;
}
-
const char *blank = " \n\r\t";
/* Tell whether `ch` is one of the whitespace characters listed in `blank`.
 * Returns 1 if it is, 0 if not. */
int
checkblank(char ch)
{
	/* strchr() would also match the terminating NUL, which the list is
	   not meant to contain, so rule that out first. */
	if (ch == '\0')
		return 0;

	return strchr(blank, ch) != NULL;
}
-
/* Copy `in` to `out` until a "body" tag has been read.
 *
 * Every byte read is echoed.  After each '<' the tag name that follows is
 * read (and echoed) by cmptag() and compared with "body".
 *
 * Returns 1 once a "body" tag has been read, 0 if the input ends first. */
int
findbody(FILE *in, FILE *out)
{
	/* Must be int: a char cannot tell EOF apart from a data byte. */
	int ch;

	while ((ch = fgetc(in)) != EOF) {
		fputc(ch, out);
		if (ch == '<' && cmptag("body", in, out) == 0)
			return 1;
	}

	return 0;
}
-
-/* Hyphenate a word, by means of hyphen library.
-This is done so as to leverage sufficient hyphenation
-patterns, with the ones used here having been taken
-from those developed for TeX. */
-void
-hypword(char *word, int len, FILE *in, FILE *out, HyphenDict *dict)
-{
- if (len < MINWLEN) {
- fprintf(out, "%s", word);
- return;
- }
-
- char *hyphens = calloc(len + 6, sizeof(char));
- char *hyphword = calloc(len << 1, sizeof(char));
- char **rep = NULL;
- int *pos = NULL, *cut = NULL;
- hnj_hyphen_hyphenate2(dict, word, len, hyphens,
- hyphword, &rep, &pos, &cut);
-
-/* fprintf(stderr, "%s\n%s\n%s\n", word, hyphens, hyphword); */
-
- /* Process the given hyphenation. */
- int i;
- char oldch = ' ', oldoldch = '\0';
- for (i = 0; i < strlen(hyphword); ++i) {
- if (hyphword[i] == '=' && oldch != ' '
- && oldoldch != ' ')
- fputs("&shy;", out);
- else if (hyphword[i] != '=')
- fputc(hyphword[i], out);
-
- oldoldch = oldch;
- oldch = hyphword[i];
- }
-
- free(hyphens);
- free(hyphword);
-}
-
-/* Hyphenate the words within a tag. */
-void
-hyptag(FILE *in, FILE *out, int skiplen, char *tag, HyphenDict *dict)
-{
- char ch, word[MAXWLEN], term[MAXWLEN] = "/";
- int i = 0;
-
- strcat(term, tag);
-
- while ((ch = fgetc(in)) != EOF) {
- if (i < 0) {
- fputc(ch, out);
- ++i;
- if (checkskip(ch, skiplen)) i -= 3;
- continue;
- }
- if (checkblank(ch)) {
- word[i] = '\0';
- hypword(word, i, in, out, dict);
- i = 0;
- }
- if (checkpunct(ch)) {
- word[i] = '\0';
- hypword(word, i, in, out, dict);
- fputc(ch, out);
- i = 0;
- }
- else if (checkskip(ch, skiplen)) {
- word[i] = '\0';
- fputs(word, out);
- fputc(ch, out);
- /* A simple way of working around
- HTML character codes. Each is 5 ( epsiv )
- or 6 ( hellip ) characters long, plus '&' and ';'. */
- i = -3;
- }
- /* Check for closing tag. */
- else if (ch == '<') {
- word[i] = ch;
- ++i;
- word[i] = '\0';
- hypword(word, i, in, out, dict);
- i = 0;
- readtag(word, in, out);
- if (strcmp(word, term) == 0) break;
- }
- else {
- word[i] = ch;
- ++i;
- }
- if (i == MAXWLEN - 1) {
- word[i] = '\0';
- hypword(word, i, in, out, dict);
- i = 0;
- }
- }
-}
-
-/* Hyphenate HTML input via `&shy;'.
-hyp [in] [out] */
-int
-main(int argc, char **argv)
-{
- FILE *in;
- if (argc < 2)
- in = stdin;
- else {
- in = fopen(argv[1], "r");
- if (in == NULL) {
- printf("%s %s\n", argv[1], "inaccessible.");
- return 1;
- }
- }
-
- FILE *out;
- if (argc < 3)
- out = stdout;
- else {
- out = fopen(argv[2], "w");
- if (out == NULL) {
- printf("%s %s\n", argv[2], "inaccessible.");
- return 2;
- }
- }
-
- if (findbody(in, out) == 0) {
- puts("There is no body.");
- return 3;
- }
-
- HyphenDict *dict = hnj_hyphen_load(dictfile);
- if (dict == NULL) {
- puts("Dict not readable.");
- return 4;
- }
- dict->utf8 = 1;
-
- int tagamt = 0;
- while (taglist[tagamt][0] != '\0')
- ++tagamt;
- int skiplen = strlen(skip);
-
- char ch, tag[MAXWLEN];
- int len;
- while ((ch = fgetc(in)) != EOF) {
- fputc(ch, out);
- if (ch == '<' && (len = readtag(tag, in, out)) > 0
- && checktag(tag, tagamt, in, out) != -1)
- hyptag(in, out, skiplen, tag, dict);
- }
- return 0;
-}