diff options
Diffstat (limited to 'win')
| -rw-r--r-- | win/hyp.c | 262 | 
1 files changed, 0 insertions, 262 deletions
diff --git a/win/hyp.c b/win/hyp.c deleted file mode 100644 index 697fed5..0000000 --- a/win/hyp.c +++ /dev/null @@ -1,262 +0,0 @@ -#include <stdio.h> -#include <string.h> -#include <stdlib.h> -#include <hyphen.h> -#include "config.h" - -/* Read a tag into a character array and return its length. */ -int -readtag(char *tag, FILE *in, FILE *out) -{ -	char ch; -	int i = 0; - -	/* Data after a space in a tag is irrelevant. */ -	while ((ch = fgetc(in)) != '>' && ch != ' ') { -		fputc(ch, out); -		tag[i] = ch; -		++i; -	} - -	if (ch == ' ') { -		/* Seek to the end of the tag. */ -		while (ch != '>' -		&& ch != EOF) { -			fputc(ch, out); -			ch = fgetc(in); -		} -		if (ch == EOF) { -			return -1; -		} -		fputc(ch, out); -	} -	else -		fputc(ch, out); - -	tag[i] = '\0'; -	return i; -} - -int -cmptag(char *tag2, FILE *in, FILE *out) -{ -	char tag1[MAXWLEN]; -	readtag(tag1, in, out); - -	return strcmp(tag1, tag2); -} - -int -checktag(char *tag, int tagamt, FILE *in, FILE *out) -{ -	int i; -	for (i = 0; i < tagamt; ++i) -		if (strcmp(tag, taglist[i]) == 0) -			return i; -	return -1; -} - -/* Check if a character should be skipped. */ -int -checkskip(char ch, int skiplen) -{ -	int i; -	for (i = 0; i < skiplen; ++i) -		if (ch == skip[i]) -			return 1; - -	return 0; -} - -const char *punct = "';.,\"!?:"; -/* Check if a character is punctuation. */ -int -checkpunct(char ch) -{ -	int i; -	for (i = 0; punct[i] != '\0'; ++i) -		if (ch == punct[i]) -			return 1; - -	return 0; -} - -const char *blank = " \n\r\t"; -/* Check if a character is a blank. */ -int -checkblank(char ch) -{ -	int i; -	for (i = 0; blank[i] != '\0'; ++i) -		if (ch == blank[i]) -			return 1; - -	return 0; -} - -/* Loop until the body is found. */ -int -findbody(FILE *in, FILE *out) -{ -	char ch; -	while ((ch = fgetc(in)) != EOF) { -		fputc(ch, out); -		if (ch == '<' -		&& cmptag("body", in, out) == 0) -			return 1; -	} - -	return 0; -} - -/* Hyphenate a word, by means of hyphen library. -This is done so as to leverage sufficient hyphenation -patterns, with the ones used here having been taken -from those developed for TeX. */ -void -hypword(char *word, int len, FILE *in, FILE *out, HyphenDict *dict) -{ -	if (len < MINWLEN) { -		fprintf(out, "%s", word); -		return; -	} - -	char *hyphens = calloc(len + 6, sizeof(char)); -	char *hyphword = calloc(len << 1, sizeof(char)); -	char **rep = NULL; -	int *pos = NULL, *cut = NULL; -	hnj_hyphen_hyphenate2(dict, word, len, hyphens, -		hyphword, &rep, &pos, &cut); - -/*	fprintf(stderr, "%s\n%s\n%s\n", word, hyphens, hyphword); */ - -	/* Process the given hyphenation. */ -	int i; -	char oldch = ' ', oldoldch = '\0'; -	for (i = 0; i < strlen(hyphword); ++i) { -		if (hyphword[i] == '=' && oldch != ' ' -			&& oldoldch != ' ') -			fputs("­", out); -		else if (hyphword[i] != '=') -			fputc(hyphword[i], out); - -		oldoldch = oldch; -		oldch = hyphword[i]; -	} - -	free(hyphens); -	free(hyphword); -} - -/* Hyphenate the words within a tag. */ -void -hyptag(FILE *in, FILE *out, int skiplen, char *tag, HyphenDict *dict) -{ -	char ch, word[MAXWLEN], term[MAXWLEN] = "/"; -	int i = 0; - -	strcat(term, tag); - -	while ((ch = fgetc(in)) != EOF) { -		if (i < 0) { -			fputc(ch, out); -			++i; -			if (checkskip(ch, skiplen)) i -= 3; -			continue; -		} -		if (checkblank(ch)) { -			word[i] = '\0'; -			hypword(word, i, in, out, dict); -			i = 0; -		} -		if (checkpunct(ch)) { -			word[i] = '\0'; -			hypword(word, i, in, out, dict); -			fputc(ch, out); -			i = 0; -		} -		else if (checkskip(ch, skiplen)) { -			word[i] = '\0'; -			fputs(word, out); -			fputc(ch, out); -			/* A simple way of working around -			HTML character codes. Each is 5 ( epsiv ) -			 or 6 ( hellip ) characters long, plus '&' and ';'. */ -			i = -3; -		}		 -		/* Check for closing tag. */ -		else if (ch == '<') { -			word[i] = ch; -			++i; -			word[i] = '\0'; -			hypword(word, i, in, out, dict); -			i = 0; -			readtag(word, in, out); -			if (strcmp(word, term) == 0) break; -		} -		else { -			word[i] = ch; -			++i; -		} -		if (i == MAXWLEN - 1) { -			word[i] = '\0'; -			hypword(word, i, in, out, dict); -			i = 0; -		} -	} -} - -/* Hyphenate HTML input via `­'. -hyp [in] [out] */ -int -main(int argc, char **argv) -{ -	FILE *in; -	if (argc < 2) -		in = stdin; -	else { -		in = fopen(argv[1], "r"); -		if (in == NULL) { -			printf("%s %s\n", argv[1], "inaccessible."); -			return 1; -		} -	} - -	FILE *out; -	if (argc < 3) -		out = stdout; -	else { -		out = fopen(argv[2], "w"); -		if (out == NULL) { -			printf("%s %s\n", argv[2], "inaccessible."); -			return 2; -		} -	} - -	if (findbody(in, out) == 0) { -		puts("There is no body."); -		return 3; -	} - -	HyphenDict *dict = hnj_hyphen_load(dictfile); -	if (dict == NULL) { -		puts("Dict not readable."); -		return 4; -	} -	dict->utf8 = 1; - -	int tagamt = 0; -	while (taglist[tagamt][0] != '\0') -		++tagamt; -	int skiplen = strlen(skip); - -	char ch, tag[MAXWLEN]; -	int len; -	while ((ch = fgetc(in)) != EOF) { -		fputc(ch, out); -		if (ch == '<' && (len = readtag(tag, in, out)) > 0 -			&& checktag(tag, tagamt, in, out) != -1) -			hyptag(in, out, skiplen, tag, dict); -	} -	return 0; -}  | 
