PERFORCE change 144011 for review
Konrad Jankowski
konrad at FreeBSD.org
Tue Jun 24 06:46:39 UTC 2008
http://perforce.freebsd.org/chv.cgi?CH=144011
Change 144011 by konrad at vspredator on 2008/06/24 06:46:07
style(9) fixes.
Additional functionality added.
Affected files ...
.. //depot/projects/soc2008/konrad_collation/colldef/colldef.c#4 edit
Differences ...
==== //depot/projects/soc2008/konrad_collation/colldef/colldef.c#4 (text+ko) ====
@@ -37,7 +37,6 @@
#include <strings.h>
#include <unistd.h>
-#define DEFAULT_IN_FILE "../colldef_in.src.simple"
#define DEFAULT_OUT_FILE "LC_COLLATE"
#define WEIGHT_TABLE_SIZE (1 << 16)
#define NWEIGHTS 4
@@ -45,21 +44,22 @@
struct sym_entry {
char *name;
int val;
- SLIST_ENTRY(sym_entry) sym_next;
+ SLIST_ENTRY(sym_entry) next;
};
-SLIST_HEAD(, sym_entry) head;
+static SLIST_HEAD(, sym_entry) head;
/*
* This will have to be a structure, to at least accomodate symbol chaining.
*/
-uint8_t weight_table[WEIGHT_TABLE_SIZE][NWEIGHTS];
-struct weight_uncompressed {
+static uint8_t weight_table[WEIGHT_TABLE_SIZE][NWEIGHTS];
+
+static struct weight_uncompressed {
uint16_t w[NWEIGHTS];
char used;
} weights_uncompressed[WEIGHT_TABLE_SIZE];
-int verbose = 0;
+static int verbose = 0;
void usage(char *name);
void assign_weights(int codepoint, char *weights);
@@ -76,9 +76,10 @@
if ((sym = malloc(sizeof(*sym))) == NULL)
err(1, "add_symbol: malloc(%d)", sizeof(*sym));
- sym->name = strdup(name);
+ if ((sym->name = strdup(name)) == NULL)
+ err(1, "add_symbol: strdup(%d)", strlen(name) + 1);
sym->val = counter++;
- SLIST_INSERT_HEAD(&head, sym, sym_next);
+ SLIST_INSERT_HEAD(&head, sym, next);
}
/*
@@ -90,10 +91,10 @@
{
struct sym_entry *sym;
- SLIST_FOREACH(sym, &head, sym_next) {
+ SLIST_FOREACH(sym, &head, next)
if (strcmp(sym->name, name) == 0)
return sym->val;
- }
+
return 0;
}
@@ -102,10 +103,9 @@
{
struct sym_entry *sym;
- SLIST_FOREACH(sym, &head, sym_next) {
+ SLIST_FOREACH(sym, &head, next)
printf("sym->name=%s sym->val=%d\n",
sym->name, sym->val);
- }
}
void
@@ -122,6 +122,10 @@
}
}
+/*
+ * Decode a unicode codepoint stored in UTF-8 format, each byte coded
+ * as a hexadecimal constant.
+ */
int
get_codepoint(char *p, char **p_end)
{
@@ -131,7 +135,10 @@
wchar_t out = 0;
do {
- /* without leading "0x" will also work */
+ /*
+ * Scanf without leading "0x" will also work,
+ * but we don't have such cases in our input data.
+ */
p[0] = '0';
sscanf(p, "%x%n", &tmp, &len);
p += len;
@@ -141,30 +148,31 @@
len = mbtowc(&out, synthesis, i);
assert(len == i || synthesis[0] == 0);
if (p_end != NULL)
- *p_end = p; /* return where we got to */
+ *p_end = p; /* Return where we got to. */
return tmp;
}
void
-process_file(char *name)
+process_file(FILE *f)
{
unsigned codepoint = 0;
char buf[512], *p;
bzero(weights_uncompressed, sizeof(weights_uncompressed));
SLIST_INIT(&head);
- if (freopen(name, "r", stdin) == NULL)
- err(1, "freopen: %s", name);
- while (fgets(buf, sizeof(buf), stdin)) {
+ while (fgets(buf, sizeof(buf), f)) {
#ifdef DEBUG
fputs(buf, stdout);
#endif
buf[strlen(buf) - 1] = 0;
switch (buf[0]) {
case '<':
- /*assert(buf[1] == 'X');*/
- if (buf[1] == 'X') /* XXX */
+ /*
+ * XXX. The assumption here is that collating
+ * symbols start with 'X'.
+ */
+ if (buf[1] == 'X')
add_symbol(buf);
break;
case '\\':
@@ -174,10 +182,15 @@
}
}
-
- fclose(stdin); /* not really needed - freopen closes */
}
+/*
+ * This function decodes a weight, which can be given as:
+ * 1. a symbol name in angle brackets - in which case we have to look it up in
+ * our symbol table.
+ * 2. IGNORE keyword - this weight should be ignored.
+ * 3. A literal hexadecimal value prefixed with "\x".
+ */
int
get_weight_val(char **p)
{
@@ -189,17 +202,18 @@
case '<':
p3 = strchr(p2, '>');
assert(p3);
- p3[1] = 0; /* truncate the string for get_symval */
+ p3[1] = 0; /* Truncate the string for get_symval. */
ret = get_symval(p2);
- *p = p3 + 2; /* skip over ';' also */
- p3[1] = ';'; /* restore string; we shouldn't care... */
+ if (ret == 0)
+ errx(1, "get_weight_val: symbol %s not found",
+ p2);
+ *p = p3 + 2; /* Skip over ';' also. */
+ p3[1] = ';'; /* Restore string; we shouldn't care... */
return ret;
- case 'I':
- /* IGNORE */
- *p += 7;
- return 0; /* IGNORE means 0 (I think) */
- case '\\':
- /* we get literal value, instead of symbol */
+ case 'I': /* IGNORE */
+ *p += 7; /* IGNORE has 6 letters, + ';'. */
+ return 0; /* IGNORE means 0 (I think). */
+ case '\\': /* Literal value. Decode it. */
ret = get_codepoint(p2, &p3);
*p = p3 + 1;
return ret;
@@ -209,21 +223,28 @@
}
}
+/*
+ * Take a string of four weights, separated by semicolons,
+ * decode them and assign to the weight table at the given codepoint
+ * position.
+ */
void
assign_weights(int codepoint, char *weights)
{
- int i;
+ int i, val;
assert(weights != NULL);
weights_uncompressed[codepoint].used = 1;
for (i = 0; i < 4; i++) {
- int val;
-
val = get_weight_val(&weights);
weights_uncompressed[codepoint].w[i] = val;
}
}
+/*
+ * Assign new weight value to all codepoint with the given value 'val'.
+ * Do this only for the first level (w[0]).
+ */
void
reduce(int val, int new_val)
{
@@ -242,15 +263,15 @@
* Find 2 minimums from the given set.
* Optimised to only make one pass throught the set. (data locality)
*/
-#define MIN_MAX (1<<15)
+#define MIN_MAX (1 << 15)
int
find_min(int start, int *min2_ret)
{
int min = MIN_MAX, min2, min3 = MIN_MAX;
- int i;
+ int i, val;
for (i = 0; i < WEIGHT_TABLE_SIZE; i++) {
- int val = weights_uncompressed[i].w[0];
+ val = weights_uncompressed[i].w[0];
/* 1. case, at first we find no a minimum */
if (val >= start && val < min) {
min2 = min;
@@ -275,15 +296,14 @@
int i;
printf("%d: ", level);
- for (i = 0; i < WEIGHT_TABLE_SIZE; i++) {
+ for (i = 0; i < WEIGHT_TABLE_SIZE; i++)
if (weights_uncompressed[i].used &&
weights_uncompressed[i].w[0] == level)
printf(" (%d %d %d)", weights_uncompressed[i].w[1],
- weights_uncompressed[i].w[2],
- weights_uncompressed[i].w[3]);
+ weights_uncompressed[i].w[2],
+ weights_uncompressed[i].w[3]);
- }
- printf("\n");
+ putchar('\n');
}
/*
@@ -319,66 +339,75 @@
}
void
-binary_output(char *out_file)
+binary_output(FILE *f)
{
int i, j;
- int out;
-
- /*
- * I just use open and write, instead of stdio in this case.
- * This program isn't meant to be portable from UNIX.
- */
- if ((out = open(out_file, O_WRONLY | O_CREAT | O_TRUNC, 0644)) == -1)
- err(1, "open(%s)", out_file);
+
for (i = 0; i < WEIGHT_TABLE_SIZE; i++)
for (j = 0; j < NWEIGHTS; j++)
weight_table[i][j] = weights_uncompressed[i].w[j];
- if (write(out, weight_table, sizeof(weight_table)) !=
- sizeof(weight_table))
+ if (fwrite(weight_table, sizeof(weight_table), 1, f) != 1)
errx(1, "not full write");
- close(out);
}
/*
- * I divided the process into 3 main functions, so we could optionally
- * process multiple input files with better argument processing.
- * Another questionable optimisation.
+ * I divided the process into 3 main functions, so we can
+ * process multiple input files with one call.
*/
int
main(int argc, char *argv[])
{
+ FILE *f, *of;
int ch;
- char *in_file = DEFAULT_IN_FILE;
- char *out_file = DEFAULT_OUT_FILE;
+ char name[512];
- while ((ch = getopt(argc, argv, "hf:o:v")) != -1) {
+ while ((ch = getopt(argc, argv, "h:v")) != -1) {
switch (ch) {
- case 'f':
- in_file = optarg;
- break;
- case 'o':
- out_file = optarg;
- break;
case 'v':
verbose = 1;
- break;
+ break;
default:
usage(argv[0]);
}
}
+ argv += optind;
+ argc -= optind;
/*
- * need to setlocale to an UTF-8 locale, so thet
- * mbtowc works correctly
+ * Need to setlocale to an UTF-8 locale, so that
+ * mbtowc works correctly.
*/
setlocale(LC_ALL, "en_US.UTF-8");
- process_file(in_file);
+ if (argc) for (; argc; argc--, argv++) {
+ if ((f = fopen(argv[0], "r")) == NULL)
+ err(1, "fopen: %s", argv[0]);
+ process_file(f);
+ fclose(f);
+#ifdef DEBUG2
+ dump_table();
+#endif
+ compress_weights();
+ snprintf(name, sizeof(name),
+ "%s.%s", argv[0], DEFAULT_OUT_FILE);
+ if ((of = fopen(name, "w")) == NULL)
+ err(1, "fopen: %s", name);
+ binary_output(of);
+ fclose(of);
+ } else {
+ process_file(stdin);
#ifdef DEBUG2
- dump_table();
+ dump_table();
#endif
- compress_weights();
- binary_output(out_file);
+ compress_weights();
+ /*
+ * We could write to stdout here...
+ */
+ if ((of = fopen(DEFAULT_OUT_FILE, "w")) == NULL)
+ err(1, "fopen: %s", name);
+ binary_output(of);
+ fclose(of);
+ }
return 0;
}
@@ -387,9 +416,11 @@
usage(char *name)
{
printf( "usage: "
- "\t%s [-f input_file] [-o output_file]\n"
- "\tdefault output file is LC_COLLATE\n"
+ "\t%s [-h] [-v] [input_file_1] ... [input_file_n] \n"
+ "\t output file name is LC_COLLATE\n"
+ "\t if one or more input files given as arguments, "
+ "\t output file name is the input file name with\n"
+ "LC_COLLATE concatenaded.\n"
, name);
exit(1);
}
-
More information about the p4-projects
mailing list