[sword-cvs] icu-sword/source/tools/genprops/misc ucdmerge.c,NONE,1.1 ucdstrip.c,NONE,1.1 ucdstrip.pl,NONE,1.1
sword@www.crosswire.org
sword@www.crosswire.org
Tue, 9 Sep 2003 19:43:00 -0700
- Previous message: [sword-cvs] icu-sword/source/tools/genpname .cvsignore,NONE,1.1 Makefile.in,NONE,1.1 SyntheticPropertyAliases.txt,NONE,1.1 data.h,NONE,1.1 genpname.cpp,NONE,1.1 genpname.dsp,NONE,1.1 genpname.vcproj,NONE,1.1 preparse.pl,NONE,1.1
- Next message: [sword-cvs] icu-sword/source/samples/date .cvsignore,1.2,1.3 Makefile.in,1.3,1.4 date.c,1.3,1.4 date.dsp,1.3,1.4 date.dsw,1.2,1.3 date.sln,NONE,1.1 date.vcproj,NONE,1.1 readme.txt,NONE,1.1 uprint.c,1.2,1.3 uprint.h,1.2,1.3
- Messages sorted by:
[ date ]
[ thread ]
[ subject ]
[ author ]
Update of /usr/local/cvsroot/icu-sword/source/tools/genprops/misc
In directory www:/tmp/cvs-serv19862/source/tools/genprops/misc
Added Files:
ucdmerge.c ucdstrip.c ucdstrip.pl
Log Message:
ICU 2.6 commit
--- NEW FILE: ucdmerge.c ---
/*
*******************************************************************************
*
* Copyright (C) 2003, International Business Machines
* Corporation and others. All Rights Reserved.
*
*******************************************************************************
* file name: ucdmerge.c
* encoding: US-ASCII
* tab size: 8 (not used)
* indentation:4
*
* created on: 2003feb20
* created by: Markus W. Scherer
*
* Simple tool for Unicode Character Database files with semicolon-delimited fields.
* Merges adjacent, identical per-code point data lines into one line with range syntax.
*
* To compile, just call a C compiler/linker with this source file.
* On Windows: cl ucdmerge.c
*/
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
/*
 * Skip leading blanks in a string.
 * Only the space and horizontal-tab characters count as blanks here.
 * Returns a pointer to the first character that is neither of them,
 * which may be the terminating NUL.
 */
static const char *
skipWhitespace(const char *s) {
    /* strspn() measures the initial run made up only of ' ' and '\t' */
    return s+strspn(s, " \t");
}
/*
 * Find where the data portion of a line ends.
 *
 * If the line contains a '#', the data ends before the first '#' and before
 * any spaces/tabs that immediately precede it (never moving in front of l).
 * If there is no '#', the data ends at the terminating NUL; trailing
 * blanks are not trimmed in that case.
 *
 * Returns a pointer to the first character after the data.
 */
static char *
endOfData(const char *l) {
    char *stop=strchr(l, '#');

    if(stop==NULL) {
        /* no comment: the data runs up to the end of the string */
        return (char *)l+strlen(l);
    }

    /* back up over the blanks that separate the data from the comment */
    while(stop>l && (stop[-1]==' ' || stop[-1]=='\t')) {
        --stop;
    }
    return stop;
}
static int
sameData(const char *l1, const char *l2) {
char *end1, *end2;
int length;
/* find the first semicolon in each line - there must be one */
l1=strchr(l1, ';')+1;
l2=strchr(l2, ';')+1;
/* find the end of data: end of string or start of comment */
end1=endOfData(l1);
end2=endOfData(l2);
/* compare the line data portions */
length=end1-l1;
return length==(end2-l2) && 0==memcmp(l1, l2, length);
}
/*
 * Read one line from stdin into buffer (capacity bytes including the NUL)
 * and remove the trailing newline, like gets() did, but never write past
 * the end of the buffer.
 *
 * Returns 1 if a line was stored, 0 at end of input or on a read error,
 * and -1 if the line does not fit into the buffer.
 */
static int
readLine(char *buffer, size_t capacity) {
    size_t length;
    int next;

    if(fgets(buffer, (int)capacity, stdin)==NULL) {
        return 0;
    }
    length=strlen(buffer);
    if(length>0 && buffer[length-1]=='\n') {
        buffer[length-1]=0;
        return 1;
    }
    /*
     * No newline was stored: either the input ended without one or the
     * buffer filled up. Look at the next character to tell these apart;
     * a newline here means the line fit exactly.
     */
    next=getc(stdin);
    if(next==EOF || next=='\n') {
        return 1;
    }
    return -1;
}

/*
 * Print one merged line for the code point range first..last.
 * firstLine and lastLine are the original lines of the first and the last
 * code point in the range. The data is taken from firstLine; the comments
 * of both lines are combined when both have one.
 * firstLine must contain a ';' (main() only stores lines that have one).
 */
static void
printRange(long first, long last, const char *firstLine, const char *lastLine) {
    const char *data=strchr(firstLine, ';');    /* first line starting from the first ; */
    const char *comment=strchr(lastLine, '#');  /* comment of the last line, if any */

    /* first and last are never negative here; %lX requires unsigned long */
    if(comment==NULL) {
        /* no comment in second line */
        printf("%04lX..%04lX%s\n",
               (unsigned long)first, (unsigned long)last, data);
    } else if(strchr(firstLine, '#')==NULL) {
        /* no comment in first line: append the comment from the second line */
        printf("%04lX..%04lX%s%s\n",
               (unsigned long)first, (unsigned long)last, data, comment);
    } else {
        /* merge comments from both lines: "# first..last" */
        printf("%04lX..%04lX%s..%s\n",
               (unsigned long)first, (unsigned long)last, data,
               skipWhitespace(comment+1)); /* after # and spaces */
    }
}

/*
 * Filter stdin to stdout.
 * Lines that start with a single hexadecimal code point followed by ';'
 * are data lines. Runs of data lines with consecutive code points and
 * identical data (see sameData()) are replaced by one line using the
 * first..last range syntax. All other lines are copied unchanged.
 *
 * Returns 0 on success, EXIT_FAILURE if an input line is too long for
 * the line buffer. The command-line arguments are not used.
 */
extern int
main(int argc, const char *argv[]) {
    static char line[2000], firstLine[2000], lastLine[2000];
    char *end;
    long first, last, c;
    int finished, status;

    (void)argc;
    (void)argv;

    first=last=-1;  /* -1: no range is being collected */
    finished=0;
    for(;;) {
        status=readLine(line, sizeof(line));
        if(status<0) {
            fprintf(stderr, "ucdmerge: input line longer than %lu characters\n",
                    (unsigned long)(sizeof(line)-1));
            return EXIT_FAILURE;
        }
        if(status>0) {
            /* parse the initial code point, if any */
            c=strtol(line, &end, 16);
            if(end!=line && *skipWhitespace(end)==';') {
                /* single code point followed by semicolon and data, keep c */
            } else {
                c=-1;
            }
        } else {
            /* end of input: run one more iteration to flush a pending range */
            line[0]=0;
            c=-1;
            finished=1;
        }

        /* sameData() is only reached when c==last+1, so line has a ';' */
        if(last>=0 && (c!=(last+1) || !sameData(firstLine, line))) {
            /* output the current range */
            if(first==last) {
                /* there was no range, just output the one line we found */
                puts(firstLine);
            } else {
                /* there was a real range, merge their lines */
                printRange(first, last, firstLine, lastLine);
            }
            first=last=-1;
        }

        if(c<0) {
            if(finished) {
                break;
            }
            /* no data on this line, output as is */
            puts(line);
        } else {
            /* data on this line, store for possible range compaction */
            if(last<0) {
                /* set as the first line in a possible range */
                first=last=c;
                strcpy(firstLine, line);
                lastLine[0]=0;
            } else /* must be c==(last+1) && sameData() because of previous conditions */ {
                /* continue with the current range */
                last=c;
                strcpy(lastLine, line);
            }
        }
    }
    return 0;
}
--- NEW FILE: ucdstrip.c ---
/*
*******************************************************************************
*
* Copyright (C) 2003, International Business Machines
* Corporation and others. All Rights Reserved.
*
*******************************************************************************
* file name: ucdstrip.c
* encoding: US-ASCII
* tab size: 8 (not used)
* indentation:4
*
* created on: 2003feb20
* created by: Markus W. Scherer
*
* Simple tool for Unicode Character Database files with semicolon-delimited fields.
* Removes comments behind data lines but not in others.
*
* To compile, just call a C compiler/linker with this source file.
* On Windows: cl ucdstrip.c
*/
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
/*
 * Return the first character position after the end of the data.
 *
 * The data ends before the first '#' in the line, not counting the spaces
 * and tabs directly in front of that '#'. Without a '#', the data ends at
 * the terminating NUL and trailing blanks are left in place.
 */
static char *
endOfData(const char *l) {
    /* offset of the first '#', or of the terminating NUL if there is none */
    size_t offset=strcspn(l, "#");

    if(l[offset]=='#') {
        /* ignore whitespace before the comment */
        while(offset>0 && (l[offset-1]==' ' || l[offset-1]=='\t')) {
            --offset;
        }
    }
    return (char *)l+offset;
}
/*
 * Read one line from stdin into buffer (capacity bytes including the NUL)
 * and remove the trailing newline, like gets() did, but never write past
 * the end of the buffer.
 *
 * Returns 1 if a line was stored, 0 at end of input or on a read error,
 * and -1 if the line does not fit into the buffer.
 */
static int
readLine(char *buffer, size_t capacity) {
    size_t length;
    int next;

    if(fgets(buffer, (int)capacity, stdin)==NULL) {
        return 0;
    }
    length=strlen(buffer);
    if(length>0 && buffer[length-1]=='\n') {
        buffer[length-1]=0;
        return 1;
    }
    /*
     * No newline was stored: either the input ended without one or the
     * buffer filled up. Look at the next character to tell these apart;
     * a newline here means the line fit exactly.
     */
    next=getc(stdin);
    if(next==EOF || next=='\n') {
        return 1;
    }
    return -1;
}

/*
 * Filter stdin to stdout.
 * On every line that starts with a non-negative hexadecimal number, the
 * trailing '#' comment and the blanks before it are removed; all other
 * lines are copied unchanged.
 *
 * Returns 0 on success, EXIT_FAILURE if an input line is too long for
 * the line buffer. The command-line arguments are not used.
 */
extern int
main(int argc, const char *argv[]) {
    static char line[2000];
    char *end;
    int status;

    (void)argc;
    (void)argv;

    while((status=readLine(line, sizeof(line)))>0) {
        if(strtol(line, &end, 16)>=0 && end!=line) {
            /*
             * The line starts with a hex number (a code point or the start
             * of a range), so treat it as a data line and remove the comment.
             * Note that this does not check for a following semicolon.
             */
            *endOfData(line)=0;
        }
        puts(line);
    }
    if(status<0) {
        fprintf(stderr, "ucdstrip: input line longer than %lu characters\n",
                (unsigned long)(sizeof(line)-1));
        return EXIT_FAILURE;
    }
    return 0;
}
--- NEW FILE: ucdstrip.pl ---
#!/usr/lib/perl -p
# Copyright (c) 2001-2003 International Business Machines
# Corporation and others. All Rights Reserved.
# Simple tool for Unicode Character Database files with semicolon-delimited fields.
# Removes comments behind data lines but not in others.
# The Perl option -p above runs a while(<>) loop and prints the expression output.
# On lines that start with hex digits: keep the shortest text up to the first
# '#' (without the spaces directly before it) and drop the comment.
# Use $1 in the replacement; \1 is only meant for use inside the pattern.
s/^([0-9a-fA-F]+.+?) *#.*/$1/;
- Previous message: [sword-cvs] icu-sword/source/tools/genpname .cvsignore,NONE,1.1 Makefile.in,NONE,1.1 SyntheticPropertyAliases.txt,NONE,1.1 data.h,NONE,1.1 genpname.cpp,NONE,1.1 genpname.dsp,NONE,1.1 genpname.vcproj,NONE,1.1 preparse.pl,NONE,1.1
- Next message: [sword-cvs] icu-sword/source/samples/date .cvsignore,1.2,1.3 Makefile.in,1.3,1.4 date.c,1.3,1.4 date.dsp,1.3,1.4 date.dsw,1.2,1.3 date.sln,NONE,1.1 date.vcproj,NONE,1.1 readme.txt,NONE,1.1 uprint.c,1.2,1.3 uprint.h,1.2,1.3
- Messages sorted by:
[ date ]
[ thread ]
[ subject ]
[ author ]