blob: fca86767a270cae7cda5c90f458f2444af8a0bf0 [file] [log] [blame]
/*
*******************************************************************************
*
* Copyright (C) 2003-2013, International Business Machines
* Corporation and others. All Rights Reserved.
*
*******************************************************************************
* file name: ucdstrip.c
* encoding: US-ASCII
* tab size: 8 (not used)
* indentation:4
*
* created on: 2003feb20
* created by: Markus W. Scherer
*
* Simple tool for Unicode Character Database files with semicolon-delimited fields.
* Removes comments behind data lines but not in others.
*
* To compile, just call a C compiler/linker with this source file.
* On Windows: cl ucdstrip.c
*/
#include <stdio.h>
#include <string.h>
extern int
main(int argc, const char *argv[]) {
static char line[2000];
/*
* Careful: Do not strip a comment right after the
* UTF-8 signature byte sequence EF BB BF (U+FEFF "BOM")
* which can occur on the first line of a UTF-8 text file.
*/
while(gets(line)!=NULL) {
char *end=strrchr(line, '#');
char c;
/*
* Assume that a data line comment is preceded by some white space.
* This also protects data like '#' in UCA_Rules.txt.
*/
if(end!=NULL && end!=line && ((c=*(end-1))==' ' || c=='\t')) {
/* ignore whitespace before the comment */
while(end!=line && ((c=*(end-1))==' ' || c=='\t')) {
--end;
}
*end=0;
}
puts(line);
}
return 0;
}