unicode/c/genprops/misc/ucdstrip.c - external/github.com/unicode-org/icu - Git at Google

 /*
 *******************************************************************************
 *
 *   Copyright (C) 2003-2013, International Business Machines
 *   Corporation and others.  All Rights Reserved.
 *
 *******************************************************************************
 *   file name:  ucdstrip.c
 *   encoding:   US-ASCII
 *   tab size:   8 (not used)
 *   indentation:4
 *
 *   created on: 2003feb20
 *   created by: Markus W. Scherer
 *
 *   Simple tool for Unicode Character Database files with semicolon-delimited fields.
 *   Removes comments behind data lines but not in others.
 *
 *   To compile, just call a C compiler/linker with this source file.
 *   On Windows: cl ucdstrip.c
 */

 #include <stdio.h>
 #include <string.h>

 /*
  * Copies stdin to stdout line by line, removing a trailing '#' comment
  * (and the white space in front of it) from each line where the last '#'
  * is preceded by a space or tab.
  *
  * Input is read with fgets() so that a line can never overflow the buffer.
  * A line that does not fit into the buffer is copied through unchanged
  * (no comment stripping) and reported once on stderr.
  *
  * argc and argv are not used.
  * Returns 0 on success, 1 if a read or write error was detected.
  */
 extern int
 main(int argc, const char *argv[]) {
     static char line[2000];
     int inLongLine=0; /* nonzero while copying the rest of an overlong line */

     (void)argc;
     (void)argv;

     /*
      * Careful: Do not strip a comment right after the
      * UTF-8 signature byte sequence EF BB BF (U+FEFF "BOM")
      * which can occur on the first line of a UTF-8 text file.
      * (The white space test below takes care of this: the byte before
      * such a '#' is 0xBF, not a space or tab.)
      */
     while(fgets(line, (int)sizeof(line), stdin)!=NULL) {
         size_t length=strlen(line);
         int hasNewline=length>0 && line[length-1]=='\n';
         char *end;
         char c;

         /*
          * A chunk without a newline is only a complete line if it is the
          * last one before the end of the input. Otherwise the line is
          * longer than the buffer: pass all of its chunks through verbatim.
          */
         if(inLongLine || (!hasNewline && !feof(stdin))) {
             if(!inLongLine) {
                 fputs("ucdstrip: overlong line copied without stripping\n", stderr);
             }
             fputs(line, stdout);
             inLongLine=!hasNewline;
             continue;
         }

         /* puts() below appends the newline again */
         if(hasNewline) {
             line[length-1]=0;
         }

         end=strrchr(line, '#');
         /*
          * Assume that a data line comment is preceded by some white space.
          * This also protects data like '#' in UCA_Rules.txt.
          */
         if(end!=NULL && end!=line && ((c=*(end-1))==' ' || c=='\t')) {
             /* ignore whitespace before the comment */
             while(end!=line && ((c=*(end-1))==' ' || c=='\t')) {
                 --end;
             }
             *end=0;
         }
         puts(line);
     }

     return (ferror(stdin) || ferror(stdout)) ? 1 : 0;
 }
	/*
	*******************************************************************************
	*
	* Copyright (C) 2003-2013, International Business Machines
	* Corporation and others. All Rights Reserved.
	*
	*******************************************************************************
	* file name: ucdstrip.c
	* encoding: US-ASCII
	* tab size: 8 (not used)
	* indentation:4
	*
	* created on: 2003feb20
	* created by: Markus W. Scherer
	*
	* Simple tool for Unicode Character Database files with semicolon-delimited fields.
	* Removes comments behind data lines but not in others.
	*
	* To compile, just call a C compiler/linker with this source file.
	* On Windows: cl ucdstrip.c
	*/

	#include <stdio.h>
	#include <string.h>

	/*
	 * Copies stdin to stdout line by line, removing a trailing '#' comment
	 * (and the white space in front of it) from each line where the last '#'
	 * is preceded by a space or tab.
	 *
	 * Input is read with fgets() so that a line can never overflow the buffer.
	 * A line that does not fit into the buffer is copied through unchanged
	 * (no comment stripping) and reported once on stderr.
	 *
	 * argc and argv are not used.
	 * Returns 0 on success, 1 if a read or write error was detected.
	 */
	extern int
	main(int argc, const char *argv[]) {
	    static char line[2000];
	    int inLongLine=0; /* nonzero while copying the rest of an overlong line */

	    (void)argc;
	    (void)argv;

	    /*
	     * Careful: Do not strip a comment right after the
	     * UTF-8 signature byte sequence EF BB BF (U+FEFF "BOM")
	     * which can occur on the first line of a UTF-8 text file.
	     * (The white space test below takes care of this: the byte before
	     * such a '#' is 0xBF, not a space or tab.)
	     */
	    while(fgets(line, (int)sizeof(line), stdin)!=NULL) {
	        size_t length=strlen(line);
	        int hasNewline=length>0 && line[length-1]=='\n';
	        char *end;
	        char c;

	        /*
	         * A chunk without a newline is only a complete line if it is the
	         * last one before the end of the input. Otherwise the line is
	         * longer than the buffer: pass all of its chunks through verbatim.
	         */
	        if(inLongLine || (!hasNewline && !feof(stdin))) {
	            if(!inLongLine) {
	                fputs("ucdstrip: overlong line copied without stripping\n", stderr);
	            }
	            fputs(line, stdout);
	            inLongLine=!hasNewline;
	            continue;
	        }

	        /* puts() below appends the newline again */
	        if(hasNewline) {
	            line[length-1]=0;
	        }

	        end=strrchr(line, '#');
	        /*
	         * Assume that a data line comment is preceded by some white space.
	         * This also protects data like '#' in UCA_Rules.txt.
	         */
	        if(end!=NULL && end!=line && ((c=*(end-1))==' ' || c=='\t')) {
	            /* ignore whitespace before the comment */
	            while(end!=line && ((c=*(end-1))==' ' || c=='\t')) {
	                --end;
	            }
	            *end=0;
	        }
	        puts(line);
	    }

	    return (ferror(stdin) || ferror(stdout)) ? 1 : 0;
	}