blob: 1ed9bd98227e44c495da7cd1f0361134a8290010 [file] [log] [blame]
/*
**********************************************************************
* Copyright (C) 1999, International Business Machines
* Corporation and others. All Rights Reserved.
**********************************************************************
* Date Name Description
* 12/05/99 aliu Creation.
**********************************************************************
*/
#include <stdio.h>
#include <stdlib.h>
#include "unicode/utypes.h"
#include "cmemory.h"
#include "cstring.h"
#include "filestrm.h"
/*********************************************************************
mkhelper exists to enable ICU to use some of the same build files that
are used on UNIX. It was originally written to support the building
of the numerous ICU converters on Windows, but it is general enough to
be useful in other contexts. mkhelper is useful only on Windows; on
UNIX you would accomplish the same task with a ten line perl script.
mkhelper reads one or more input files and parses them looking for one
or more makefile variable name definitions. For example, a typical
input file looks like this:
# A list of UCM's to build
UCM_SOURCE = ibm-1038.ucm ibm-1047.ucm ibm-1089.ucm ibm-1123.ucm \
ibm-1140.ucm ibm-1141.ucm ibm-1142.ucm ibm-1143.ucm ibm-1144.ucm
For this file, with no string-modifying options specified, mkhelper
would emit the following to stdout:
ibm-1038.ucm
ibm-1047.ucm
...
ibm-1144.ucm
The input file can contain comments, blank lines, and variable
definitions. Lines are continued if they end with '\'. The variable
definitions assign a variable name (UCM_SOURCE in the above example)
to a list of zero or more strings.
mkhelper reads one or more files of this format. Its parsing is very
rudimentary, so input files cannot contain anything other than the
elements described above. It looks for variable names specified on
the command line, and records their definitions. It then does
optional string substitution on those definition strings. Finally it
emits them to stdout, one per line, with optional prefix and/or suffix
strings.
If multiple files are read, definitions in later files (on the command
line) replace definitions with the same name in earlier files.
Because of problems with the Windows shell, it's difficult to use
environment variables of the form %VAR% on the command line. To get
around this, mkhelper supports the alternative syntax $VAR$ in the
prefix, suffix, and string substitution options. Use "$$" to indicate
the literal character '$'. If a substring of the form $VAR$ is seen
in one of these command line parameters, mkhelper interprets it as an
environment variable and substitutes its definition.
Alan Liu 12/5/99 ****************************************************/
/* GLOBALS THAT ENCODE THE COMMAND-LINE ARGS */
/* All of these are filled in by parseCommandLine(). PREFIX, SUFFIX, OLD
and NEW hold the result of substituteEnvironmentVars() applied to the
raw argument, or NULL when the option was not given. */
const char* PREFIX = NULL; /* [-p] Prepend to each line */
const char* SUFFIX = NULL; /* [-s] Append to each line */
const char* OLD = NULL; /* [-old] Old pattern */
const char* NEW = NULL; /* [-new] New pattern */
/* Capacity of VARS; parseCommandLine rejects more -n options than this */
#define MAX_VARS 8
int VARS_COUNT = 0; /* Number of entries of VARS in use */
const char* VARS[MAX_VARS]; /* [-n] Variables to look for (one name per -n) */
/* Capacity of FILES; parseCommandLine rejects more input files than this */
#define MAX_FILES 8
int FILES_COUNT = 0; /* Number of entries of FILES in use */
const char* FILES[MAX_FILES]; /* Input files, in command-line order */
/* GLOBALS THAT STORE THE VAR DEFS FROM THE FILES */
/* All four arrays are indexed by the variable's position in VARS. */
int VARS_DEF_COUNT[MAX_VARS]; /* Number of names in this var's def
(maintained by countVarDefs) */
int VARS_DEF_FILE[MAX_VARS]; /* The index of the last file to define
this var. This is used to let files
listed later on the command line
override files listed earlier. We only
use the last file's definition. -1 if
def not seen in any file. */
char** VARS_DEF[MAX_VARS]; /* Actual definitions: an array of
VARS_DEF_COUNT[i] separately allocated
strings (filled in by readVarDefs) */
int VARS_DEF_I[MAX_VARS]; /* Index var used by readVarDefs: number of
strings stored so far in VARS_DEF[i] */
UBool VERBOSE = FALSE; /* [-v] Emit progress and warning messages on stderr */
/* CONSTANTS */
const char COMMENT = '#'; /* Starts a comment that runs to end of line */
const char CR = ((char)13);
const char LF = ((char)10);
const char MINUS = '-';
const char SPACE = ' ';
const char TAB = ((char)9);
const char NUL = ((char)0);
const char CONTINUE = '\\'; /* Line continuation when last char on a line */
/* GLOBAL INPUT BUFFER */
/* readLine() stores the current input line in BUFFER, so lines are
limited to BUFLEN bytes per read. LINE_NO is incremented by readLine()
and reset to 0 at the start of each processFile() pass. */
#define BUFLEN 2048
char BUFFER[BUFLEN];
int32_t LINE_NO;
/* processFile return code and callback */
/* FS_ERR: EOF was hit right after a continuation character.
FS_VARS_SEEN: at least one requested variable definition was found.
FS_NO_VARS: the file was read but defined none of the variables. */
enum FileStat { FS_ERR, FS_VARS_SEEN, FS_NO_VARS };
/* Callback invoked by processLine(). It is called once with token==NULL
and len==0 at the start of a definition, then once per token. A
non-NULL token points into the line being parsed and is NOT
NUL-terminated at len, so handlers must copy exactly len bytes.
fileNo indexes FILES and varNo indexes VARS. */
typedef void (*TokenHandler)(char* token, int32_t len, int fileNo, int varNo);
/* PROTOTYPES */
int main(int argc, char *argv[]);
void substitute(int varNo);
void parseCommandLine(int argc, char *argv[]);
void usage(const char* argv0);
void countVarDefs(char* token, int32_t len, int fileNo, int varNo);
void readVarDefs(char* token, int32_t len, int fileNo, int varNo);
enum FileStat processFile(FileStream*, TokenHandler, int fileNo);
void processLine(int fileNo, int varNo, char* line, UBool isFirstLine, TokenHandler proc);
char* substituteEnvironmentVars(char*);
char* skipWhiteSpace(char* p);
int32_t readLine(FileStream*);
/* Local helpers defined at the end of this file: each prints an error
to stderr and exits with status 1 when its argument is zero/NULL. */
void assert(int); /* use of UBool as arg causes warnings */
void memassert(void* a);
/**
 * Write the command-line help text to stderr and terminate the process
 * with exit status 1.  This function never returns to its caller.
 *
 * @param argv0 The program name as invoked.  It is accepted so callers
 *              can pass argv[0], but the help text does not use it.
 */
void usage(const char* argv0) {
    /* The text contains no conversion specifications, so it can be
       written verbatim. */
    static const char helpText[] =
        "\n"
        "Usage: mkhelper [options] infile...\n"
        " -p prefix Prepend the given string to each line\n"
        " -s suffix Append the given string to each line\n"
        " -old str String to replace with -new str in text. Comparison\n"
        " is case-sensitive. ONLY ONE REPLACEMENT MADE PER STRING.\n"
        " -new str String to replace -old str in text\n"
        " -n var... One or more variable names to parse\n"
        " -v Be verbose\n"
        " infile... One or more input files. Files listed earlier are\n"
        " overridden by files listed later.\n"
        "\n"
        "Read one or more simple UNIX-style makefile fragments, parse one or\n"
        "more variable definitions that define lists of strings, and emit those\n"
        "strings to stdout, one per line, after performing optional string\n"
        "manipulations. Definitions in later files replace those in earlier\n"
        "files. Use $VAR$ for environment vars. See source code for details.\n";

    (void)argv0;
    fputs(helpText, stderr);
    exit(1);
}
/**
 * Program entry point.
 *
 * Parses the command line, then makes up to two passes over every input
 * file: pass 1 (countVarDefs) counts the names in each requested
 * variable's definition and records which file defined it last; pass 2
 * (readVarDefs) is made only when pass 1 saw a variable, and stores the
 * names.  Finally each variable that was seen is optionally run through
 * substitute() and its names are written to stdout, one per line,
 * wrapped in PREFIX and SUFFIX.
 *
 * An input file that cannot be opened is only a warning (shown with -v):
 * it is skipped and the remaining files are still read.
 *
 * @return 0 on success, 1 if processing an input file failed.
 */
int main(int argc, char *argv[]) {
    int i, j;
    UBool err=FALSE;

    parseCommandLine(argc, argv);

    /* Nothing has been defined by any file yet */
    for (i=0; i<VARS_COUNT; ++i) {
        VARS_DEF_FILE[i] = -1;
    }

    for (i=0; i<FILES_COUNT && !err; ++i) {
        FileStream *in = T_FileStream_open(FILES[i], "r");
        if (in == 0) {
            /* This is just a warning, since some files may be
               intentionally absent, like ucmlocal.mk.  Skip this file
               but keep going with the ones listed after it. */
            if (VERBOSE) fprintf(stderr, "Warning(mkhelper): cannot open %s\n", FILES[i]);
            continue;
        }
        if (VERBOSE) fprintf(stderr, "mkhelper: Reading %s\n", FILES[i]);
        switch (processFile(in, countVarDefs, i)) {
        case FS_ERR:
            err = TRUE;
            break;
        case FS_VARS_SEEN:
            /* Got some vars on pass 1; make pass 2 */
            T_FileStream_rewind(in);
            if (FS_VARS_SEEN != processFile(in, readVarDefs, i)) {
                fprintf(stderr, "Error(mkhelper): %s pass 2 failed -- should never happen!\n",
                        FILES[i]);
            }
            break;
        case FS_NO_VARS:
            break;
        }
        T_FileStream_close(in);
    }

    if (err) {
        fprintf(stderr, "Error(mkhelper): file processing failed\n");
        return 1;
    }

    for (i=0; i<VARS_COUNT; ++i) {
        if (VARS_DEF_FILE[i] < 0) {
            if (VERBOSE) fprintf(stderr, "Warning(mkhelper): %s: not seen\n", VARS[i]);
            continue;
        }
        /* Now process the variable defs and output them.  Pass 2 must
           have stored exactly as many names as pass 1 counted. */
        assert(VARS_DEF_I[i] == VARS_DEF_COUNT[i]);
        if (VERBOSE) fprintf(stderr, "mkhelper: %s = %d names in %s\n", VARS[i],
                             VARS_DEF_COUNT[i],
                             FILES[VARS_DEF_FILE[i]]);
        if (OLD) {
            substitute(i);
        }
        for (j=0; j<VARS_DEF_COUNT[i]; ++j) {
            fprintf(stdout, "%s%s%s\n",
                    PREFIX!=NULL?PREFIX:"",
                    VARS_DEF[i][j],
                    SUFFIX!=NULL?SUFFIX:"");
        }
    }
    return 0;
}
/**
* Do old->new string pattern substitution on the given variable def.
* We just do one replacement, but it would be easy to extend this to
* to more. (Just feeling lazy right now.)
*/
void substitute(int varNo) {
int i;
int32_t oldLen = uprv_strlen(OLD);
int32_t newLen = uprv_strlen(NEW);
assert(oldLen > 0 && newLen > 0);
for (i=0; i<VARS_DEF_COUNT[varNo]; ++i) {
char* oldDef = VARS_DEF[varNo][i];
char* match = uprv_strstr(oldDef, OLD);
if (match != NULL) {
char* newDef = uprv_malloc(uprv_strlen(oldDef) + 1 + newLen - oldLen);
memassert(newDef);
uprv_strncpy(newDef, oldDef, match - oldDef);
newDef[match - oldDef] = NUL;
uprv_strcat(newDef, NEW);
uprv_strcat(newDef, match + oldLen);
VARS_DEF[varNo][i] = newDef;
/* Check for multiple matches and issue warning if found. */
match = uprv_strstr(newDef, OLD);
if (match) {
fprintf(stderr,
"Warning(mkhelper): ignoring multiple matches of \"%s\" in \"%s\"\n",
OLD, oldDef);
}
uprv_free(oldDef);
}
}
}
/**
* Recognize "$VAR$" as an environment variable escape (for $ itself,
* recognize $$). The lets us work around difficulties with the
* Windows shell. Return newly allocated string with substitution
* made.
*/
char* substituteEnvironmentVars(char* str) {
char *p, *pp;
char *result;
int32_t len;
int pass;
/* Make 2 passes. First go through and compute the final length.
Next, go through and construct new string. */
for (pass=0; pass<2; ++pass) {
p = str;
/* Do pass-specific initialization */
if (pass==0) {
len = uprv_strlen(str);
} else {
/* len was computed in pass 1 */
pp = result = uprv_malloc(len+1);
memassert(result);
}
while (*p) {
char* q = NULL;
if (*p == '$') {
q = ++p; /* p points after $ now */
while (*q && *q != '$') { ++q; }
if (!*q) {
fprintf(stderr, "Warning(mkhelper): unterminated $ in \"%s\"\n", str);
q = NULL; /* Indicate failure to find variable */
} else {
/* q points to closing $ now */
int32_t varNameLen = q - p;
if (varNameLen == 0) {
/* This is a "$$", which we change to "$"*/
if (pass==0) {
/* subtract one from length */
--len;
} else {
*pp++ = '$';
}
} else {
char* env;
*q = NUL; /* just temporarily */
env = getenv(p);
if (env == NULL) {
fprintf(stderr, "Error(mkhelper): undefined environment variable \"%s\"\n",
p);
exit(1);
}
*q = '$'; /* restore */
if (pass==0) {
len = len - 2 - varNameLen + uprv_strlen(env);
} else {
uprv_strcpy(pp, env);
pp += uprv_strlen(pp);
}
}
p = q+1; /* Move p to after closing '$' */
}
}
if (q == NULL) {
/* No variable at this point */
if (pass==0) {
++p;
} else {
*pp++ = *p++;
}
}
}
}
/* Write zero, check len */
*pp = NUL;
assert((pp - result) == len);
return result;
}
/**
* Token handler callback that counts the number of defs for each var.
* Also records which file is the _last_ to define a variable, so later
* files override earlier ones.
*/
void countVarDefs(char* line, int32_t len, int fileNo, int varNo) {
if (!line) {
VARS_DEF_COUNT[varNo] = 0;
VARS_DEF_FILE[varNo] = fileNo;
} else {
++VARS_DEF_COUNT[varNo];
}
}
/**
 * This is a callback for processFile that reads the var definitions in.
 * It assumes that a previous pass has been made with countVarDefs(), so
 * that VARS_DEF_COUNT and VARS_DEF_FILE are already set for varNo.
 *
 * A NULL token marks the start of a definition: the pointer array
 * VARS_DEF[varNo] is allocated and the fill index reset.  Every other
 * token is copied into a freshly allocated, NUL-terminated string and
 * appended to the array.
 *
 * @param line   The token (not NUL-terminated at len), or NULL for the
 *               start-of-definition call.
 * @param len    Number of characters in the token.
 * @param fileNo Index into FILES of the file being read.
 * @param varNo  Index into VARS of the variable being defined.
 */
void readVarDefs(char* line, int32_t len, int fileNo, int varNo) {
    char* copy;

    /* Ignore definitions not in the last defining file, as recorded in
       VARS_DEF_FILE. */
    if (fileNo != VARS_DEF_FILE[varNo]) {
        return;
    }

    if (!line) {
        /* Allocate the array of pointers.  A definition may legally be
           empty ("VAR ="); always ask for at least one slot, because a
           zero-byte request is allowed to return NULL (assuming
           uprv_malloc behaves like malloc here) and that would be
           misreported as out of memory. */
        int slots = VARS_DEF_COUNT[varNo] > 0 ? VARS_DEF_COUNT[varNo] : 1;
        VARS_DEF[varNo] = uprv_malloc(sizeof(char*) * slots);
        memassert(VARS_DEF[varNo]);
        /* Initialize index */
        VARS_DEF_I[varNo] = 0;
    } else {
        /* Sanity check: never store more names than pass 1 counted */
        assert(VARS_DEF_I[varNo] < VARS_DEF_COUNT[varNo]);
        /* Allocate buffer */
        copy = VARS_DEF[varNo][VARS_DEF_I[varNo]] = uprv_malloc(len + 1);
        memassert(copy);
        /* Copy exactly len characters and terminate; the token itself
           is not NUL-terminated at len. */
        uprv_strncpy(copy, line, len);
        copy[len] = NUL;
        ++VARS_DEF_I[varNo];
    }
}
/**
 * Split one line of a variable definition into blank-separated tokens
 * and hand each one to the token handler.
 *
 * When isFirstLine is set, the handler is first called once with a NULL
 * token so that it can initialize itself for this variable.  A trailing
 * continuation character (CONTINUE as the very last character of the
 * line) is not part of any token and ends the scan.
 *
 * @param fileNo      Index into FILES of the file being read.
 * @param varNo       Index into VARS of the variable being defined.
 * @param line        NUL-terminated text following the '=' (or a
 *                    continuation line).
 * @param isFirstLine TRUE for the line that carries the '='.
 * @param proc        Handler to call for the tokens.
 */
void processLine(int fileNo, int varNo, char* line, UBool isFirstLine, TokenHandler proc) {
    char* cursor;

    if (isFirstLine) {
        /* Initialization call */
        (*proc)(NULL, 0, fileNo, varNo);
    }

    cursor = skipWhiteSpace(line);
    while (*cursor != NUL && !(*cursor == CONTINUE && cursor[1] == NUL)) {
        char* end = cursor;
        int32_t tokenLen;

        /* Advance to the first blank, the end of the line, or a
           trailing continuation character */
        for (; *end != NUL; ++end) {
            if (*end == SPACE || *end == TAB) {
                break;
            }
            if (*end == CONTINUE && end[1] == NUL) {
                break;
            }
        }

        tokenLen = end - cursor;
        assert(tokenLen > 0);
        (*proc)(cursor, tokenLen, fileNo, varNo);

        cursor = skipWhiteSpace(end);
    }
}
/**
 * Make a pass through a file, looking for variable def lines that
 * match one of the variables in VARS, and handing them off to
 * processLine (including any continuation lines).
 *
 * A line defines variable V when, after leading blanks, it starts with
 * V followed by optional blanks and '='.  Any other non-empty line is
 * reported on stderr and ignored.  LINE_NO is reset to 0 at the start
 * of the pass.
 *
 * @param in     Open input stream, positioned at the start of the file.
 * @param proc   Token handler to use for this pass.
 * @param fileNo Index into FILES of the file being read.
 * @return FS_VARS_SEEN if at least one definition was found,
 *         FS_NO_VARS if none was, or FS_ERR if the file ended right
 *         after a continuation character.
 */
enum FileStat processFile(FileStream* in, TokenHandler proc, int fileNo) {
    int32_t len;
    char* p;
    int j;
    UBool varsSeen = FALSE;

    LINE_NO = 0;
    while ((len = readLine(in)) >= 0) {
        UBool varFound = FALSE;

        if (len == 0) {
            continue; /* Blank or comment-only line */
        }

        /* Skip white space */
        p = skipWhiteSpace(BUFFER);

        /* Is the next word a var? */
        for (j=0; j<VARS_COUNT && !varFound; ++j) {
            size_t nameLen = uprv_strlen(VARS[j]);
            UBool isFirstLine = TRUE;
            char* q;

            if (0 != uprv_strncmp(VARS[j], p, nameLen)) {
                continue;
            }
            /* Found a var, maybe...parse more to see.  Work on a copy
               of p so a near miss leaves p alone for the next var. */
            q = p + nameLen; /* Go past var name */
            /* Now look for /\s*=/ */
            if (*q != SPACE && *q != TAB && *q != '=') {
                continue; /* Didn't see it after all */
            }
            q = skipWhiteSpace(q);
            if (*q != '=') {
                continue; /* Didn't see it after all */
            }
            ++q;

            /* Now it's definite */
            varsSeen = varFound = TRUE;
            q = skipWhiteSpace(q);

            /* Now read file names until we get to the end of this
               line, including line continuation characters. */
            for (;;) {
                size_t restLen;
                processLine(fileNo, j, q, isFirstLine, proc);
                isFirstLine = FALSE;

                /* Continue only if the line's last character is the
                   continuation character.  An empty remainder has no
                   last character; never look in front of it. */
                restLen = uprv_strlen(q);
                if (restLen == 0 || q[restLen - 1] != CONTINUE) {
                    /* Last line processed -- look for more variables */
                    break;
                }
                if ((len = readLine(in)) < 0) {
                    fprintf(stderr, "Error(mkhelper): unexpected eof after continuation char\n");
                    return FS_ERR; /*fail*/
                }
                q = skipWhiteSpace(BUFFER);
            }
        }

        if (!varFound) {
            /* LINE_NO is an int32_t; convert it to match %ld */
            fprintf(stderr, "Warning(mkhelper): ignoring %s line %ld: %s\n",
                    FILES[fileNo], (long)LINE_NO, BUFFER);
        }
    }
    return varsSeen ? FS_VARS_SEEN : FS_NO_VARS;
}
/**
 * Return a pointer to the first character at or after p that is
 * neither a space nor a tab.  The terminating NUL counts as such a
 * character, so the scan never runs past the end of the string.
 */
char* skipWhiteSpace(char* p) {
    for (;;) {
        if (*p != SPACE && *p != TAB) {
            return p;
        }
        ++p;
    }
}
/**
 * Parse the command line into the option globals.
 *
 * Options are recognized by the character after the '-':
 *   -p / -s   take one argument: PREFIX / SUFFIX
 *   -o...     takes one argument: OLD    (written -old)
 *   -ne...    takes one argument: NEW    (written -new)
 *   -n        takes one argument: one variable name, appended to VARS
 *   -v        no argument: sets VERBOSE
 * Anything not starting with '-' is an input file, appended to FILES.
 * The -p, -s, -old and -new arguments are passed through
 * substituteEnvironmentVars() before being stored.
 *
 * On any error (missing argument, repeated option, too many variables
 * or files, unknown option, no variable or no input file, -old without
 * -new or vice versa) a message is printed and usage() terminates the
 * program.
 *
 * @param argc Argument count as passed to main.
 * @param argv Argument vector as passed to main.
 */
void parseCommandLine(int argc, char *argv[]) {
    int i;

    for (i=1; i<argc; ++i) {
        const char* arg = argv[i];

        if (arg[0] != '-') {
            /* Not an option: an input file */
            if (FILES_COUNT == MAX_FILES) {
                fprintf(stderr, "Too many input files -- fix tool and recompile!\n");
                usage(argv[0]);
            }
            FILES[FILES_COUNT++] = arg;
            continue;
        }

        /* Options that take an argument must not be last */
        switch (arg[1]) {
        case 'p':
        case 's':
        case 'o':
        case 'n':
            if ((i+1) >= argc) {
                fprintf(stderr, "%s must be followed by something\n", arg);
                usage(argv[0]);
            }
            break;
        default:
            break;
        }

        /* Handle options */
        switch (arg[1]) {
        case 'p':
            if (PREFIX) {
                fprintf(stderr, "Multiple -p options not allowed!\n");
                usage(argv[0]);
            }
            PREFIX = substituteEnvironmentVars(argv[++i]);
            break;
        case 's':
            if (SUFFIX) {
                fprintf(stderr, "Multiple -s options not allowed!\n");
                usage(argv[0]);
            }
            SUFFIX = substituteEnvironmentVars(argv[++i]);
            break;
        case 'v':
            VERBOSE = TRUE;
            break;
        case 'o':
            if (OLD) {
                fprintf(stderr, "Multiple -old options not allowed!\n");
                usage(argv[0]);
            }
            OLD = substituteEnvironmentVars(argv[++i]);
            break;
        case 'n':
            if (arg[2] == 'e') {
                /* -new */
                if (NEW) {
                    fprintf(stderr, "Multiple -new options not allowed!\n");
                    usage(argv[0]);
                }
                NEW = substituteEnvironmentVars(argv[++i]);
            } else {
                /* -n: exactly one variable name per option */
                if (VARS_COUNT == MAX_VARS) {
                    fprintf(stderr, "Too many -n options -- fix tool and recompile!\n");
                    usage(argv[0]);
                }
                VARS[VARS_COUNT++] = argv[++i];
            }
            break;
        default:
            fprintf(stderr, "Bad option %s\n", arg);
            usage(argv[0]);
            break;
        }
    }

    /* Make sure at least one input file and one variable exist */
    if (FILES_COUNT < 1 || VARS_COUNT < 1) {
        fprintf(stderr, "Please specify at least one variable and one input file\n");
        usage(argv[0]);
    }

    /* Need both old & new or neither */
    if ((OLD != NULL) != (NEW != NULL)) {
        fprintf(stderr, "Specify both -old and -new, or neither\n");
        usage(argv[0]);
    }

    if (VERBOSE) {
        if (OLD) {
            fprintf(stderr, "mkhelper: Substituting \"%s\" -> \"%s\"\n", OLD, NEW);
        }
        if (PREFIX) {
            fprintf(stderr, "mkhelper: Prefix \"%s\"\n", PREFIX);
        }
        if (SUFFIX) {
            fprintf(stderr, "mkhelper: Suffix \"%s\"\n", SUFFIX);
        }
    }
}
/**
 * Read one line into BUFFER, trim any comment, remove trailing white
 * space and line separators, bump the LINE_NO, return length.  Return
 * negative value when EOF reached.
 *
 * Trailing spaces and tabs are removed from every line, not only in
 * front of a comment, so that a continuation character followed by
 * stray blanks is still the last character of the line and a line that
 * holds nothing but blanks comes back with length 0.
 *
 * @param in Open input stream.
 * @return Length of the trimmed line now in BUFFER (0 for a blank or
 *         comment-only line), or -1 at end of file.
 */
int32_t readLine(FileStream* in) {
    char* p;

    ++LINE_NO;
    if (T_FileStream_readLine(in, BUFFER, BUFLEN) == NULL) {
        return -1; /* EOF */
    }

    /* Trim off trailing comment */
    p = uprv_strchr(BUFFER, COMMENT);
    if (p != 0) {
        *p = NUL;
    }

    /* Delete any trailing ^J and/or ^M characters together with any
       space or tab characters at the end of what is left */
    p = BUFFER + uprv_strlen(BUFFER);
    while (p > BUFFER &&
           (p[-1] == CR || p[-1] == LF || p[-1] == SPACE || p[-1] == TAB)) {
        p--;
    }
    *p = NUL;

    return (int32_t)(p - BUFFER);
}
/**
 * Internal consistency check.  Returns normally when a is non-zero;
 * otherwise reports an assertion failure on stderr and exits with
 * status 1.
 */
void assert(int a) {
    if (a) {
        return;
    }
    fprintf(stderr, "Error(mkhelper): assertion failure\n");
    exit(1);
}
/**
 * Allocation check.  Returns normally when a is a non-NULL pointer;
 * otherwise reports an out-of-memory error on stderr and exits with
 * status 1.
 */
void memassert(void* a) {
    if (a != NULL) {
        return;
    }
    fprintf(stderr, "Error(mkhelper): out of memory\n");
    exit(1);
}