// poppler/CharCodeToUnicode.cc - third_party/poppler - Git at Google

 //========================================================================
 //
 // CharCodeToUnicode.cc
 //
 // Copyright 2001-2003 Glyph & Cog, LLC
 //
 //========================================================================

 //========================================================================
 //
 // Modified under the Poppler project - http://poppler.freedesktop.org
 //
 // All changes made under the Poppler project to this file are licensed
 // under GPL version 2 or later
 //
 // Copyright (C) 2006, 2008-2010 Albert Astals Cid <aacid@kde.org>
 // Copyright (C) 2007 Julien Rebetez <julienr@svn.gnome.org>
 // Copyright (C) 2007 Koji Otani <sho@bbr.jp>
 // Copyright (C) 2008 Michael Vrable <mvrable@cs.ucsd.edu>
 // Copyright (C) 2008 Vasile Gaburici <gaburici@cs.umd.edu>
 // Copyright (C) 2010 William Bader <williambader@hotmail.com>
 // Copyright (C) 2010 Jakub Wilk <ubanus@users.sf.net>
 //
 // To see a description of the changes please see the Changelog file that
 // came with your tarball or type make ChangeLog if you are building from git
 //
 //========================================================================

 #include <config.h>

 #ifdef USE_GCC_PRAGMAS
 #pragma implementation
 #endif

 #include <stdio.h>
 #include <string.h>
 #include "goo/gmem.h"
 #include "goo/gfile.h"
 #include "goo/GooLikely.h"
 #include "goo/GooString.h"
 #include "Error.h"
 #include "GlobalParams.h"
 #include "PSTokenizer.h"
 #include "CharCodeToUnicode.h"

 //------------------------------------------------------------------------

 // One entry of the multi-value mapping table: a character code that maps
 // to a sequence of Unicode values rather than to a single value.
 struct CharCodeToUnicodeString {
   CharCode c;		// character code this entry belongs to
   Unicode *u;		// array of <len> Unicode values (allocated with gmallocn)
   int len;		// number of values stored in <u>
 };

 //------------------------------------------------------------------------

 // Character-source callback that reads from a NUL-terminated string.
 //
 // <data> points to a char* cursor.  Each call returns the byte under the
 // cursor and advances the cursor by one; once the terminating NUL is
 // reached the cursor is left in place and EOF is returned on every call.
 //
 // The byte is widened through unsigned char, exactly as fgetc() does, so
 // bytes >= 0x80 come back as 128..255.  With a plain (possibly signed)
 // char, 0xff would be returned as -1 and be mistaken for EOF, and other
 // high bytes would be returned as negative values.
 static int getCharFromString(void *data) {
   char **cursor = (char **)data;
   char *p = *cursor;

   if (!*p) {
     return EOF;
   }
   *cursor = p + 1;
   return (unsigned char)*p;
 }

 // Character-source callback that reads from a stdio stream.
 // <data> is the FILE* to read from; the result is whatever fgetc()
 // returns for it (the next byte, or EOF).
 static int getCharFromFile(void *data) {
   FILE *stream = static_cast<FILE *>(data);
   return fgetc(stream);
 }

 //------------------------------------------------------------------------

 // Builds a map from a cidToUnicode text file.
 //
 // The file is read line by line; the hex number at the start of line N
 // (counting from zero) becomes map entry N.  A line that does not start
 // with a hex number is reported and its entry is set to 0.  The new
 // object is tagged with a copy of <collection>.
 //
 // Returns NULL if the file cannot be opened.
 CharCodeToUnicode *CharCodeToUnicode::parseCIDToUnicode(GooString *fileName,
                                                         GooString *collection) {
   FILE *in = fopen(fileName->getCString(), "r");
   if (!in) {
     error(-1, "Couldn't open cidToUnicode file '%s'",
           fileName->getCString());
     return NULL;
   }

   CharCode capacity = 32768;
   CharCode count = 0;
   Unicode *table = (Unicode *)gmallocn(capacity, sizeof(Unicode));
   char lineBuf[64];

   while (getLine(lineBuf, sizeof(lineBuf), in)) {
     // grow the table geometrically when it is full
     if (count == capacity) {
       capacity *= 2;
       table = (Unicode *)greallocn(table, capacity, sizeof(Unicode));
     }
     Unicode value;
     if (sscanf(lineBuf, "%x", &value) != 1) {
       error(-1, "Bad line (%d) in cidToUnicode file '%s'",
             (int)(count + 1), fileName->getCString());
       value = 0;
     }
     table[count] = value;
     ++count;
   }
   fclose(in);

   // the constructor copies the table (copyMap == gTrue), so the local
   // buffer is released afterwards
   CharCodeToUnicode *result =
       new CharCodeToUnicode(collection->copy(), table, count, gTrue,
                             NULL, 0, 0);
   gfree(table);
   return result;
 }

 // Builds a map from a unicodeToUnicode text file.
 //
 // Each line holds whitespace-separated hex numbers: the first is the
 // source value, the remaining ones (at least one) are what it maps to.
 // A single target is stored in the direct map; several targets are
 // stored as an entry of the multi-value table and the direct map entry
 // is set to 0.  Malformed lines are reported and skipped.  The new
 // object is tagged with a copy of <fileName>.
 //
 // Returns NULL if the file cannot be opened.
 CharCodeToUnicode *CharCodeToUnicode::parseUnicodeToUnicode(
                                                     GooString *fileName) {
   FILE *f;
   Unicode *mapA;
   CharCodeToUnicodeString *sMapA;
   CharCode size, newSize, doubled, len, sMapSizeA, sMapLenA;
   char buf[256];
   char *tok;
   Unicode u0;
   int uBufSize = 8;
   Unicode *uBuf;
   CharCodeToUnicode *ctu;
   int line, n, i;
   char *tokptr;

   if (!(f = fopen(fileName->getCString(), "r"))) {
     error(-1, "Couldn't open unicodeToUnicode file '%s'",
           fileName->getCString());
     return NULL;
   }

   // scratch buffer for the target values of the current line
   uBuf = (Unicode *)gmallocn(uBufSize, sizeof(Unicode));

   size = 4096;
   mapA = (Unicode *)gmallocn(size, sizeof(Unicode));
   memset(mapA, 0, size * sizeof(Unicode));
   len = 0;
   sMapA = NULL;
   sMapSizeA = sMapLenA = 0;

   line = 0;
   while (getLine(buf, sizeof(buf), f)) {
     ++line;
     // first token: the source value
     if (!(tok = strtok_r(buf, " \t\r\n", &tokptr)) ||
         sscanf(tok, "%x", &u0) != 1) {
       error(-1, "Bad line (%d) in unicodeToUnicode file '%s'",
             line, fileName->getCString());
       continue;
     }
     // remaining tokens: the target values
     n = 0;
     while ((tok = strtok_r(NULL, " \t\r\n", &tokptr))) {
       if (n >= uBufSize)
       {
         uBufSize += 8;
         uBuf = (Unicode *)greallocn(uBuf, uBufSize, sizeof(Unicode));
       }
       if (sscanf(tok, "%x", &uBuf[n]) != 1) {
         error(-1, "Bad line (%d) in unicodeToUnicode file '%s'",
               line, fileName->getCString());
         break;
       }
       ++n;
     }
     if (n < 1) {
       error(-1, "Bad line (%d) in unicodeToUnicode file '%s'",
             line, fileName->getCString());
       continue;
     }
     if (u0 >= size) {
       // Grow the direct map by doubling until it covers u0.  Doubling
       // can wrap around for a very large u0; without a check the size
       // would become 0 and this loop would never end, so such a line
       // is rejected instead.
       newSize = size;
       while (u0 >= newSize) {
         doubled = newSize * 2;
         if (doubled <= newSize) {
           newSize = 0;
           break;
         }
         newSize = doubled;
       }
       if (newSize == 0) {
         error(-1, "Bad line (%d) in unicodeToUnicode file '%s'",
               line, fileName->getCString());
         continue;
       }
       mapA = (Unicode *)greallocn(mapA, newSize, sizeof(Unicode));
       memset(mapA + size, 0, (newSize - size) * sizeof(Unicode));
       size = newSize;
     }
     if (n == 1) {
       mapA[u0] = uBuf[0];
     } else {
       mapA[u0] = 0;
       if (sMapLenA == sMapSizeA) {
         sMapSizeA += 16;
         sMapA = (CharCodeToUnicodeString *)
                   greallocn(sMapA, sMapSizeA, sizeof(CharCodeToUnicodeString));
       }
       sMapA[sMapLenA].c = u0;
       sMapA[sMapLenA].u = (Unicode*)gmallocn(n, sizeof(Unicode));
       for (i = 0; i < n; ++i) {
         sMapA[sMapLenA].u[i] = uBuf[i];
       }
       sMapA[sMapLenA].len = n;
       ++sMapLenA;
     }
     // <len> tracks one past the highest source value seen
     if (u0 >= len) {
       len = u0 + 1;
     }
   }
   fclose(f);

   // the direct map is copied by the constructor (copyMap == gTrue); the
   // multi-value table pointer is handed over as is
   ctu = new CharCodeToUnicode(fileName->copy(), mapA, len, gTrue,
                               sMapA, sMapLenA, sMapSizeA);
   gfree(mapA);
   gfree(uBuf);
   return ctu;
 }

 // Creates an untagged map from the first 256 entries of <toUnicode>.
 // The entries are copied (copyMap == gTrue); no multi-value table is set.
 CharCodeToUnicode *CharCodeToUnicode::make8BitToUnicode(Unicode *toUnicode) {
   CharCodeToUnicode *ctu =
       new CharCodeToUnicode(NULL, toUnicode, 256, gTrue, NULL, 0, 0);
   return ctu;
 }

 // Creates an untagged map and fills it by parsing the ToUnicode CMap
 // text held in <buf>.  <nBits> is passed through to parseCMap1().
 CharCodeToUnicode *CharCodeToUnicode::parseCMap(GooString *buf, int nBits) {
   // read cursor that getCharFromString() advances through the buffer
   char *cursor = buf->getCString();

   CharCodeToUnicode *result = new CharCodeToUnicode(NULL);
   result->parseCMap1(&getCharFromString, &cursor, nBits);
   return result;
 }

 // Creates an untagged map and fills it from the ToUnicode CMap file that
 // globalParams->findToUnicodeFile() returns for <fileName>.  If no file
 // is found, an error is reported and the default-initialised map is
 // returned.
 CharCodeToUnicode *CharCodeToUnicode::parseCMapFromFile(GooString *fileName,
   int nBits) {
   CharCodeToUnicode *result = new CharCodeToUnicode(NULL);

   FILE *in = globalParams->findToUnicodeFile(fileName);
   if (!in) {
     error(-1, "Couldn't find ToUnicode CMap file for '%s'",
           fileName->getCString());
     return result;
   }
   result->parseCMap1(&getCharFromFile, in, nBits);
   fclose(in);
   return result;
 }

 // Parses the ToUnicode CMap text in <buf> and adds its mappings to this
 // object.  <nBits> is passed through to parseCMap1().
 void CharCodeToUnicode::mergeCMap(GooString *buf, int nBits) {
   // read cursor that getCharFromString() advances through the buffer
   char *cursor = buf->getCString();
   parseCMap1(&getCharFromString, &cursor, nBits);
 }

 // Returns true if <tok> (of length <n>) has the shape of a source code in
 // a bfchar/bfrange entry: '<', then <nDigits> characters, then '>' --
 // or the same with two extra leading '0' characters after the '<'.
 static bool isCMapCodeToken(const char *tok, int n, int nDigits) {
   if (n == 2 + nDigits) {
     return tok[0] == '<' && tok[n - 1] == '>';
   }
   if (n == 4 + nDigits) {
     return tok[0] == '<' && tok[n - 1] == '>' &&
            tok[1] == '0' && tok[2] == '0';
   }
   return false;
 }

 // Tokenizes a ToUnicode CMap read through <getCharFunc>/<data> and adds
 // the mappings it defines to this object.
 //
 // Three operators are handled:
 //   /name usecmap          -- recursively parse the named CMap file
 //   beginbfchar..endbfchar -- <code> <unicode> pairs
 //   beginbfrange..endbfrange
 //                          -- <code1> <code2> followed by either a
 //                             <unicode> start value or a [ ... ] array
 // Everything else is skipped.  Source codes must have nBits/4 hex digits
 // (optionally preceded by "00"); malformed entries are reported and
 // skipped.
 void CharCodeToUnicode::parseCMap1(int (*getCharFunc)(void *), void *data,
                                    int nBits) {
   PSTokenizer *pst;
   char tok1[256], tok2[256], tok3[256];
   int nDigits, n1, n2, n3;
   CharCode i;
   CharCode code1, code2;
   GooString *name;
   FILE *f;

   nDigits = nBits / 4;
   pst = new PSTokenizer(getCharFunc, data);
   // tok1 trails tok2 by one token, so that the operand preceding an
   // operator (e.g. the name before "usecmap") is still available
   pst->getToken(tok1, sizeof(tok1), &n1);
   while (pst->getToken(tok2, sizeof(tok2), &n2)) {
     if (!strcmp(tok2, "usecmap")) {
       if (tok1[0] == '/') {
         name = new GooString(tok1 + 1);
         if ((f = globalParams->findToUnicodeFile(name))) {
           parseCMap1(&getCharFromFile, f, nBits);
           fclose(f);
         } else {
           error(-1, "Couldn't find ToUnicode CMap file for '%s'",
                 name->getCString());
         }
         delete name;
       }
       pst->getToken(tok1, sizeof(tok1), &n1);
     } else if (!strcmp(tok2, "beginbfchar")) {
       while (pst->getToken(tok1, sizeof(tok1), &n1)) {
         if (!strcmp(tok1, "endbfchar")) {
           break;
         }
         if (!pst->getToken(tok2, sizeof(tok2), &n2) ||
             !strcmp(tok2, "endbfchar")) {
           error(-1, "Illegal entry in bfchar block in ToUnicode CMap");
           break;
         }
         if (!(isCMapCodeToken(tok1, n1, nDigits) &&
               tok2[0] == '<' && tok2[n2 - 1] == '>')) {
           error(-1, "Illegal entry in bfchar block in ToUnicode CMap");
           continue;
         }
         // strip the closing '>' so the hex digits are NUL-terminated
         tok1[n1 - 1] = tok2[n2 - 1] = '\0';
         if (sscanf(tok1 + 1, "%x", &code1) != 1) {
           error(-1, "Illegal entry in bfchar block in ToUnicode CMap");
           continue;
         }
         addMapping(code1, tok2 + 1, n2 - 2, 0);
       }
       pst->getToken(tok1, sizeof(tok1), &n1);
     } else if (!strcmp(tok2, "beginbfrange")) {
       while (pst->getToken(tok1, sizeof(tok1), &n1)) {
         if (!strcmp(tok1, "endbfrange")) {
           break;
         }
         if (!pst->getToken(tok2, sizeof(tok2), &n2) ||
             !strcmp(tok2, "endbfrange") ||
             !pst->getToken(tok3, sizeof(tok3), &n3) ||
             !strcmp(tok3, "endbfrange")) {
           error(-1, "Illegal entry in bfrange block in ToUnicode CMap");
           break;
         }
         // Both range bounds are validated the same way.  (The "00"
         // padding of the second bound used to be checked on tok1
         // instead of tok2.)
         if (!(isCMapCodeToken(tok1, n1, nDigits) &&
               isCMapCodeToken(tok2, n2, nDigits))) {
           error(-1, "Illegal entry in bfrange block in ToUnicode CMap");
           continue;
         }
         tok1[n1 - 1] = tok2[n2 - 1] = '\0';
         if (sscanf(tok1 + 1, "%x", &code1) != 1 ||
             sscanf(tok2 + 1, "%x", &code2) != 1) {
           error(-1, "Illegal entry in bfrange block in ToUnicode CMap");
           continue;
         }
         if (!strcmp(tok3, "[")) {
           // array form: one <unicode> string per code in the range.
           // The token is fetched before the range test, so the
           // closing "]" is consumed when the array fills the range.
           i = 0;
           while (pst->getToken(tok1, sizeof(tok1), &n1) &&
                  code1 + i <= code2) {
             if (!strcmp(tok1, "]")) {
               break;
             }
             if (tok1[0] == '<' && tok1[n1 - 1] == '>') {
               tok1[n1 - 1] = '\0';
               addMapping(code1 + i, tok1 + 1, n1 - 2, 0);
             } else {
               error(-1, "Illegal entry in bfrange block in ToUnicode CMap");
             }
             ++i;
           }
         } else if (tok3[0] == '<' && tok3[n3 - 1] == '>') {
           // start-value form: code1+i maps to the start value plus i
           tok3[n3 - 1] = '\0';
           for (i = 0; code1 <= code2; ++code1, ++i) {
             addMapping(code1, tok3 + 1, n3 - 2, i);
           }

         } else {
           error(-1, "Illegal entry in bfrange block in ToUnicode CMap");
         }
       }
       pst->getToken(tok1, sizeof(tok1), &n1);
     } else {
       // not an operator we handle: remember it as the previous token
       strcpy(tok1, tok2);
     }
   }
   delete pst;
 }

 // Records the mapping of <code> to the Unicode value(s) written as hex
 // digits in <uStr>.
 //
 // <n> is the number of hex digits in <uStr>.  Up to four digits are
 // parsed as one value and stored in the direct map.  Longer strings are
 // split into groups of four digits (n / 4 values) and stored as a new
 // entry of the multi-value table, with the direct map entry set to 0.
 // <offset> is added to the single value, or to the last value of the
 // sequence.  The direct map is grown (zero-filled, in multiples of 256)
 // when <code> lies beyond its end.
 void CharCodeToUnicode::addMapping(CharCode code, char *uStr, int n,
                                    int offset) {
   CharCode newLen, i;
   Unicode u;
   char uHex[5];
   int j;

   if (code >= mapLen) {
     newLen = (code + 256) & ~255;
     // code + 256 can wrap around; in that case leave the map and
     // mapLen exactly as they were
     if (unlikely(code >= newLen)) {
       error(-1, "Illegal code value in CharCodeToUnicode::addMapping");
       return;
     }
     map = (Unicode *)greallocn(map, newLen, sizeof(Unicode));
     for (i = mapLen; i < newLen; ++i) {
       map[i] = 0;
     }
     mapLen = newLen;
   }
   if (n <= 4) {
     if (sscanf(uStr, "%x", &u) != 1) {
       error(-1, "Illegal entry in ToUnicode CMap");
       return;
     }
     map[code] = u + offset;
   } else {
     if (sMapLen >= sMapSize) {
       sMapSize = sMapSize + 16;
       sMap = (CharCodeToUnicodeString *)
                greallocn(sMap, sMapSize, sizeof(CharCodeToUnicodeString));
     }
     map[code] = 0;
     sMap[sMapLen].c = code;
     sMap[sMapLen].len = n / 4;
     sMap[sMapLen].u = (Unicode*)gmallocn(sMap[sMapLen].len, sizeof(Unicode));
     for (j = 0; j < sMap[sMapLen].len; ++j) {
       strncpy(uHex, uStr + j*4, 4);
       uHex[4] = '\0';
       if (sscanf(uHex, "%x", &sMap[sMapLen].u[j]) != 1) {
         error(-1, "Illegal entry in ToUnicode CMap");
         // do not leave the freshly allocated element uninitialised
         sMap[sMapLen].u[j] = 0;
       }
     }
     sMap[sMapLen].u[sMap[sMapLen].len - 1] += offset;
     ++sMapLen;
   }
 }

 // Creates a map with 256 zeroed direct entries, an empty multi-value
 // table and a reference count of 1.  <tagA> is stored as the tag (the
 // destructor deletes it).
 CharCodeToUnicode::CharCodeToUnicode(GooString *tagA) {
   tag = tagA;
   refCnt = 1;

   sMap = NULL;
   sMapLen = 0;
   sMapSize = 0;

   mapLen = 256;
   map = (Unicode *)gmallocn(mapLen, sizeof(Unicode));
   memset(map, 0, mapLen * sizeof(Unicode));
 #if MULTITHREADED
   gInitMutex(&mutex);
 #endif
 }

 // Creates a map from existing tables, with a reference count of 1.
 //
 // If <copyMap> is set, the <mapLenA> entries of <mapA> are copied into a
 // newly allocated array; otherwise the <mapA> pointer itself is stored.
 // <tagA> and <sMapA> are stored as given (the destructor deletes/frees
 // them).
 CharCodeToUnicode::CharCodeToUnicode(GooString *tagA, Unicode *mapA,
                                      CharCode mapLenA, GBool copyMap,
                                      CharCodeToUnicodeString *sMapA,
                                      int sMapLenA, int sMapSizeA) {
   tag = tagA;
   refCnt = 1;

   mapLen = mapLenA;
   map = mapA;
   if (copyMap) {
     map = (Unicode *)gmallocn(mapLen, sizeof(Unicode));
     memcpy(map, mapA, mapLen * sizeof(Unicode));
   }

   sMap = sMapA;
   sMapLen = sMapLenA;
   sMapSize = sMapSizeA;
 #if MULTITHREADED
   gInitMutex(&mutex);
 #endif
 }

 // Releases the tag, the direct map, and every Unicode array of the
 // multi-value table together with the table itself.
 CharCodeToUnicode::~CharCodeToUnicode() {
   delete tag;	// deleting a null pointer is a no-op
   gfree(map);
   if (sMap != NULL) {
     CharCodeToUnicodeString *const last = sMap + sMapLen;
     for (CharCodeToUnicodeString *entry = sMap; entry < last; ++entry) {
       gfree(entry->u);
     }
     gfree(sMap);
   }
 #if MULTITHREADED
   gDestroyMutex(&mutex);
 #endif
 }

 // Adds one reference to this object.  In MULTITHREADED builds the
 // counter is updated while holding the object's mutex.
 void CharCodeToUnicode::incRefCnt() {
 #if MULTITHREADED
   gLockMutex(&mutex);
 #endif
   ++refCnt;
 #if MULTITHREADED
   gUnlockMutex(&mutex);
 #endif
 }

 void CharCodeToUnicode::decRefCnt() {
   GBool done;

 #if MULTITHREADED
   gLockMutex(&mutex);
 #endif
   done = --refCnt == 0;
 #if MULTITHREADED
   gUnlockMutex(&mutex);
 #endif
   if (done) {
     delete this;
   }
 }

 // Returns true if this object has a tag and GooString::cmp() reports it
 // equal to <tagA> (i.e. returns 0).  Untagged objects never match.
 GBool CharCodeToUnicode::match(GooString *tagA) {
   return tag != NULL && tag->cmp(tagA) == 0;
 }

 // Sets the mapping of <c> to the <len> Unicode values in <u>.
 //
 // A single value goes into the direct map.  Otherwise the values are
 // copied into the multi-value table -- replacing the first existing
 // entry for <c>, or appending a new one -- and the direct map entry is
 // set to 0.
 // NOTE(review): <c> is not checked against mapLen here -- confirm that
 // every caller passes an in-range code.
 void CharCodeToUnicode::setMapping(CharCode c, Unicode *u, int len) {
   if (len == 1) {
     map[c] = u[0];
     return;
   }

   // look for an existing multi-value entry for this code
   int slot = 0;
   while (slot < sMapLen && sMap[slot].c != c) {
     ++slot;
   }
   if (slot < sMapLen) {
     // reuse the entry; its old value array is replaced below
     gfree(sMap[slot].u);
   } else {
     // append a new entry, growing the table in steps of 8
     if (sMapLen == sMapSize) {
       sMapSize += 8;
       sMap = (CharCodeToUnicodeString *)
                greallocn(sMap, sMapSize, sizeof(CharCodeToUnicodeString));
     }
     ++sMapLen;
   }

   map[c] = 0;
   CharCodeToUnicodeString *entry = &sMap[slot];
   entry->c = c;
   entry->len = len;
   entry->u = (Unicode*)gmallocn(len, sizeof(Unicode));
   for (int k = 0; k < len; ++k) {
     entry->u[k] = u[k];
   }
 }

 // Looks up the Unicode value(s) for <c>.
 //
 // On success *u is pointed at storage owned by this object and the
 // number of values is returned.  Returns 0 (leaving *u untouched) when
 // <c> is out of range or has no mapping.
 int CharCodeToUnicode::mapToUnicode(CharCode c, Unicode **u) {
   if (c >= mapLen) {
     return 0;
   }
   if (map[c] != 0) {
     *u = &map[c];
     return 1;
   }
   // scan the multi-value table from the newest entry backwards, so that
   // entries added later (the CMap) take precedence
   int idx = sMapLen;
   while (idx-- > 0) {
     if (sMap[idx].c == c) {
       *u = sMap[idx].u;
       return sMap[idx].len;
     }
   }
   return 0;
 }

 // Reverse lookup: finds a character code that maps to the <usize>
 // Unicode values in <u>.
 //
 // For a single value the direct map is searched and the first matching
 // code is stored in *c; if none matches, *c is set to 'x'.  For longer
 // sequences the multi-value table is searched for an entry with the
 // same length and the same values.  Returns 1 if a code was found and
 // 0 otherwise.
 int CharCodeToUnicode::mapToCharCode(Unicode* u, CharCode *c, int usize) {
   if (usize == 1) {
     // look for the value in the direct map
     for (CharCode i = 0; i < mapLen; i++) {
       if (map[i] == *u) {
         *c = i;
         return 1;
       }
     }
     *c = 'x';
     return 0;
   }

   for (int i = 0; i < sMapLen; i++) {
     // sequences of different length cannot be equal
     if (sMap[i].len != usize) {
       continue;
     }
     // compare element by element, stopping at the first mismatch
     // (a 'continue' here would only continue this inner comparison and
     // make every entry of equal length look like a match)
     int j = 0;
     while (j < usize && sMap[i].u[j] == u[j]) {
       ++j;
     }
     if (j == usize) {
       *c = sMap[i].c;
       return 1;
     }
   }
   return 0;
 }

 //------------------------------------------------------------------------

 // Creates a cache with <sizeA> slots, all initially empty (NULL).
 CharCodeToUnicodeCache::CharCodeToUnicodeCache(int sizeA) {
   size = sizeA;
   cache = (CharCodeToUnicode **)gmallocn(size, sizeof(CharCodeToUnicode *));
   for (int slot = 0; slot < size; ++slot) {
     cache[slot] = NULL;
   }
 }

 // Drops the cache's reference to every stored object and frees the slot
 // array.
 CharCodeToUnicodeCache::~CharCodeToUnicodeCache() {
   for (int slot = 0; slot < size; ++slot) {
     CharCodeToUnicode *held = cache[slot];
     if (held != NULL) {
       held->decRefCnt();
     }
   }
   gfree(cache);
 }

 // Looks up the cached object whose tag matches <tag>.
 //
 // On a hit the object is moved to the front of the cache, its reference
 // count is incremented on behalf of the caller, and it is returned.
 // Returns NULL on a miss.
 CharCodeToUnicode *CharCodeToUnicodeCache::getCharCodeToUnicode(GooString *tag) {
   // the front slot is checked on its own: a hit there needs no reordering
   if (cache[0] && cache[0]->match(tag)) {
     cache[0]->incRefCnt();
     return cache[0];
   }
   for (int pos = 1; pos < size; ++pos) {
     CharCodeToUnicode *hit = cache[pos];
     if (!hit || !hit->match(tag)) {
       continue;
     }
     // shift slots 0..pos-1 down by one and put the hit in front
     memmove(cache + 1, cache, pos * sizeof(CharCodeToUnicode *));
     cache[0] = hit;
     hit->incRefCnt();
     return hit;
   }
   return NULL;
 }

 // Inserts <ctu> at the front of the cache and takes a reference to it.
 // The other entries move back by one slot; the entry in the last slot,
 // if any, is dropped (its reference is released).
 void CharCodeToUnicodeCache::add(CharCodeToUnicode *ctu) {
   const int last = size - 1;

   CharCodeToUnicode *evicted = cache[last];
   if (evicted) {
     evicted->decRefCnt();
   }
   for (int slot = last; slot > 0; --slot) {
     cache[slot] = cache[slot - 1];
   }
   cache[0] = ctu;
   ctu->incRefCnt();
 }
	//========================================================================
	//
	// CharCodeToUnicode.cc
	//
	// Copyright 2001-2003 Glyph & Cog, LLC
	//
	//========================================================================

	//========================================================================
	//
	// Modified under the Poppler project - http://poppler.freedesktop.org
	//
	// All changes made under the Poppler project to this file are licensed
	// under GPL version 2 or later
	//
	// Copyright (C) 2006, 2008-2010 Albert Astals Cid <aacid@kde.org>
	// Copyright (C) 2007 Julien Rebetez <julienr@svn.gnome.org>
	// Copyright (C) 2007 Koji Otani <sho@bbr.jp>
	// Copyright (C) 2008 Michael Vrable <mvrable@cs.ucsd.edu>
	// Copyright (C) 2008 Vasile Gaburici <gaburici@cs.umd.edu>
	// Copyright (C) 2010 William Bader <williambader@hotmail.com>
	// Copyright (C) 2010 Jakub Wilk <ubanus@users.sf.net>
	//
	// To see a description of the changes please see the Changelog file that
	// came with your tarball or type make ChangeLog if you are building from git
	//
	//========================================================================

	#include <config.h>

	#ifdef USE_GCC_PRAGMAS
	#pragma implementation
	#endif

	#include <stdio.h>
	#include <string.h>
	#include "goo/gmem.h"
	#include "goo/gfile.h"
	#include "goo/GooLikely.h"
	#include "goo/GooString.h"
	#include "Error.h"
	#include "GlobalParams.h"
	#include "PSTokenizer.h"
	#include "CharCodeToUnicode.h"

	//------------------------------------------------------------------------

	struct CharCodeToUnicodeString {
	CharCode c;
	Unicode *u;
	int len;
	};

	//------------------------------------------------------------------------

	static int getCharFromString(void *data) {
	char *p;
	int c;

	p = (char *)data;
	if (*p) {
	c = *p++;
	(char *)data = p;
	} else {
	c = EOF;
	}
	return c;
	}

	static int getCharFromFile(void *data) {
	return fgetc((FILE *)data);
	}

	//------------------------------------------------------------------------

	CharCodeToUnicode CharCodeToUnicode::parseCIDToUnicode(GooString fileName,
	GooString *collection) {
	FILE *f;
	Unicode *mapA;
	CharCode size, mapLenA;
	char buf[64];
	Unicode u;
	CharCodeToUnicode *ctu;

	if (!(f = fopen(fileName->getCString(), "r"))) {
	error(-1, "Couldn't open cidToUnicode file '%s'",
	fileName->getCString());
	return NULL;
	}

	size = 32768;
	mapA = (Unicode *)gmallocn(size, sizeof(Unicode));
	mapLenA = 0;

	while (getLine(buf, sizeof(buf), f)) {
	if (mapLenA == size) {
	size *= 2;
	mapA = (Unicode *)greallocn(mapA, size, sizeof(Unicode));
	}
	if (sscanf(buf, "%x", &u) == 1) {
	mapA[mapLenA] = u;
	} else {
	error(-1, "Bad line (%d) in cidToUnicode file '%s'",
	(int)(mapLenA + 1), fileName->getCString());
	mapA[mapLenA] = 0;
	}
	++mapLenA;
	}
	fclose(f);

	ctu = new CharCodeToUnicode(collection->copy(), mapA, mapLenA, gTrue,
	NULL, 0, 0);
	gfree(mapA);
	return ctu;
	}

	CharCodeToUnicode *CharCodeToUnicode::parseUnicodeToUnicode(
	GooString *fileName) {
	FILE *f;
	Unicode *mapA;
	CharCodeToUnicodeString *sMapA;
	CharCode size, oldSize, len, sMapSizeA, sMapLenA;
	char buf[256];
	char *tok;
	Unicode u0;
	int uBufSize = 8;
	Unicode uBuf = (Unicode )gmallocn(uBufSize, sizeof(Unicode));
	CharCodeToUnicode *ctu;
	int line, n, i;
	char *tokptr;

	if (!(f = fopen(fileName->getCString(), "r"))) {
	gfree(uBuf);
	error(-1, "Couldn't open unicodeToUnicode file '%s'",
	fileName->getCString());
	return NULL;
	}

	size = 4096;
	mapA = (Unicode *)gmallocn(size, sizeof(Unicode));
	memset(mapA, 0, size * sizeof(Unicode));
	len = 0;
	sMapA = NULL;
	sMapSizeA = sMapLenA = 0;

	line = 0;
	while (getLine(buf, sizeof(buf), f)) {
	++line;
	if (!(tok = strtok_r(buf, " \t\r\n", &tokptr)) \|\|
	sscanf(tok, "%x", &u0) != 1) {
	error(-1, "Bad line (%d) in unicodeToUnicode file '%s'",
	line, fileName->getCString());
	continue;
	}
	n = 0;
	while ((tok = strtok_r(NULL, " \t\r\n", &tokptr))) {
	if (n >= uBufSize)
	{
	uBufSize += 8;
	uBuf = (Unicode *)greallocn(uBuf, uBufSize, sizeof(Unicode));
	}
	if (sscanf(tok, "%x", &uBuf[n]) != 1) {
	error(-1, "Bad line (%d) in unicodeToUnicode file '%s'",
	line, fileName->getCString());
	break;
	}
	++n;
	}
	if (n < 1) {
	error(-1, "Bad line (%d) in unicodeToUnicode file '%s'",
	line, fileName->getCString());
	continue;
	}
	if (u0 >= size) {
	oldSize = size;
	while (u0 >= size) {
	size *= 2;
	}
	mapA = (Unicode *)greallocn(mapA, size, sizeof(Unicode));
	memset(mapA + oldSize, 0, (size - oldSize) * sizeof(Unicode));
	}
	if (n == 1) {
	mapA[u0] = uBuf[0];
	} else {
	mapA[u0] = 0;
	if (sMapLenA == sMapSizeA) {
	sMapSizeA += 16;
	sMapA = (CharCodeToUnicodeString *)
	greallocn(sMapA, sMapSizeA, sizeof(CharCodeToUnicodeString));
	}
	sMapA[sMapLenA].c = u0;
	sMapA[sMapLenA].u = (Unicode*)gmallocn(n, sizeof(Unicode));
	for (i = 0; i < n; ++i) {
	sMapA[sMapLenA].u[i] = uBuf[i];
	}
	sMapA[sMapLenA].len = n;
	++sMapLenA;
	}
	if (u0 >= len) {
	len = u0 + 1;
	}
	}
	fclose(f);

	ctu = new CharCodeToUnicode(fileName->copy(), mapA, len, gTrue,
	sMapA, sMapLenA, sMapSizeA);
	gfree(mapA);
	gfree(uBuf);
	return ctu;
	}

	CharCodeToUnicode CharCodeToUnicode::make8BitToUnicode(Unicode toUnicode) {
	return new CharCodeToUnicode(NULL, toUnicode, 256, gTrue, NULL, 0, 0);
	}

	CharCodeToUnicode CharCodeToUnicode::parseCMap(GooString buf, int nBits) {
	CharCodeToUnicode *ctu;
	char *p;

	ctu = new CharCodeToUnicode(NULL);
	p = buf->getCString();
	ctu->parseCMap1(&getCharFromString, &p, nBits);
	return ctu;
	}

	CharCodeToUnicode CharCodeToUnicode::parseCMapFromFile(GooString fileName,
	int nBits) {
	CharCodeToUnicode *ctu;
	FILE *f;

	ctu = new CharCodeToUnicode(NULL);
	if ((f = globalParams->findToUnicodeFile(fileName))) {
	ctu->parseCMap1(&getCharFromFile, f, nBits);
	fclose(f);
	} else {
	error(-1, "Couldn't find ToUnicode CMap file for '%s'",
	fileName->getCString());
	}
	return ctu;
	}

	void CharCodeToUnicode::mergeCMap(GooString *buf, int nBits) {
	char *p;

	p = buf->getCString();
	parseCMap1(&getCharFromString, &p, nBits);
	}

	void CharCodeToUnicode::parseCMap1(int (getCharFunc)(void ), void *data,
	int nBits) {
	PSTokenizer *pst;
	char tok1[256], tok2[256], tok3[256];
	int nDigits, n1, n2, n3;
	CharCode i;
	CharCode code1, code2;
	GooString *name;
	FILE *f;

	nDigits = nBits / 4;
	pst = new PSTokenizer(getCharFunc, data);
	pst->getToken(tok1, sizeof(tok1), &n1);
	while (pst->getToken(tok2, sizeof(tok2), &n2)) {
	if (!strcmp(tok2, "usecmap")) {
	if (tok1[0] == '/') {
	name = new GooString(tok1 + 1);
	if ((f = globalParams->findToUnicodeFile(name))) {
	parseCMap1(&getCharFromFile, f, nBits);
	fclose(f);
	} else {
	error(-1, "Couldn't find ToUnicode CMap file for '%s'",
	name->getCString());
	}
	delete name;
	}
	pst->getToken(tok1, sizeof(tok1), &n1);
	} else if (!strcmp(tok2, "beginbfchar")) {
	while (pst->getToken(tok1, sizeof(tok1), &n1)) {
	if (!strcmp(tok1, "endbfchar")) {
	break;
	}
	if (!pst->getToken(tok2, sizeof(tok2), &n2) \|\|
	!strcmp(tok2, "endbfchar")) {
	error(-1, "Illegal entry in bfchar block in ToUnicode CMap");
	break;
	}
	if (!(n1 == 2 + nDigits && tok1[0] == '<' && tok1[n1 - 1] == '>' &&
	tok2[0] == '<' && tok2[n2 - 1] == '>')) {
	if (!(n1 == 4 + nDigits && tok1[0] == '<' && tok1[n1 - 1] == '>' && tok1[1] == '0' && tok1[2] == '0' &&
	tok2[0] == '<' && tok2[n2 - 1] == '>')) {
	error(-1, "Illegal entry in bfchar block in ToUnicode CMap");
	continue;
	}
	}
	tok1[n1 - 1] = tok2[n2 - 1] = '\0';
	if (sscanf(tok1 + 1, "%x", &code1) != 1) {
	error(-1, "Illegal entry in bfchar block in ToUnicode CMap");
	continue;
	}
	addMapping(code1, tok2 + 1, n2 - 2, 0);
	}
	pst->getToken(tok1, sizeof(tok1), &n1);
	} else if (!strcmp(tok2, "beginbfrange")) {
	while (pst->getToken(tok1, sizeof(tok1), &n1)) {
	if (!strcmp(tok1, "endbfrange")) {
	break;
	}
	if (!pst->getToken(tok2, sizeof(tok2), &n2) \|\|
	!strcmp(tok2, "endbfrange") \|\|
	!pst->getToken(tok3, sizeof(tok3), &n3) \|\|
	!strcmp(tok3, "endbfrange")) {
	error(-1, "Illegal entry in bfrange block in ToUnicode CMap");
	break;
	}
	if (!(((n1 == 2 + nDigits && tok1[0] == '<' && tok1[n1 - 1] == '>') \|\|
	(n1 == 4 + nDigits && tok1[0] == '<' && tok1[n1 - 1] == '>' && tok1[1] == '0' && tok1[2] == '0')) &&
	((n2 == 2 + nDigits && tok2[0] == '<' && tok2[n2 - 1] == '>') \|\|
	(n2 == 4 + nDigits && tok2[0] == '<' && tok2[n2 - 1] == '>' && tok1[1] == '0' && tok1[2] == '0')))) {
	error(-1, "Illegal entry in bfrange block in ToUnicode CMap");
	continue;
	}
	tok1[n1 - 1] = tok2[n2 - 1] = '\0';
	if (sscanf(tok1 + 1, "%x", &code1) != 1 \|\|
	sscanf(tok2 + 1, "%x", &code2) != 1) {
	error(-1, "Illegal entry in bfrange block in ToUnicode CMap");
	continue;
	}
	if (!strcmp(tok3, "[")) {
	i = 0;
	while (pst->getToken(tok1, sizeof(tok1), &n1) &&
	code1 + i <= code2) {
	if (!strcmp(tok1, "]")) {
	break;
	}
	if (tok1[0] == '<' && tok1[n1 - 1] == '>') {
	tok1[n1 - 1] = '\0';
	addMapping(code1 + i, tok1 + 1, n1 - 2, 0);
	} else {
	error(-1, "Illegal entry in bfrange block in ToUnicode CMap");
	}
	++i;
	}
	} else if (tok3[0] == '<' && tok3[n3 - 1] == '>') {
	tok3[n3 - 1] = '\0';
	for (i = 0; code1 <= code2; ++code1, ++i) {
	addMapping(code1, tok3 + 1, n3 - 2, i);
	}

	} else {
	error(-1, "Illegal entry in bfrange block in ToUnicode CMap");
	}
	}
	pst->getToken(tok1, sizeof(tok1), &n1);
	} else {
	strcpy(tok1, tok2);
	}
	}
	delete pst;
	}

	void CharCodeToUnicode::addMapping(CharCode code, char *uStr, int n,
	int offset) {
	CharCode oldLen, i;
	Unicode u;
	char uHex[5];
	int j;

	if (code >= mapLen) {
	oldLen = mapLen;
	mapLen = (code + 256) & ~255;
	if (unlikely(code >= mapLen)) {
	error(-1, "Illegal code value in CharCodeToUnicode::addMapping");
	return;
	} else {
	map = (Unicode *)greallocn(map, mapLen, sizeof(Unicode));
	for (i = oldLen; i < mapLen; ++i) {
	map[i] = 0;
	}
	}
	}
	if (n <= 4) {
	if (sscanf(uStr, "%x", &u) != 1) {
	error(-1, "Illegal entry in ToUnicode CMap");
	return;
	}
	map[code] = u + offset;
	} else {
	if (sMapLen >= sMapSize) {
	sMapSize = sMapSize + 16;
	sMap = (CharCodeToUnicodeString *)
	greallocn(sMap, sMapSize, sizeof(CharCodeToUnicodeString));
	}
	map[code] = 0;
	sMap[sMapLen].c = code;
	sMap[sMapLen].len = n / 4;
	sMap[sMapLen].u = (Unicode*)gmallocn(sMap[sMapLen].len, sizeof(Unicode));
	for (j = 0; j < sMap[sMapLen].len; ++j) {
	strncpy(uHex, uStr + j*4, 4);
	uHex[4] = '\0';
	if (sscanf(uHex, "%x", &sMap[sMapLen].u[j]) != 1) {
	error(-1, "Illegal entry in ToUnicode CMap");
	}
	}
	sMap[sMapLen].u[sMap[sMapLen].len - 1] += offset;
	++sMapLen;
	}
	}

	CharCodeToUnicode::CharCodeToUnicode(GooString *tagA) {
	CharCode i;

	tag = tagA;
	mapLen = 256;
	map = (Unicode *)gmallocn(mapLen, sizeof(Unicode));
	for (i = 0; i < mapLen; ++i) {
	map[i] = 0;
	}
	sMap = NULL;
	sMapLen = sMapSize = 0;
	refCnt = 1;
	#if MULTITHREADED
	gInitMutex(&mutex);
	#endif
	}

	CharCodeToUnicode::CharCodeToUnicode(GooString tagA, Unicode mapA,
	CharCode mapLenA, GBool copyMap,
	CharCodeToUnicodeString *sMapA,
	int sMapLenA, int sMapSizeA) {
	tag = tagA;
	mapLen = mapLenA;
	if (copyMap) {
	map = (Unicode *)gmallocn(mapLen, sizeof(Unicode));
	memcpy(map, mapA, mapLen * sizeof(Unicode));
	} else {
	map = mapA;
	}
	sMap = sMapA;
	sMapLen = sMapLenA;
	sMapSize = sMapSizeA;
	refCnt = 1;
	#if MULTITHREADED
	gInitMutex(&mutex);
	#endif
	}

	CharCodeToUnicode::~CharCodeToUnicode() {
	if (tag) {
	delete tag;
	}
	gfree(map);
	if (sMap) {
	for (int i = 0; i < sMapLen; ++i) gfree(sMap[i].u);
	gfree(sMap);
	}
	#if MULTITHREADED
	gDestroyMutex(&mutex);
	#endif
	}

	void CharCodeToUnicode::incRefCnt() {
	#if MULTITHREADED
	gLockMutex(&mutex);
	#endif
	++refCnt;
	#if MULTITHREADED
	gUnlockMutex(&mutex);
	#endif
	}

	void CharCodeToUnicode::decRefCnt() {
	GBool done;

	#if MULTITHREADED
	gLockMutex(&mutex);
	#endif
	done = --refCnt == 0;
	#if MULTITHREADED
	gUnlockMutex(&mutex);
	#endif
	if (done) {
	delete this;
	}
	}

	GBool CharCodeToUnicode::match(GooString *tagA) {
	return tag && !tag->cmp(tagA);
	}

	void CharCodeToUnicode::setMapping(CharCode c, Unicode *u, int len) {
	int i, j;

	if (len == 1) {
	map[c] = u[0];
	} else {
	for (i = 0; i < sMapLen; ++i) {
	if (sMap[i].c == c) {
	gfree(sMap[i].u);
	break;
	}
	}
	if (i == sMapLen) {
	if (sMapLen == sMapSize) {
	sMapSize += 8;
	sMap = (CharCodeToUnicodeString *)
	greallocn(sMap, sMapSize, sizeof(CharCodeToUnicodeString));
	}
	++sMapLen;
	}
	map[c] = 0;
	sMap[i].c = c;
	sMap[i].len = len;
	sMap[i].u = (Unicode*)gmallocn(len, sizeof(Unicode));
	for (j = 0; j < len; ++j) {
	sMap[i].u[j] = u[j];
	}
	}
	}

	int CharCodeToUnicode::mapToUnicode(CharCode c, Unicode **u) {
	int i;

	if (c >= mapLen) {
	return 0;
	}
	if (map[c]) {
	*u = &map[c];
	return 1;
	}
	for (i = sMapLen - 1; i >= 0; --i) { // in reverse so CMap takes precedence
	if (sMap[i].c == c) {
	*u = sMap[i].u;
	return sMap[i].len;
	}
	}
	return 0;
	}

	int CharCodeToUnicode::mapToCharCode(Unicode* u, CharCode *c, int usize) {
	//look for charcode in map
	if (usize == 1) {
	for (CharCode i=0; i<mapLen; i++) {
	if (map[i] == *u) {
	*c = i;
	return 1;
	}
	}
	*c = 'x';
	} else {
	int i, j;
	//for each entry in the sMap
	for (i=0; i<sMapLen; i++) {
	//if the entry's unicode length isn't the same are usize, the strings
	// are obviously differents
	if (sMap[i].len != usize) continue;
	//compare the string char by char
	for (j=0; j<sMap[i].len; j++) {
	if (sMap[i].u[j] != u[j]) {
	continue;
	}
	}

	//we have the same strings
	if (j==sMap[i].len) {
	*c = sMap[i].c;
	return 1;
	}
	}
	}
	return 0;
	}

	//------------------------------------------------------------------------

	CharCodeToUnicodeCache::CharCodeToUnicodeCache(int sizeA) {
	int i;

	size = sizeA;
	cache = (CharCodeToUnicode *)gmallocn(size, sizeof(CharCodeToUnicode ));
	for (i = 0; i < size; ++i) {
	cache[i] = NULL;
	}
	}

	CharCodeToUnicodeCache::~CharCodeToUnicodeCache() {
	int i;

	for (i = 0; i < size; ++i) {
	if (cache[i]) {
	cache[i]->decRefCnt();
	}
	}
	gfree(cache);
	}

	CharCodeToUnicode CharCodeToUnicodeCache::getCharCodeToUnicode(GooString tag) {
	CharCodeToUnicode *ctu;
	int i, j;

	if (cache[0] && cache[0]->match(tag)) {
	cache[0]->incRefCnt();
	return cache[0];
	}
	for (i = 1; i < size; ++i) {
	if (cache[i] && cache[i]->match(tag)) {
	ctu = cache[i];
	for (j = i; j >= 1; --j) {
	cache[j] = cache[j - 1];
	}
	cache[0] = ctu;
	ctu->incRefCnt();
	return ctu;
	}
	}
	return NULL;
	}

	void CharCodeToUnicodeCache::add(CharCodeToUnicode *ctu) {
	int i;

	if (cache[size - 1]) {
	cache[size - 1]->decRefCnt();
	}
	for (i = size - 1; i >= 1; --i) {
	cache[i] = cache[i - 1];
	}
	cache[0] = ctu;
	ctu->incRefCnt();
	}