// source/common/ucnvsel.cpp - external/github.com/unicode-org/icu - Git at Google

 /*
 *******************************************************************************
 *
 *   Copyright (C) 2008, International Business Machines
 *   Corporation, Google and others.  All Rights Reserved.
 *
 *******************************************************************************
 */
 // Author : eldawy@google.com (Mohamed Eldawy)
 // ucnvsel.cpp
 //
 // Purpose: To generate a list of encodings capable of handling
 // a given Unicode text
 //
 // Started 09-April-2008

 /**
  * \file
  *
  * This is an implementation of an encoding selector.
  * The goal is, given a unicode string, find the encodings
  * this string can be mapped to. To make processing faster
  * a trie is built when you call ucnvsel_open() that
  * stores all encodings a codepoint can map to
  */

 #include "unicode/ucnvsel.h"

 #include <string.h>

 #include "unicode/uchar.h"
 #include "unicode/uniset.h"
 #include "unicode/ucnv.h"
 #include "unicode/ustring.h"
 #include "unicode/uchriter.h"
 #include "utrie.h"
 #include "propsvec.h"
 #include "uenumimp.h"
 #include "cmemory.h"
 #include "cstring.h"


 U_NAMESPACE_USE

 // Capacity value handed to utrie_open() in generateSelectorData().
 // It is meant as an upper bound on the serialized trie: the figure was
 // obtained by serializing a trie for all fallback mappings and one for all
 // roundtrip mappings and taking the larger of the two.
 // The value deliberately includes around 30KB of headroom (the largest size
 // actually observed is around 220000) so that things keep working if lots
 // of other converters are added to ICU.
 #define CAPACITY 250000


 // State of one converter selector. A selector answers the question "which
 // of these encodings can represent every code point of a given text?".
 struct UConverterSelector {
   uint8_t* serializedTrie;       // heap buffer holding the serialized trie; may be
                                  // NULL if building the selector failed early
   uint32_t serializedTrieSize;   // size of serializedTrie in bytes
   UTrie constructedTrie;     // 16 bit trie containing offsets into pv;
                              // unserialized from serializedTrie
   uint32_t* pv;              // table of bits! One bit per encoding, packed
                              // into (encodingsCount+31)/32 words per entry
   int32_t pvCount;           // number of uint32_t words of pv that get serialized
   char** encodings;          // which encodings did user ask to use?
                              // encodings[0] also owns the single allocation
                              // that stores all names back to back
   int32_t encodingsCount;    // number of entries in encodings
 };


 /* internal function: forward declaration so that ucnvsel_open() can call it
    before its definition further down in this file.
    It fills result->pv, result->pvCount, the serialized trie and the
    constructed trie from result->encodings; failures are reported
    through *status. */
 void generateSelectorData(UConverterSelector* result,
                           const USet* excludedEncodings,
                           const UConverterUnicodeSet whichSet,
                           UErrorCode* status);


 /* Forward declaration: ucnvsel_unserialize() needs to call the swapper, which
    is defined (and documented) at the end of this file. */
 U_CAPI int32_t ucnvsel_swap(const UDataSwapper *ds,
                                  const void *inData,
                                  int32_t length,
                                  void *outData,
                                  UErrorCode *status);


 /* open a selector. If converterList is NULL, build for all converters.
    If excludedCodePoints is NULL, don't exclude any codepoints */
 U_CAPI UConverterSelector* ucnvsel_open(const char* const*  converterList,
                                       int32_t converterListSize,
                                       const USet* excludedCodePoints,
                                       const UConverterUnicodeSet whichSet,
                                       UErrorCode* status ) {
   // allocate a new converter
   UConverterSelector* newSelector;
   int32_t i;  // for loop counter

   // the compiler should realize the tail recursion here and optimize
   // accordingly. This call is to get around the constness of
   // converterList by smallest amount of code modification
   if(converterListSize == 0 && converterList != NULL) {
     return ucnvsel_open(NULL, 0, excludedCodePoints, whichSet, status);
   }

   // check if already failed
   if (U_FAILURE(*status)) {
     return NULL;
   }
   // ensure args make sense!
   if (converterListSize < 0 || (converterList == NULL && converterListSize != 0)) {
     *status = U_ILLEGAL_ARGUMENT_ERROR;
     return NULL;
   }


   newSelector = (UConverterSelector*)uprv_malloc(sizeof(UConverterSelector));
   if (!newSelector) {
     *status = U_MEMORY_ALLOCATION_ERROR;
     return NULL;
   }
   uprv_memset(newSelector, 0, sizeof(UConverterSelector));

   // make a backup copy of the list of converters
   if (converterList != NULL && converterListSize > 0) {
     newSelector->encodings =
       (char**)uprv_malloc(converterListSize*sizeof(char*));
     // out of memory. Give user back the 100 bytes or so
     // we allocated earlier, and wish them good luck ;)
     if (!newSelector->encodings) {
       *status = U_MEMORY_ALLOCATION_ERROR;
       uprv_free(newSelector);
       return NULL;
     }

     char* allStrings = NULL;
     int32_t totalSize = 0;
     for (i = 0 ; i < converterListSize ; i++) {
       totalSize += uprv_strlen(converterList[i])+1;
     }
     allStrings = (char*) uprv_malloc(totalSize);
     //out of memory :(
     if (!allStrings) {
       *status = U_MEMORY_ALLOCATION_ERROR;
       uprv_free(newSelector->encodings);
       uprv_free(newSelector);
       return NULL;
     }

     for (i = 0 ; i < converterListSize ; i++) {
       newSelector->encodings[i] = allStrings;
       uprv_strcpy(newSelector->encodings[i], converterList[i]);
       allStrings += uprv_strlen(newSelector->encodings[i]) + 1;  // calling strlen
         // twice per string is probably faster than allocating memory to
         // cache the lengths!
     }
   } else {
     int32_t count = ucnv_countAvailable();
     newSelector->encodings =
       (char**)uprv_malloc(ucnv_countAvailable()*sizeof(char*));
     // out of memory. Give user back the 100 bytes or so
     // we allocated earlier, and wish them good luck ;)
     if (!newSelector->encodings) {
       *status = U_MEMORY_ALLOCATION_ERROR;
       uprv_free(newSelector);
       return NULL;
     }
     char* allStrings = NULL;
     int32_t totalSize = 0;
     for (i = 0 ; i < count ; i++) {
       const char* conv_moniker = ucnv_getAvailableName(i);
       totalSize += uprv_strlen(conv_moniker)+1;
     }
     allStrings = (char*) uprv_malloc(totalSize);
     //out of memory :(
     if (!allStrings) {
       *status = U_MEMORY_ALLOCATION_ERROR;
       uprv_free(newSelector->encodings);
       uprv_free(newSelector);
       return NULL;
     }
     for (i = 0 ; i < count ; i++) {
       const char* conv_moniker = ucnv_getAvailableName(i);
       newSelector->encodings[i] = allStrings;
       uprv_strcpy(newSelector->encodings[i], conv_moniker);
       allStrings += uprv_strlen(conv_moniker) + 1;  // calling strlen twice per
         // string is probably faster than allocating memory to cache the
         // lengths!
     }
     converterListSize = ucnv_countAvailable();
   }

   newSelector->encodingsCount = converterListSize;
   generateSelectorData(newSelector, excludedCodePoints, whichSet, status);

   if (U_FAILURE(*status)) {
     // at this point, we know pv and encodings have been allocated. No harm in
     // calling ucnv_closeSelector()
     ucnvsel_close(newSelector);
     return NULL;
   }

   return newSelector;
 }


 /* close opened selector: releases the name storage, the bit table, the
    serialized trie (if any) and the selector itself. NULL is accepted and
    ignored. */
 U_CAPI void ucnvsel_close(UConverterSelector *sel) {
   if (sel != NULL) {
     char** names = sel->encodings;
     // all names live in one allocation that starts at the first name
     uprv_free(names[0]);
     uprv_free(names);
     upvec_close(sel->pv);
     // the trie buffer is still missing when generateSelectorData() failed
     // before serializing the trie
     if (sel->serializedTrie != NULL) {
       uprv_free(sel->serializedTrie);
     }
     uprv_free(sel);
   }
 }

 /* unserialize a selector */
 U_CAPI UConverterSelector* ucnvsel_unserialize(const char* buffer,
                                              int32_t length,
                                              UErrorCode* status) {
   // check if already failed
   if (U_FAILURE(*status)) {
     return NULL;
   }
   // ensure args make sense!
   if (buffer == NULL || length <= 0) {
     *status = U_ILLEGAL_ARGUMENT_ERROR;
     return NULL;
   }

   UConverterSelector* sel;
   int32_t i = 0;  // for the for loop
   // check length!
   if (length < sizeof(int32_t) * 3) {
     *status = U_INVALID_FORMAT_ERROR;
     return NULL;
   }
   uint32_t sig, ASCIIness;

   memcpy(&sig, buffer, sizeof(int32_t));
   buffer += sizeof(uint32_t);
   memcpy(&ASCIIness, buffer, sizeof(int32_t));
   buffer += sizeof(uint32_t);
   // at this point, we don't know what the endianness or Asciiness of
   // our system or data is. Detect everything!
   // notice that a little trick is used here to save work. We don't actually
   // detect endianness of the machine or of the data. We simply detect
   // if the 2 are reversed. If they are, we send flags to udata_openSwapper()
   // to indicate we need endian swapping. Those params are not REALLY
   // the machine and data endianness
   UBool dataEndianness = FALSE;
   //if endianness need to be reversed
   if (sig == 0x99887766) {
     dataEndianness = TRUE;
   } else if (sig != 0x66778899) {
     *status = U_INVALID_FORMAT_ERROR;
     return NULL;
   }

   int32_t dataASCIIness = ASCIIness;
   if(dataEndianness) {
     //need to convert ASCIIness before using it!
     dataASCIIness = ((char*)&ASCIIness)[3];
   }
   int32_t machineASCIIness = U_CHARSET_FAMILY;

   //now, we have everything!!
   if(dataEndianness ||
      dataASCIIness != machineASCIIness) {
     //construct a data swapper!
     UDataSwapper *ds;

     ds=udata_openSwapper(dataEndianness, dataASCIIness, FALSE, machineASCIIness, status);
     char* newBuffer = (char*)uprv_malloc(length);
     if(!newBuffer) {
       udata_closeSwapper(ds);
       *status = U_MEMORY_ALLOCATION_ERROR;
       return NULL;
     }
     //can we pass buffer twice to swap in place?
     ucnvsel_swap(ds, buffer, length, newBuffer, status);
     buffer = newBuffer;
     udata_closeSwapper(ds);
   }

   length -= 3 * sizeof(int32_t); //sig, Asciiness, and pvCount
   // end of check length!

   sel = (UConverterSelector*)uprv_malloc(sizeof(UConverterSelector));
   //out of memory :(
   if (!sel) {
     *status = U_MEMORY_ALLOCATION_ERROR;
     return NULL;
   }
   uprv_memset(sel, 0, sizeof(UConverterSelector));

   memcpy(&sel->pvCount, buffer, sizeof(int32_t));
   buffer+=sizeof(int32_t);

   // check length
   if (length < (sel->pvCount+1)*sizeof(uint32_t)) {
     uprv_free(sel);
     *status = U_INVALID_FORMAT_ERROR;
     return NULL;
   }
   length -= (sel->pvCount+1)*sizeof(uint32_t);
   // end of check length

   sel->pv = (uint32_t*)uprv_malloc(sel->pvCount*sizeof(uint32_t));
   if(!sel->pv) {
     *status = U_MEMORY_ALLOCATION_ERROR;
     uprv_free(sel);
     return NULL;
   }

   memcpy(sel->pv, buffer, sel->pvCount*sizeof(uint32_t));
   buffer += sel->pvCount*sizeof(uint32_t);

   int32_t encodingsLength;
   memcpy(&encodingsLength, buffer, sizeof(int32_t));
   buffer += sizeof(int32_t);
   char* tempEncodings = (char*) uprv_malloc(encodingsLength+1);
   if(!tempEncodings) {
     *status = U_MEMORY_ALLOCATION_ERROR;
     uprv_free(sel);
     uprv_free(sel->pv);
     return NULL;
   }

   memcpy(tempEncodings, buffer, encodingsLength);
   tempEncodings[encodingsLength] = 0;
   buffer += encodingsLength;
   // count how many strings are there!
   int32_t numStrings = 0;
   for (int32_t i = 0 ; i < encodingsLength + 1 ; i++) {
     if (tempEncodings[i] == 0) {
       numStrings++;
     }
   }
   sel->encodingsCount = numStrings;
   sel->encodings = (char**) uprv_malloc(numStrings * sizeof(char*));
   if(!sel->encodings) {
     *status = U_MEMORY_ALLOCATION_ERROR;
     uprv_free(sel);
     uprv_free(sel->pv);
     uprv_free(tempEncodings);
     return NULL;
   }

   int32_t curString = 0;
   sel->encodings[0] = tempEncodings;
   for (i = 0 ; i < encodingsLength ; i++) {
     if (tempEncodings[i] == 0) {
       sel->encodings[++curString] = tempEncodings+i+1;
     }
   }

   // check length
   if (length < sizeof(uint32_t)) {
     uprv_free(sel->pv);
     uprv_free(tempEncodings);
     uprv_free(sel->encodings);
     uprv_free(sel);
     *status = U_INVALID_FORMAT_ERROR;
     return NULL;
   }
   length -= sizeof(uint32_t);
   // end of check length

   // the trie
   memcpy(&sel->serializedTrieSize, buffer, sizeof(uint32_t));
   buffer += sizeof(uint32_t);

   // check length
   if (length < sel->serializedTrieSize) {
     uprv_free(sel->pv);
     uprv_free(tempEncodings);
     uprv_free(sel->encodings);
     uprv_free(sel);
     *status = U_INVALID_FORMAT_ERROR;
     return NULL;
   }
   length -= sizeof(uint32_t);
   // end of check length

   sel->serializedTrie = (uint8_t*) uprv_malloc(sel->serializedTrieSize);
   if(!sel->serializedTrie) {
     uprv_free(sel->pv);
     uprv_free(tempEncodings);
     uprv_free(sel->encodings);
     uprv_free(sel);
     *status = U_MEMORY_ALLOCATION_ERROR;
     return NULL;
   }
   memcpy(sel->serializedTrie, buffer, sel->serializedTrieSize);
   // unserialize!
   utrie_unserialize(&sel->constructedTrie, sel->serializedTrie,
     sel->serializedTrieSize, status);

   return sel;
 }

 /* serialize a selector

    Writes the selector into buffer in the byte order and charset family of
    this machine, laid out as:
      signature | ASCIIness | pvCount | pv[pvCount] |
      encodingsLength | encoding names | trieSize | trie

    @param sel            selector to serialize
    @param buffer         destination; may be NULL when only the required
                          size is wanted (pass a capacity that is too small,
                          e.g. 0)
    @param bufferCapacity number of bytes available in buffer
    @param status         in/out ICU error code. U_ILLEGAL_ARGUMENT_ERROR for
                          a NULL or empty selector, a negative capacity or a
                          NULL buffer that would have to be written;
                          U_INDEX_OUTOFBOUNDS_ERROR when the buffer is too
                          small
    @return number of bytes required (also when the buffer is too small),
            0 for the other failures */
 U_CAPI int32_t ucnvsel_serialize(const UConverterSelector* sel,
                                char* buffer,
                                int32_t bufferCapacity,
                                UErrorCode* status) {
   // check if already failed
   if (U_FAILURE(*status)) {
     return 0;
   }
   // ensure args make sense!
   if (sel == NULL || bufferCapacity < 0) {
     *status = U_ILLEGAL_ARGUMENT_ERROR;
     return 0;
   }
   // the length computation below needs a first and a last name
   if (sel->encodings == NULL || sel->encodingsCount <= 0) {
     *status = U_ILLEGAL_ARGUMENT_ERROR;
     return 0;
   }

   // this is a multi-string! strlen() will stop at the first one, so measure
   // from the start of the first name to the end of the last one. The NUL
   // that ends the last name is not included.
   const char* firstName = sel->encodings[0];
   const char* lastName = sel->encodings[sel->encodingsCount-1];
   int32_t encodingStrLength =
     (int32_t)(uprv_strlen(lastName) + (lastName - firstName));

   // compute size and make sure it fits
   int32_t totalSize = (int32_t)(
     sizeof(uint32_t) /*signature*/+ sizeof(uint32_t) /*ASCIIness*/+
     sizeof(uint32_t) /*pvCount*/+ sizeof(uint32_t)*sel->pvCount /*pv*/+
     sizeof(uint32_t) /*encodingStrLength*/+ encodingStrLength /*names*/+
     sizeof(uint32_t) /*serializedTrieSize*/+ sel->serializedTrieSize /*trie*/);

   if (totalSize > bufferCapacity) {
     *status = U_INDEX_OUTOFBOUNDS_ERROR;
     return totalSize;
   }
   // from here on we really write, so there must be somewhere to write to
   if (buffer == NULL) {
     *status = U_ILLEGAL_ARGUMENT_ERROR;
     return 0;
   }

   // ok, save!
   // 0a. the signature
   uint32_t sig = 0x66778899;
   memcpy(buffer, &sig, sizeof(uint32_t));
   buffer+=sizeof(uint32_t);
   // 0b. ASCIIness
   uint32_t ASCIIness = U_CHARSET_FAMILY;
   memcpy(buffer, &ASCIIness, sizeof(uint32_t));
   buffer+=sizeof(uint32_t);

   // 1. the array of bits, preceded by its length in words
   memcpy(buffer, &sel->pvCount, sizeof(int32_t));
   buffer+=sizeof(int32_t);
   memcpy(buffer, sel->pv, sel->pvCount*sizeof(int32_t));
   buffer+=sel->pvCount*sizeof(int32_t);

   // 2. the encoding names, preceded by their total length in bytes
   memcpy(buffer, &encodingStrLength, sizeof(int32_t));
   buffer+=sizeof(int32_t);
   memcpy(buffer, firstName, encodingStrLength);
   buffer += encodingStrLength;

   // 3. the trie, preceded by its size in bytes
   memcpy(buffer, &sel->serializedTrieSize, sizeof(uint32_t));
   buffer+=sizeof(uint32_t);
   memcpy(buffer, sel->serializedTrie, sel->serializedTrieSize);
   return totalSize;
 }

 /* internal function!

    Builds the lookup data of a selector whose encodings/encodingsCount are
    already filled in:
      - result->pv / result->pvCount: for every code point, one bit per
        encoding that can represent it (bit i%32 of column i/32)
      - result->serializedTrie / serializedTrieSize and
        result->constructedTrie: the trie built while compacting pv

    Code points in excludedEncodings (may be NULL) get all bits set, so they
    never rule out an encoding. whichSet is passed on to ucnv_getUnicodeSet().
    Errors are reported through *status; partially built data is left in
    result for ucnvsel_close() to release. */
 void generateSelectorData(UConverterSelector* result,
                           const USet* excludedEncodings,
                           const UConverterUnicodeSet   whichSet,
                           UErrorCode* status) {
   const uint32_t encodingsSize = result->encodingsCount;
   // number of 32-bit words needed to hold one bit per encoding
   const uint32_t columns = (encodingsSize+31)/32;

   // 66000 as suggested by Markus [I suggest something like 66000 which
   // exceeds the number of BMP code points. There will be fewer ranges of
   // combinations of encodings. (I believe there are no encodings that have
   // interesting mappings for supplementary code points. All encodings either
   // support all of them or none of them.)]
   result->pv = upvec_open(columns, 66000);  // create for all
      // unicode codepoints, and have space for all those bits needed!
   if (result->pv == NULL) {
     // without any encodings there are no columns to create at all
     *status = (columns == 0) ? U_ILLEGAL_ARGUMENT_ERROR
                              : U_MEMORY_ALLOCATION_ERROR;
     return;
   }

   for (uint32_t i = 0; i < encodingsSize; ++i) {
     UConverter* test_converter = ucnv_open(result->encodings[i], status);
     if (U_FAILURE(*status)) {
       // status will propagate back to user
       return;
     }
     USet* unicode_point_set = uset_open(1, 0);  // empty set
     if (unicode_point_set == NULL) {
       ucnv_close(test_converter);
       *status = U_MEMORY_ALLOCATION_ERROR;
       return;
     }

     ucnv_getUnicodeSet(test_converter, unicode_point_set,
                        whichSet, status);

     if (!U_FAILURE(*status)) {
       const uint32_t column = i / 32;
       // shift an unsigned 1: bit 31 does not fit into a signed int
       const uint32_t mask = (uint32_t)1 << (i%32);
       // now iterate over intervals on set i!
       const int32_t item_count = uset_getItemCount(unicode_point_set);

       for (int32_t j = 0; j < item_count; ++j) {
         UChar32 start_char;
         UChar32 end_char;
         UErrorCode smallStatus = U_ZERO_ERROR;
         uset_getItem(unicode_point_set, j, &start_char, &end_char, NULL, 0,
                      &smallStatus);
         if (U_FAILURE(smallStatus)) {
           // this will be reached for the converters that fill the set with
           // strings. Those should be ignored by our system
           continue;
         }
         // IMPORTANT: the intervals for usets are INCLUSIVE. However, the
         // intervals for upvec are NOT INCLUSIVE. This is why we need
         // end_char+1 here!
         upvec_setValue(result->pv, start_char, end_char + 1, column, ~0, mask,
                        status);
         if (U_FAILURE(*status)) {
           break;
         }
       }
     }

     // release the per-encoding helpers on every path, then propagate
     ucnv_close(test_converter);
     uset_close(unicode_point_set);
     if (U_FAILURE(*status)) {
       return;
     }
   }


   // handle excluded encodings! Simply set their values to all 1's in the upvec
   if (excludedEncodings) {
     int32_t item_count = uset_getItemCount(excludedEncodings);
     for (int32_t j = 0; j < item_count; ++j) {
       UChar32 start_char;
       UChar32 end_char;

       uset_getItem(excludedEncodings, j, &start_char, &end_char, NULL, 0,
                    status);
       if (U_FAILURE(*status)) {
         return;
       }
       for (uint32_t col = 0 ; col < columns ; col++) {
         upvec_setValue(result->pv, start_char, end_char + 1, col, ~0, ~0,
                       status);
         if (U_FAILURE(*status)) {
           return;
         }
       }
     }
   }

   // alright. Now, let's put things in the same exact form you'd get when you
   // unserialize things.
   UNewTrie* trie = utrie_open(NULL, NULL, CAPACITY, 0, 0, TRUE);
   if (trie == NULL) {
     *status = U_MEMORY_ALLOCATION_ERROR;
     return;
   }
   result->pvCount = upvec_compact(result->pv, upvec_compactToTrieHandler,
                                   trie, status);
   // the first call only asks for the size, the second one writes the trie
   uint32_t length = utrie_serialize(trie, NULL, 0, NULL, TRUE, status);
   result->serializedTrie = (uint8_t*) uprv_malloc(length);
   if (result->serializedTrie == NULL) {
     utrie_close(trie);
     if (!U_FAILURE(*status)) {
       *status = U_MEMORY_ALLOCATION_ERROR;
     }
     return;
   }
   length = utrie_serialize(trie, result->serializedTrie, length, NULL, TRUE,
                            status);
   result->serializedTrieSize = length;
   utrie_unserialize(&result->constructedTrie, result->serializedTrie, length,
                     status);
   utrie_close(trie);
 }


 // a bunch of functions for the enumeration thingie! Nothing fancy here. Just
 // iterate over the selected encodings
 //
 // Enumerator is the context stored in the UEnumeration that ucnvsel_select()
 // returns.
 struct Enumerator {
   int16_t* index;   // positions into sel->encodings of the selected
                     // encodings; NULL when nothing was selected
   int16_t length;   // number of valid entries in index
   int16_t cur;      // position in index of the next name to hand out
   const UConverterSelector* sel;  // selector the names are read from; the
                                   // close callback does not free it
 };


 // close callback of the selector enumeration: releases everything that
 // ucnvsel_select() allocated for it, i.e. the index table, the Enumerator
 // context and the UEnumeration object itself. The selector that the context
 // points to is left alone.
 static void U_CALLCONV
 ucnvsel_close_selector_iterator(UEnumeration *enumerator) {
   uprv_free(((Enumerator*)(enumerator->context))->index);
   uprv_free(enumerator->context);
   // the UEnumeration was allocated with uprv_malloc() in ucnvsel_select();
   // nobody else frees it once a close callback is installed
   uprv_free(enumerator);
 }

 // count callback: number of encodings that were selected, or 0 when *status
 // already holds a failure
 static int32_t U_CALLCONV
 ucnvsel_count_encodings(UEnumeration *enumerator, UErrorCode *status) {
   return U_FAILURE(*status)
            ? 0
            : ((Enumerator*)(enumerator->context))->length;
 }


 // next callback: returns the name of the next selected encoding and moves
 // the enumeration forward. Returns NULL once all names have been handed out
 // or when *status already holds a failure. If resultLength is not NULL it
 // receives the length of the returned name.
 static const char* U_CALLCONV ucnvsel_next_encoding(UEnumeration* enumerator,
                                                  int32_t* resultLength,
                                                  UErrorCode* status) {
   if (U_FAILURE(*status)) {
     return NULL;
   }

   Enumerator* state = (Enumerator*)(enumerator->context);
   const int16_t position = state->cur;
   if (position >= state->length) {
     return NULL;  // exhausted
   }

   // index maps the enumeration position to a slot in the selector's names
   const char* name = state->sel->encodings[state->index[position]];
   state->cur++;
   if (resultLength != NULL) {
     *resultLength = uprv_strlen(name);
   }
   return name;
 }

 // reset callback: rewinds the enumeration to its first name. Does nothing
 // when *status already holds a failure.
 static void U_CALLCONV ucnvsel_reset_iterator(UEnumeration* enumerator,
                                            UErrorCode* status) {
   if (!U_FAILURE(*status)) {
     Enumerator* state = (Enumerator*)(enumerator->context);
     state->cur = 0;
   }
 }

 // Template for the enumerations handed out by ucnvsel_select(): it is copied
 // into every new UEnumeration, after which the copy's context is pointed at
 // a freshly allocated Enumerator.
 // NOTE(review): the initializer is positional; the meaning of the two
 // leading NULL slots and of uenum_unextDefault should be confirmed against
 // the UEnumeration declaration in uenumimp.h.
 static const UEnumeration defaultEncodings = {
   NULL,
     NULL,
     ucnvsel_close_selector_iterator,  // frees the enumeration's storage
     ucnvsel_count_encodings,          // number of selected encodings
     uenum_unextDefault,
     ucnvsel_next_encoding,            // next selected encoding name
     ucnvsel_reset_iterator            // rewind to the first name
 };


 // internal fn to intersect two sets of masks: every word of dest is ANDed
 // in place with the matching word of source1 (len words each)
 // returns whether the mask has reduced to all zeros
 UBool intersectMasks(uint32_t* dest, const uint32_t* source1, int32_t len) {
   uint32_t survivors = 0;  // OR of all words left in dest
   int32_t word = 0;
   while (word < len) {
     const uint32_t kept = dest[word] & source1[word];
     dest[word] = kept;
     survivors |= kept;
     ++word;
   }
   return survivors == 0;
 }

 // internal fn to count how many 1's are there in a mask of len words
 // algorithm taken from  http://graphics.stanford.edu/~seander/bithacks.html
 int16_t countOnes(uint32_t* mask, int32_t len) {
   int32_t bitsSet = 0;
   int32_t word = 0;
   while (word < len) {
     uint32_t remaining = mask[word++];
     // each round clears the least significant bit that is still set, so
     // the loop runs once per 1 bit
     while (remaining != 0) {
       remaining &= remaining - 1;
       ++bitsSet;
     }
   }
   return bitsSet;
 }


 /* internal function!

    Shared implementation of ucnvsel_selectForString() and
    ucnvsel_selectForUTF8(): walks the text code point by code point and
    intersects the encoding bit sets of all of them. The encodings whose bit
    survives are the ones offered by the returned enumeration.

    @param sel     selector to consult
    @param s       text, UTF-16 (UChar) when isUTF16 is true, otherwise UTF-8
                   (char). May be NULL only if length is 0
    @param length  length of s in code units, or -1 if s is NUL-terminated
    @param status  in/out ICU error code. U_ILLEGAL_ARGUMENT_ERROR for a NULL
                   selector or a NULL text with a non-zero length,
                   U_MEMORY_ALLOCATION_ERROR when out of memory
    @param isUTF16 how to interpret s
    @return a new enumeration of encoding names, which releases its storage
            through its close callback; NULL on failure */
 UEnumeration *ucnvsel_select(const UConverterSelector* sel, const void *s,
 int32_t length, UErrorCode *status, UBool isUTF16) {
   const UChar* utf16buffer = (const UChar*) s;
   const char* utf8buffer = (const char*) s;

   UChar32 next = 0;
   int32_t offset = 0;
   int32_t i, j;

   // check if already failed
   if (U_FAILURE(*status)) {
     return NULL;
   }
   // ensure args make sense!
   if (sel == NULL || (s == NULL && length != 0)) {
     *status = U_ILLEGAL_ARGUMENT_ERROR;
     return NULL;
   }

   // number of 32-bit words that hold one bit per encoding
   const int32_t columns = (sel->encodingsCount+31)/32;

   // this is the context we will use. Store a table of indices to which
   // encodings are legit.
   struct Enumerator* result = (Enumerator*)uprv_malloc(sizeof(Enumerator));
   UEnumeration* en = (UEnumeration *)uprv_malloc(sizeof(UEnumeration));
   // never ask for zero bytes, so that NULL always means out of memory
   uint32_t* mask = (uint32_t*) uprv_malloc((columns > 0 ? columns : 1) *
                                            sizeof(uint32_t));
   if (result == NULL || en == NULL || mask == NULL) {
     if (mask != NULL) {
       uprv_free(mask);
     }
     if (en != NULL) {
       uprv_free(en);
     }
     if (result != NULL) {
       uprv_free(result);
     }
     *status = U_MEMORY_ALLOCATION_ERROR;
     return NULL;
   }

   result->index = NULL;  // this will be allocated later!
   result->length = result->cur = 0;
   result->sel = sel;

   memcpy(en, &defaultEncodings, sizeof(UEnumeration));
   en->context = result;

   // start out with every encoding still possible
   uprv_memset(mask, ~0, columns * sizeof(uint32_t));

   if(length == -1) {
     if(isUTF16) {
       length = u_strlen(utf16buffer);
     } else {
       length = uprv_strlen(utf8buffer);
     }
   }

   if(s) {
     while (offset < length) {
       uint16_t pvOffset = 0;  // trie value: where this code point's bits
                               // start in sel->pv
       if (isUTF16) {
         U16_NEXT(utf16buffer, offset, length, next);
       } else {
         U8_NEXT(utf8buffer, offset, length, next);
       }

       // -1 is not a code point and is skipped
       if (next != -1) {
         UTRIE_GET16((&sel->constructedTrie), next, pvOffset);

         if (intersectMasks(mask, sel->pv+pvOffset, columns)) {
           break;  // no encoding is left, no need to look any further
         }
       }
     }
   }

   int16_t numOnes = countOnes(mask, columns);
   // now, we know the exact space we need for index
   if (numOnes > 0) {
     result->index = (int16_t*) uprv_malloc(numOnes * sizeof(int16_t));
     if (result->index == NULL) {
       uprv_free(mask);
       uprv_free(en);
       uprv_free(result);
       *status = U_MEMORY_ALLOCATION_ERROR;
       return NULL;
     }
   } //otherwise, index will remain NULL (and will never be touched by
     //the enumerator code anyway)

   // collect the positions of the surviving bits. Bits at or beyond
   // encodingsCount are padding and do not stand for an encoding.
   for (j = 0 ; j < columns ; j++) {
     for (i = 0 ; i < 32 ; i++) {
       const int32_t encodingIndex = j*32+i;
       if (encodingIndex < sel->encodingsCount &&
           ((mask[j] >> i) & 1) != 0) {
         result->index[result->length++] = (int16_t)encodingIndex;
       }
     }
   }
   uprv_free(mask);
   return en;
 }

 /* check a string against the selector - UTF16 version.
    Thin wrapper that forwards to ucnvsel_select() with the text marked as
    UTF-16. */
 U_CAPI UEnumeration *ucnvsel_selectForString(const UConverterSelector* sel,
                                    const UChar *s,
                                    int32_t length,
                                    UErrorCode *status) {
   const UBool textIsUTF16 = TRUE;
   UEnumeration* selected = ucnvsel_select(sel, s, length, status, textIsUTF16);
   return selected;
 }

 /* check a string against the selector - UTF8 version.
    Thin wrapper that forwards to ucnvsel_select() with the text marked as
    UTF-8. */
 U_CAPI UEnumeration *ucnvsel_selectForUTF8(const UConverterSelector* sel,
                                  const char *utf8str,
                                  int32_t length,
                                  UErrorCode *status) {
   const UBool textIsUTF16 = FALSE;
   UEnumeration* selected =
     ucnvsel_select(sel, utf8str, length, status, textIsUTF16);
   return selected;
 }


 /**
  * swap a selector into the desired Endianness and Asciiness of
  * the system. Just as FYI, selectors are always saved in the format
  * of the system that created them. They are only converted if used
  * on another system. In other words, selectors created on different
  * system can be different even if the params are identical (endianness
  * and Asciiness differences only)
  *
  * @param ds pointer to data swapper containing swapping info
  * @param inData pointer to incoming data
  * @param length length of inData in bytes
  * @param outData pointer to output data. Capacity should
  *                be at least equal to capacity of inData
  * @param status an in/out ICU UErrorCode
  * @return 0 on failure, number of bytes swapped on success
  *         number of bytes swapped can be smaller than length
  *
  */
 U_CAPI int32_t ucnvsel_swap(const UDataSwapper *ds,
                                  const void *inData,
                                  int32_t length,
                                  void *outData,
                                  UErrorCode *status) {
   const char* inDataC = (const char*) inData;
   char * outDataC = (char*) outData;
   int32_t passedLength = length;
   //args check
   if(U_FAILURE(*status)) {
     return 0;
   }
   if(ds==NULL || inData==NULL || length<-1 || (length>0 && outData==NULL)) {
     *status=U_ILLEGAL_ARGUMENT_ERROR;
       return 0;
   }

   if(length < 3 * sizeof(uint32_t)) {
     * status = U_INDEX_OUTOFBOUNDS_ERROR;
     return 0;
   }

   ds->swapArray32(ds, inDataC, 3, outDataC, status);
   int32_t pvCount = ((int32_t*)outData)[2];

   if(((int32_t*)outData)[0] != 0x66778899)
     return 0;

   length -= 3 * sizeof(uint32_t);
   inDataC += 3 * sizeof(uint32_t);
   outDataC += 3 * sizeof(uint32_t);


   if(length < pvCount * sizeof(uint32_t)) {
     * status = U_INDEX_OUTOFBOUNDS_ERROR;
     return 0;
   }
   ds->swapArray32(ds, inDataC, pvCount, outDataC, status);
   length -= pvCount * sizeof(uint32_t);
   inDataC += pvCount * sizeof(uint32_t);
   outDataC += pvCount * sizeof(uint32_t);

   if(length < 1 * sizeof(uint32_t)) {
     * status = U_INDEX_OUTOFBOUNDS_ERROR;
     return 0;
   }
   ds->swapArray32(ds, inDataC, 1, outDataC, status);
   int32_t encodingStrLength = ((int32_t*)outData)[0];
   length -= sizeof(uint32_t);
   inDataC += sizeof(uint32_t);
   outDataC += sizeof(uint32_t);

   if(length < encodingStrLength) {
     * status = U_INDEX_OUTOFBOUNDS_ERROR;
     return 0;
   }
   ds->swapInvChars(ds, inDataC, encodingStrLength, outDataC, status);
   length -= encodingStrLength;
   inDataC += encodingStrLength;
   outDataC += encodingStrLength;

   if(length <  1 * sizeof(uint32_t)) {
     * status = U_INDEX_OUTOFBOUNDS_ERROR;
     return 0;
   }
   ds->swapArray32(ds, inDataC, 1, outDataC, status);
   int32_t trieSize = ((int32_t*)outData)[0];
   length -= sizeof(uint32_t);
   inDataC += sizeof(uint32_t);
   outDataC += sizeof(uint32_t);

   if(length <  trieSize) {
     * status = U_INDEX_OUTOFBOUNDS_ERROR;
     return 0;
   }
   utrie_swap(ds, inDataC, trieSize, outDataC, status);
   length -= trieSize;
   return passedLength - length;
 }