blob: b8e76f130ef990df391d02dedfc1f80114dd88e8 [file] [log] [blame]
/**
*******************************************************************************
* Copyright (C) 2006-2013, International Business Machines Corporation and *
* others. All Rights Reserved. *
*******************************************************************************
*
*******************************************************************************
*/
package com.ibm.icu.charset;
import java.io.IOException;
import java.nio.charset.Charset;
import java.nio.charset.UnsupportedCharsetException;
import java.nio.charset.spi.CharsetProvider;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;
import com.ibm.icu.impl.InvalidFormatException;
/**
* A concrete subclass of CharsetProvider for loading and providing charset converters
* in ICU.
* @stable ICU 3.6
*/
public final class CharsetProviderICU extends CharsetProvider{
private String optionsString;
/**
* Default constructor
* @stable ICU 3.6
*/
public CharsetProviderICU() {
optionsString = null;
}
/**
* Constructs a charset for the given charset name.
* Implements the abstract method of super class.
* @param charsetName charset name
* @return charset objet for the given charset name, null if unsupported
* @stable ICU 3.6
*/
public final Charset charsetForName(String charsetName){
try{
// extract the options from the charset name
charsetName = processOptions(charsetName);
// get the canonical name
String icuCanonicalName = getICUCanonicalName(charsetName);
// create the converter object and return it
if(icuCanonicalName==null || icuCanonicalName.length()==0){
// Try the original name, may be something added and not in the alias table.
// Will get an unsupported encoding exception if it doesn't work.
return getCharset(charsetName);
}
return getCharset(icuCanonicalName);
}catch(UnsupportedCharsetException ex){
}catch(IOException ex){
}
return null;
}
/**
* Constructs a charset for the given ICU conversion table from the specified class path.
* Example use: <code>cnv = CharsetProviderICU.charsetForName("myConverter", "com/myCompany/myDataPackage");</code>.
* In this example myConverter.cnv would exist in the com/myCompany/myDataPackage Java package.
* Conversion tables can be made with ICU4C's makeconv tool.
* This function allows you to allows you to load user defined conversion
* tables that are outside of ICU's core data.
* @param charsetName The name of the charset conversion table.
* @param classPath The class path that contain the conversion table.
* @return charset object for the given charset name, null if unsupported
* @stable ICU 3.8
*/
public final Charset charsetForName(String charsetName, String classPath) {
return charsetForName(charsetName, classPath, null);
}
/**
* Constructs a charset for the given ICU conversion table from the specified class path.
* This function is similar to {@link #charsetForName(String, String)}.
* @param charsetName The name of the charset conversion table.
* @param classPath The class path that contain the conversion table.
* @param loader the class object from which to load the charset conversion table
* @return charset object for the given charset name, null if unsupported
* @stable ICU 3.8
*/
public Charset charsetForName(String charsetName, String classPath, ClassLoader loader) {
CharsetMBCS cs = null;
try {
cs = new CharsetMBCS(charsetName, charsetName, new String[0], classPath, loader);
} catch (InvalidFormatException e) {
// return null;
}
return cs;
}
/**
* Gets the canonical name of the converter as defined by Java
* @param enc converter name
* @return canonical name of the converter
* @internal
* @deprecated This API is ICU internal only.
*/
public static final String getICUCanonicalName(String enc)
throws UnsupportedCharsetException{
String canonicalName = null;
String ret = null;
try{
if(enc!=null){
if((canonicalName = UConverterAlias.getCanonicalName(enc, "MIME"))!=null){
ret = canonicalName;
} else if((canonicalName = UConverterAlias.getCanonicalName(enc, "IANA"))!=null){
ret = canonicalName;
} else if((canonicalName = UConverterAlias.getAlias(enc, 0))!=null){
/* we have some aliases in the form x-blah .. match those */
ret = canonicalName;
}/*else if((canonicalName = UConverterAlias.getCanonicalName(enc, ""))!=null){
ret = canonicalName;
}*/else if(enc.indexOf("x-")==0 || enc.indexOf("X-")==0){
/* TODO: Match with getJavaCanonicalName method */
/*
char temp[ UCNV_MAX_CONVERTER_NAME_LENGTH] = {0};
strcpy(temp, encName+2);
*/
// Remove the 'x-' and get the ICU canonical name
if ((canonicalName = UConverterAlias.getAlias(enc.substring(2), 0))!=null) {
ret = canonicalName;
} else {
ret = "";
}
}else{
/* unsupported encoding */
ret = "";
}
}
return ret;
}catch(IOException ex){
throw new UnsupportedCharsetException(enc);
}
}
private Charset getCharset(String icuCanonicalName) throws IOException{
String[] aliases = getAliases(icuCanonicalName);
String canonicalName = getJavaCanonicalName(icuCanonicalName);
/* Concat the option string to the icuCanonicalName so that the options can be handled properly
* by the actual charset.
*/
if (optionsString != null) {
icuCanonicalName = icuCanonicalName.concat(optionsString);
optionsString = null;
}
return (CharsetICU.getCharset(icuCanonicalName,canonicalName, aliases));
}
/**
* Gets the canonical name of the converter as defined by Java
* @param charsetName converter name
* @return canonical name of the converter
* @internal
* @deprecated This API is ICU internal only.
*/
public static String getJavaCanonicalName(String charsetName){
/*
If a charset listed in the IANA Charset Registry is supported by an implementation
of the Java platform then its canonical name must be the name listed in the registry.
Many charsets are given more than one name in the registry, in which case the registry
identifies one of the names as MIME-preferred. If a charset has more than one registry
name then its canonical name must be the MIME-preferred name and the other names in
the registry must be valid aliases. If a supported charset is not listed in the IANA
registry then its canonical name must begin with one of the strings "X-" or "x-".
*/
if(charsetName==null ){
return null;
}
try{
String cName = null;
/* find out the alias with MIME tag */
if((cName=UConverterAlias.getStandardName(charsetName, "MIME"))!=null){
/* find out the alias with IANA tag */
}else if((cName=UConverterAlias.getStandardName(charsetName, "IANA"))!=null){
}else {
/*
check to see if an alias already exists with x- prefix, if yes then
make that the canonical name
*/
int aliasNum = UConverterAlias.countAliases(charsetName);
String name;
for(int i=0;i<aliasNum;i++){
name = UConverterAlias.getAlias(charsetName, i);
if(name!=null && name.indexOf("x-")==0){
cName = name;
break;
}
}
/* last resort just append x- to any of the alias and
make it the canonical name */
if((cName==null || cName.length()==0)){
name = UConverterAlias.getStandardName(charsetName, "UTR22");
if(name==null && charsetName.indexOf(",")!=-1){
name = UConverterAlias.getAlias(charsetName, 1);
}
/* if there is no UTR22 canonical name .. then just return itself*/
if(name==null){
name = charsetName;
}
cName = "x-"+ name;
}
}
/* After getting the java canonical name from ICU alias table, get the
* java canonical name from the current JDK. This is neccessary because
* different versions of the JVM (Sun and IBM) may have a different
* canonical name then the one given by ICU. So the java canonical name
* will depend on the current JVM. Since java cannot use the ICU canonical
* we have to try to use a java compatible name.
*/
if (cName != null) {
try {
if (Charset.isSupported(cName)) {
String testName = Charset.forName(cName).name();
/* Ensure that the java canonical name works in ICU */
if (!testName.equals(cName)) {
if (getICUCanonicalName(testName).length() > 0) {
cName = testName;
}
}
}
} catch (Exception e) {
// Any exception in the try block above
// must result Java's canonical name to be
// null. This block is necessary to reset
// gettingJavaCanonicalName to true always.
// See #9966.
// Note: The use of static gettingJavaCanonicalName
// looks really dangerous and obviously thread unsafe.
// We should revisit this code later. See #9973
cName = null;
}
}
return cName;
}catch (IOException ex){
}
return null;
}
/**
* Gets the aliases associated with the converter name
* @param encName converter name
* @return converter names as elements in an object array
* @internal
* @deprecated This API is ICU internal only.
*/
private static final String[] getAliases(String encName)throws IOException{
String[] ret = null;
int aliasNum = 0;
int i=0;
int j=0;
String aliasArray[/*50*/] = new String[50];
if(encName != null){
aliasNum = UConverterAlias.countAliases(encName);
for(i=0,j=0;i<aliasNum;i++){
String name = UConverterAlias.getAlias(encName,i);
if(name.indexOf('+')==-1 && name.indexOf(',')==-1){
aliasArray[j++]= name;
}
}
ret = new String[j];
for(;--j>=0;) {
ret[j] = aliasArray[j];
}
}
return (ret);
}
private void putCharsets(Map<Charset, String> map){
int num = UConverterAlias.countAvailable();
for(int i=0;i<num;i++) {
String name = UConverterAlias.getAvailableName(i);
try {
Charset cs = getCharset(name);
map.put(cs, getJavaCanonicalName(name));
}catch(UnsupportedCharsetException ex){
}catch (IOException e) {
}
// add only charsets that can be created!
}
}
/**
* Returns an iterator for the available charsets.
* Implements the abstract method of super class.
* @return Iterator the charset name iterator
* @stable ICU 3.6
*/
public final Iterator<Charset> charsets(){
HashMap<Charset, String> map = new HashMap<Charset, String>();
putCharsets(map);
return map.keySet().iterator();
}
/**
* Gets the canonical names of available converters
* @return array of available converter names
* @internal
* @deprecated This API is ICU internal only.
*/
public static final String[] getAvailableNames(){
CharsetProviderICU provider = new CharsetProviderICU();
HashMap<Charset, String> map = new HashMap<Charset, String>();
provider.putCharsets(map);
return map.values().toArray(new String[0]);
}
/**
* Return all names available
* @return String[] an array of all available names
* @internal
* @deprecated This API is ICU internal only.
*/
public static final String[] getAllNames(){
int num = UConverterAlias.countAvailable();
String[] names = new String[num];
for(int i=0;i<num;i++) {
names[i] = UConverterAlias.getAvailableName(i);
}
return names;
}
private String processOptions(String charsetName) {
if (charsetName.indexOf(UConverterConstants.OPTION_SWAP_LFNL_STRING) > -1) {
/* Remove and save the swap lfnl option string portion of the charset name. */
optionsString = UConverterConstants.OPTION_SWAP_LFNL_STRING;
charsetName = charsetName.substring(0, charsetName.indexOf(UConverterConstants.OPTION_SWAP_LFNL_STRING));
}
return charsetName;
}
}