// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
/*
*******************************************************************************
* Copyright (C) 2001-2010, International Business Machines
* Corporation and others. All Rights Reserved.
*******************************************************************************
*/
/* Written by Simon Montagu, Matitiahu Allouche
 * (ported from C code written by Markus W. Scherer)
 */
| |
| package com.ibm.icu.text; |
| |
| import com.ibm.icu.lang.UCharacter; |
| |
| final class BidiWriter { |
| |
| /** Bidi control code points */ |
| static final char LRM_CHAR = 0x200e; |
| static final char RLM_CHAR = 0x200f; |
| static final int MASK_R_AL = (1 << UCharacter.RIGHT_TO_LEFT | |
| 1 << UCharacter.RIGHT_TO_LEFT_ARABIC); |
| |
| private static boolean IsCombining(int type) |
| { |
| return ((1<<type & |
| (1<<UCharacter.NON_SPACING_MARK | |
| 1<<UCharacter.COMBINING_SPACING_MARK | |
| 1<<UCharacter.ENCLOSING_MARK)) != 0); |
| } |
| |
| /* |
| * When we have OUTPUT_REVERSE set on writeReordered(), then we |
| * semantically write RTL runs in reverse and later reverse them again. |
| * Instead, we actually write them in forward order to begin with. |
| * However, if the RTL run was to be mirrored, we need to mirror here now |
| * since the implicit second reversal must not do it. |
| * It looks strange to do mirroring in LTR output, but it is only because |
| * we are writing RTL output in reverse. |
| */ |
| private static String doWriteForward(String src, int options) { |
| /* optimize for several combinations of options */ |
| switch(options&(Bidi.REMOVE_BIDI_CONTROLS|Bidi.DO_MIRRORING)) { |
| case 0: { |
| /* simply return the LTR run */ |
| return src; |
| } |
| case Bidi.DO_MIRRORING: { |
| StringBuffer dest = new StringBuffer(src.length()); |
| |
| /* do mirroring */ |
| int i=0; |
| int c; |
| |
| do { |
| c = UTF16.charAt(src, i); |
| i += UTF16.getCharCount(c); |
| UTF16.append(dest, UCharacter.getMirror(c)); |
| } while(i < src.length()); |
| return dest.toString(); |
| } |
| case Bidi.REMOVE_BIDI_CONTROLS: { |
| StringBuilder dest = new StringBuilder(src.length()); |
| |
| /* copy the LTR run and remove any Bidi control characters */ |
| int i = 0; |
| char c; |
| do { |
| c = src.charAt(i++); |
| if(!Bidi.IsBidiControlChar(c)) { |
| dest.append(c); |
| } |
| } while(i < src.length()); |
| return dest.toString(); |
| } |
| default: { |
| StringBuffer dest = new StringBuffer(src.length()); |
| |
| /* remove Bidi control characters and do mirroring */ |
| int i = 0; |
| int c; |
| do { |
| c = UTF16.charAt(src, i); |
| i += UTF16.getCharCount(c); |
| if(!Bidi.IsBidiControlChar(c)) { |
| UTF16.append(dest, UCharacter.getMirror(c)); |
| } |
| } while(i < src.length()); |
| return dest.toString(); |
| } |
| } /* end of switch */ |
| } |
| |
| private static String doWriteForward(char[] text, int start, int limit, |
| int options) |
| { |
| return doWriteForward(new String(text, start, limit - start), options); |
| } |
| |
| static String writeReverse(String src, int options) { |
| /* |
| * RTL run - |
| * |
| * RTL runs need to be copied to the destination in reverse order |
| * of code points, not code units, to keep Unicode characters intact. |
| * |
| * The general strategy for this is to read the source text |
| * in backward order, collect all code units for a code point |
| * (and optionally following combining characters, see below), |
| * and copy all these code units in ascending order |
| * to the destination for this run. |
| * |
| * Several options request whether combining characters |
| * should be kept after their base characters, |
| * whether Bidi control characters should be removed, and |
| * whether characters should be replaced by their mirror-image |
| * equivalent Unicode characters. |
| */ |
| StringBuffer dest = new StringBuffer(src.length()); |
| |
| /* optimize for several combinations of options */ |
| switch (options & |
| (Bidi.REMOVE_BIDI_CONTROLS | |
| Bidi.DO_MIRRORING | |
| Bidi.KEEP_BASE_COMBINING)) { |
| |
| case 0: |
| /* |
| * With none of the "complicated" options set, the destination |
| * run will have the same length as the source run, |
| * and there is no mirroring and no keeping combining characters |
| * with their base characters. |
| * |
| * XXX: or dest = UTF16.reverse(new StringBuffer(src)); |
| */ |
| |
| int srcLength = src.length(); |
| |
| /* preserve character integrity */ |
| do { |
| /* i is always after the last code unit known to need to be kept |
| * in this segment */ |
| int i = srcLength; |
| |
| /* collect code units for one base character */ |
| srcLength -= UTF16.getCharCount(UTF16.charAt(src, |
| srcLength - 1)); |
| |
| /* copy this base character */ |
| dest.append(src.substring(srcLength, i)); |
| } while(srcLength > 0); |
| break; |
| |
| case Bidi.KEEP_BASE_COMBINING: |
| /* |
| * Here, too, the destination |
| * run will have the same length as the source run, |
| * and there is no mirroring. |
| * We do need to keep combining characters with their base |
| * characters. |
| */ |
| srcLength = src.length(); |
| |
| /* preserve character integrity */ |
| do { |
| /* i is always after the last code unit known to need to be kept |
| * in this segment */ |
| int c; |
| int i = srcLength; |
| |
| /* collect code units and modifier letters for one base |
| * character */ |
| do { |
| c = UTF16.charAt(src, srcLength - 1); |
| srcLength -= UTF16.getCharCount(c); |
| } while(srcLength > 0 && IsCombining(UCharacter.getType(c))); |
| |
| /* copy this "user character" */ |
| dest.append(src.substring(srcLength, i)); |
| } while(srcLength > 0); |
| break; |
| |
| default: |
| /* |
| * With several "complicated" options set, this is the most |
| * general and the slowest copying of an RTL run. |
| * We will do mirroring, remove Bidi controls, and |
| * keep combining characters with their base characters |
| * as requested. |
| */ |
| srcLength = src.length(); |
| |
| /* preserve character integrity */ |
| do { |
| /* i is always after the last code unit known to need to be kept |
| * in this segment */ |
| int i = srcLength; |
| |
| /* collect code units for one base character */ |
| int c = UTF16.charAt(src, srcLength - 1); |
| srcLength -= UTF16.getCharCount(c); |
| if ((options & Bidi.KEEP_BASE_COMBINING) != 0) { |
| /* collect modifier letters for this base character */ |
| while(srcLength > 0 && IsCombining(UCharacter.getType(c))) { |
| c = UTF16.charAt(src, srcLength - 1); |
| srcLength -= UTF16.getCharCount(c); |
| } |
| } |
| |
| if ((options & Bidi.REMOVE_BIDI_CONTROLS) != 0 && |
| Bidi.IsBidiControlChar(c)) { |
| /* do not copy this Bidi control character */ |
| continue; |
| } |
| |
| /* copy this "user character" */ |
| int j = srcLength; |
| if((options & Bidi.DO_MIRRORING) != 0) { |
| /* mirror only the base character */ |
| c = UCharacter.getMirror(c); |
| UTF16.append(dest, c); |
| j += UTF16.getCharCount(c); |
| } |
| dest.append(src.substring(j, i)); |
| } while(srcLength > 0); |
| break; |
| } /* end of switch */ |
| |
| return dest.toString(); |
| } |
| |
| static String doWriteReverse(char[] text, int start, int limit, int options) |
| { |
| return writeReverse(new String(text, start, limit - start), options); |
| } |
| |
| static String writeReordered(Bidi bidi, int options) |
| { |
| int run, runCount; |
| StringBuilder dest; |
| char[] text = bidi.text; |
| runCount = bidi.countRuns(); |
| |
| /* |
| * Option "insert marks" implies Bidi.INSERT_LRM_FOR_NUMERIC if the |
| * reordering mode (checked below) is appropriate. |
| */ |
| if ((bidi.reorderingOptions & Bidi.OPTION_INSERT_MARKS) != 0) { |
| options |= Bidi.INSERT_LRM_FOR_NUMERIC; |
| options &= ~Bidi.REMOVE_BIDI_CONTROLS; |
| } |
| /* |
| * Option "remove controls" implies Bidi.REMOVE_BIDI_CONTROLS |
| * and cancels Bidi.INSERT_LRM_FOR_NUMERIC. |
| */ |
| if ((bidi.reorderingOptions & Bidi.OPTION_REMOVE_CONTROLS) != 0) { |
| options |= Bidi.REMOVE_BIDI_CONTROLS; |
| options &= ~Bidi.INSERT_LRM_FOR_NUMERIC; |
| } |
| /* |
| * If we do not perform the "inverse Bidi" algorithm, then we |
| * don't need to insert any LRMs, and don't need to test for it. |
| */ |
| if ((bidi.reorderingMode != Bidi.REORDER_INVERSE_NUMBERS_AS_L) && |
| (bidi.reorderingMode != Bidi.REORDER_INVERSE_LIKE_DIRECT) && |
| (bidi.reorderingMode != Bidi.REORDER_INVERSE_FOR_NUMBERS_SPECIAL) && |
| (bidi.reorderingMode != Bidi.REORDER_RUNS_ONLY)) { |
| options &= ~Bidi.INSERT_LRM_FOR_NUMERIC; |
| } |
| dest = new StringBuilder((options & Bidi.INSERT_LRM_FOR_NUMERIC) != 0 ? |
| bidi.length * 2 : bidi.length); |
| /* |
| * Iterate through all visual runs and copy the run text segments to |
| * the destination, according to the options. |
| * |
| * The tests for where to insert LRMs ignore the fact that there may be |
| * BN codes or non-BMP code points at the beginning and end of a run; |
| * they may insert LRMs unnecessarily but the tests are faster this way |
| * (this would have to be improved for UTF-8). |
| */ |
| if ((options & Bidi.OUTPUT_REVERSE) == 0) { |
| /* forward output */ |
| if ((options & Bidi.INSERT_LRM_FOR_NUMERIC) == 0) { |
| /* do not insert Bidi controls */ |
| for (run = 0; run < runCount; ++run) { |
| BidiRun bidiRun = bidi.getVisualRun(run); |
| if (bidiRun.isEvenRun()) { |
| dest.append(doWriteForward(text, bidiRun.start, |
| bidiRun.limit, |
| options & ~Bidi.DO_MIRRORING)); |
| } else { |
| dest.append(doWriteReverse(text, bidiRun.start, |
| bidiRun.limit, options)); |
| } |
| } |
| } else { |
| /* insert Bidi controls for "inverse Bidi" */ |
| byte[] dirProps = bidi.dirProps; |
| char uc; |
| int markFlag; |
| |
| for (run = 0; run < runCount; ++run) { |
| BidiRun bidiRun = bidi.getVisualRun(run); |
| markFlag=0; |
| /* check if something relevant in insertPoints */ |
| markFlag = bidi.runs[run].insertRemove; |
| if (markFlag < 0) { /* bidi controls count */ |
| markFlag = 0; |
| } |
| if (bidiRun.isEvenRun()) { |
| if (bidi.isInverse() && |
| dirProps[bidiRun.start] != Bidi.L) { |
| markFlag |= Bidi.LRM_BEFORE; |
| } |
| if ((markFlag & Bidi.LRM_BEFORE) != 0) { |
| uc = LRM_CHAR; |
| } else if ((markFlag & Bidi.RLM_BEFORE) != 0) { |
| uc = RLM_CHAR; |
| } else { |
| uc = 0; |
| } |
| if (uc != 0) { |
| dest.append(uc); |
| } |
| dest.append(doWriteForward(text, |
| bidiRun.start, bidiRun.limit, |
| options & ~Bidi.DO_MIRRORING)); |
| |
| if (bidi.isInverse() && |
| dirProps[bidiRun.limit - 1] != Bidi.L) { |
| markFlag |= Bidi.LRM_AFTER; |
| } |
| if ((markFlag & Bidi.LRM_AFTER) != 0) { |
| uc = LRM_CHAR; |
| } else if ((markFlag & Bidi.RLM_AFTER) != 0) { |
| uc = RLM_CHAR; |
| } else { |
| uc = 0; |
| } |
| if (uc != 0) { |
| dest.append(uc); |
| } |
| } else { /* RTL run */ |
| if (bidi.isInverse() && |
| !bidi.testDirPropFlagAt(MASK_R_AL, |
| bidiRun.limit - 1)) { |
| markFlag |= Bidi.RLM_BEFORE; |
| } |
| if ((markFlag & Bidi.LRM_BEFORE) != 0) { |
| uc = LRM_CHAR; |
| } else if ((markFlag & Bidi.RLM_BEFORE) != 0) { |
| uc = RLM_CHAR; |
| } else { |
| uc = 0; |
| } |
| if (uc != 0) { |
| dest.append(uc); |
| } |
| dest.append(doWriteReverse(text, bidiRun.start, |
| bidiRun.limit, options)); |
| |
| if(bidi.isInverse() && |
| (MASK_R_AL & Bidi.DirPropFlag(dirProps[bidiRun.start])) == 0) { |
| markFlag |= Bidi.RLM_AFTER; |
| } |
| if ((markFlag & Bidi.LRM_AFTER) != 0) { |
| uc = LRM_CHAR; |
| } else if ((markFlag & Bidi.RLM_AFTER) != 0) { |
| uc = RLM_CHAR; |
| } else { |
| uc = 0; |
| } |
| if (uc != 0) { |
| dest.append(uc); |
| } |
| } |
| } |
| } |
| } else { |
| /* reverse output */ |
| if((options & Bidi.INSERT_LRM_FOR_NUMERIC) == 0) { |
| /* do not insert Bidi controls */ |
| for(run = runCount; --run >= 0; ) { |
| BidiRun bidiRun = bidi.getVisualRun(run); |
| if (bidiRun.isEvenRun()) { |
| dest.append(doWriteReverse(text, |
| bidiRun.start, bidiRun.limit, |
| options & ~Bidi.DO_MIRRORING)); |
| } else { |
| dest.append(doWriteForward(text, bidiRun.start, |
| bidiRun.limit, options)); |
| } |
| } |
| } else { |
| /* insert Bidi controls for "inverse Bidi" */ |
| |
| byte[] dirProps = bidi.dirProps; |
| |
| for (run = runCount; --run >= 0; ) { |
| /* reverse output */ |
| BidiRun bidiRun = bidi.getVisualRun(run); |
| if (bidiRun.isEvenRun()) { |
| if (dirProps[bidiRun.limit - 1] != Bidi.L) { |
| dest.append(LRM_CHAR); |
| } |
| |
| dest.append(doWriteReverse(text, bidiRun.start, |
| bidiRun.limit, options & ~Bidi.DO_MIRRORING)); |
| |
| if (dirProps[bidiRun.start] != Bidi.L) { |
| dest.append(LRM_CHAR); |
| } |
| } else { |
| if ((MASK_R_AL & Bidi.DirPropFlag(dirProps[bidiRun.start])) == 0) { |
| dest.append(RLM_CHAR); |
| } |
| |
| dest.append(doWriteForward(text, bidiRun.start, |
| bidiRun.limit, options)); |
| |
| if ((MASK_R_AL & Bidi.DirPropFlag(dirProps[bidiRun.limit - 1])) == 0) { |
| dest.append(RLM_CHAR); |
| } |
| } |
| } |
| } |
| } |
| |
| return dest.toString(); |
| } |
| } |