ICU-22373 Export segmentation dictionaries
(cherry picked from commit 6c05042cbcf447eefd49da70c4d94359c336e60d)
diff --git a/.ci-builds/.azure-pipelines.yml b/.ci-builds/.azure-pipelines.yml
index 115b383..a04aa1c 100644
--- a/.ci-builds/.azure-pipelines.yml
+++ b/.ci-builds/.azure-pipelines.yml
@@ -633,7 +633,7 @@
./bin/icuexportdata --mode uprops --index --copyright --verbose --destdir icuexportdata/uprops/fast --trie-type fast --all
mkdir -p icuexportdata/uprops/small
./bin/icuexportdata --mode uprops --index --copyright --verbose --destdir icuexportdata/uprops/small --trie-type small --all
- displayName: 'Build property data export files'
+ displayName: 'Build property data files'
env:
LD_LIBRARY_PATH: lib
- script: |
@@ -642,7 +642,7 @@
./bin/icuexportdata --mode norm --index --copyright --verbose --destdir icuexportdata/norm/fast --trie-type fast --all
mkdir -p icuexportdata/norm/small
./bin/icuexportdata --mode norm --index --copyright --verbose --destdir icuexportdata/norm/small --trie-type small --all
- displayName: 'Build normalization data export files'
+ displayName: 'Build normalization data files'
env:
LD_LIBRARY_PATH: lib
- script: |
@@ -651,7 +651,7 @@
./bin/icuexportdata --mode ucase --index --copyright --verbose --destdir icuexportdata/ucase/fast --trie-type fast --all
mkdir -p icuexportdata/ucase/small
./bin/icuexportdata --mode ucase --index --copyright --verbose --destdir icuexportdata/ucase/small --trie-type small --all
- displayName: 'Build case data export files'
+ displayName: 'Build case data files'
env:
LD_LIBRARY_PATH: lib
- script: |
@@ -663,7 +663,19 @@
mkdir -p icuexportdata/collation/implicithan
./bin/genrb -X -s data/coll/ --ucadata data/in/coll/ucadata-implicithan-icu4x.icu -d icuexportdata/collation/implicithan $FILES
rm icuexportdata/collation/implicithan/*.res
- displayName: 'Build collation data export files'
+ displayName: 'Build collation data files'
+ env:
+ LD_LIBRARY_PATH: lib
+ - script: |
+ set -eo pipefail  # fail the step if gendict or python3 fails in any iteration
+ cd icu4c/source
+ mkdir -p icuexportdata/segmenter/dictionary  # one .toml per dictionary source
+ for FILE in data/brkitr/dictionaries/*.txt; do  # glob instead of parsing ls output
+ ./bin/gendict --uchars "$FILE" /dev/stdout | \
+ python3 -c 'import sys; data = sys.stdin.buffer.read(); print(f"trie_data = {[data[i + 1] << 8 | data[i] for i in range(64, len(data) - 1, 2)]}")' \
+ > icuexportdata/segmenter/dictionary/`basename "$FILE" .txt`.toml
+ done  # python3 skips the first 64 bytes and emits the rest as little-endian 16-bit values
+ displayName: 'Build segmenter dictionary files'
env:
LD_LIBRARY_PATH: lib
- task: ArchiveFiles@2