| #!/usr/bin/qsh |
| # Copyright (C) 2016 and later: Unicode, Inc. and others. |
| # License & terms of use: http://www.unicode.org/copyright.html |
| # Copyright (C) 2000-2011, International Business Machines |
| # Corporation and others. All Rights Reserved. |
| # |
| # Authors: |
| # Ami Fixler |
| # Barry Novinger |
| # Steven R. Loomis |
| # George Rhoten |
| # Jason Spieth |
| # |
| # Shell script to unpax ICU and convert the files to an EBCDIC codepage. |
| # After extracting to EBCDIC, binary files are re-extracted without the |
| # EBCDIC conversion, thus restoring them to original codepage. |
| |
# Detect whether we are running under qsh. QSH_VERSION is only set there;
# without it the script merely runs in test mode. QSH is exported so that
# child processes can see the result (1 = qsh, 0 = not qsh).
case "$QSH_VERSION" in
    "")
        QSH=0
        echo "QSH not detected (QSH_VERSION not set) - just testing."
        ;;
    *)
        QSH=1
        #echo "QSH version $QSH_VERSION"
        ;;
esac
export QSH
| |
# Set to "v" (the default) to list files as they are unpacked; the value is
# inserted into the pax option string.
VERBOSE_UNPACK="v"

#****************************************************************************
# binary_suffixes: list of binary file suffixes (extensions), in both cases.
# A generic list would look like:
#binary_suffixes='ico ICO bmp BMP jpg JPG gif GIF brk BRK'
# The list below holds the ICU specific binary files.
#****************************************************************************
binary_suffixes='brk BRK bin BIN res RES cnv CNV'
binary_suffixes="$binary_suffixes dat DAT icu ICU spp SPP"
binary_suffixes="$binary_suffixes xml XML nrm NRM utf16be UTF16BE"

# data_files: archive path patterns of the ICU data directories, kept as one
# blank-separated list.
data_files='icu/source/data/brkitr/*'
data_files="$data_files icu/source/data/locales/*"
data_files="$data_files icu/source/data/coll/*"
data_files="$data_files icu/source/data/rbnf/*"
data_files="$data_files icu/source/data/mappings/*"
data_files="$data_files icu/source/data/misc/*"
data_files="$data_files icu/source/data/translit/*"
data_files="$data_files icu/source/data/unidata/*"
data_files="$data_files icu/source/test/testdata/*"
| |
#****************************************************************************
# Function: usage
# Description: Writes a one-line hint to stdout showing how to invoke this
#              script ($0) with the archive file name as its parameter
# Input: None
# Output: None
#****************************************************************************
usage()
{
    cat <<EOF
Enter archive filename as a parameter: $0 icu-archive.tar
EOF
}
| |
| #**************************************************************************** |
| # first make sure we have at least one arg and it's a file we can read |
| #**************************************************************************** |
| |
# check for no arguments; exit with a failure status so callers can tell
# that nothing was unpacked
if [ $# -eq 0 ]; then
    usage
    exit 1
fi

# tar file is argument 1
tar_file="$1"

# check that the file is valid. The operand is quoted: unquoted, an empty
# argument would collapse the test to "[ ! -r ]", which is false, and the
# check would be skipped.
if [ ! -r "$tar_file" ]; then
    echo "$tar_file does not exist or cannot be read."
    usage
    exit 1
fi

# treat all data files as ebcdic
ebcdic_data=$data_files
| |
| #**************************************************************************** |
| # Extract files. We do this in two passes. One pass for 819 files and a |
| # second pass for 37 files |
| #**************************************************************************** |
echo ""
echo "Extracting from $tar_file ..."
echo ""

# The patterns in $ebcdic_data must reach pax verbatim. Switch pathname
# expansion off so the shell does not replace them with the names of files
# that already exist (e.g. when unpacking over an earlier extraction).
set -f

# Pass 1: extract everything as iso-8859-1 except these directories
# (-c selects the archive members that do NOT match the patterns)
pax -C 819 -rc${VERBOSE_UNPACK}f "$tar_file" $ebcdic_data

# Pass 2: extract the data directories while converting them to EBCDIC
echo ""
echo "Extracting files which must be in ibm-37 ..."
echo ""
pax -C 37 -r${VERBOSE_UNPACK}f "$tar_file" $ebcdic_data

# back to normal pathname expansion for the rest of the script
set +f
| |
| #**************************************************************************** |
| # For files we have restored as CCSID 37, check the BOM to see if they |
| # should be processed as 819. Also handle files with special paths. Files |
| # that match will be added to binary files lists. The lists will in turn |
| # be processed to restore files as 819. |
| #**************************************************************************** |
echo ""
echo "Determining binary files by BOM ..."
echo ""
bin_count=0
# Start with an empty list so that a binary_files value inherited from the
# environment can never be removed or re-extracted by mistake.
binary_files=""

#****************************************************************************
# Function: bom_bytes
# Description: Prints the first three bytes of the first line of a file as
#              lower-case hex values separated by single blanks
# Input: $1 - path of the file to inspect
# Output: the three hex values on stdout, e.g. "57 8b ab"
#****************************************************************************
bom_bytes()
{
    # od prints an offset column followed by the byte values. Runs of blanks
    # are squeezed to one blank (the pattern needs at least one blank so it
    # cannot match the empty string) and fields 2-4 are the first 3 bytes.
    head -n 1 "$1" |\
    od -t x1 |\
    head -n 1 |\
    sed 's/[ ][ ]*/ /g' |\
    cut -f2-4 -d ' ' |\
    tr 'A-Z' 'a-z'
}

#****************************************************************************
# Function: restore_as_819
# Description: Removes the given files and extracts them again from
#              $tar_file as CCSID 819, i.e. without the EBCDIC conversion
# Input: archive member paths, as separate arguments
# Output: None
#****************************************************************************
restore_as_819()
{
    rm "$@"
    pax -C 819 -rvf "$tar_file" "$@"
}

# Process BOMs
if [ -f icu/as_is/bomlist.txt ]; then
    echo "Using icu/as_is/bomlist.txt"
    bom_list=$(cat icu/as_is/bomlist.txt)
    # Without pattern operands pax would re-extract the whole archive as
    # 819, so skip it when the list is empty.
    if [ -n "$bom_list" ]; then
        pax -C 819 -rvf "$tar_file" $bom_list
    fi
else
    # NOTE: the word-splitting loop cannot handle paths containing blanks.
    for file in $(find ./icu \( -name \*.txt -print \)); do
        bom8=$(bom_bytes "$file")
        #Find a converted UTF-8 BOM (both spellings of the hex bytes are accepted)
        if [ "$bom8" = "057 08b 0ab" ] || [ "$bom8" = "57 8b ab" ]; then
            # strip the leading "./" so the name matches the archive member
            file=$(echo "$file" | cut -d / -f2-)

            # Restore in batches of 200 names to keep the command line short.
            if [ $(echo $binary_files | wc -w) -ge 200 ]; then
                echo "Restoring binary files by BOM ($bin_count)..."
                restore_as_819 $binary_files
                echo "Determining binary files by BOM ($bin_count)..."
                binary_files=""
            fi
            bin_count=$(expr $bin_count + 1)
            binary_files="$binary_files $file"
        fi
    done
    # now see if a re-extract of the remaining binary files is necessary
    if [ -n "$binary_files" ]; then
        echo "Restoring binary files ($bin_count) ..."
        restore_as_819 $binary_files
    fi
fi
| |
echo "# Processing special paths."

#****************************************************************************
# Function: suffix_find_opts
# Description: Turns a list of file suffixes into find primaries of the
#              form "-o -name *.suffix", one per suffix
# Input: suffixes, as separate arguments
# Output: the find primaries on stdout (nothing for an empty list)
#****************************************************************************
suffix_find_opts()
{
    # Each match needs at least one character, so an empty list cannot
    # produce a bare "-o -name *." primary.
    echo "$*" | sed -e 's%[a-zA-Z0-9][a-zA-Z0-9]*%-o -name \*.&%g'
}

# Process special paths
more_bin_opts=$(suffix_find_opts $binary_suffixes)
# echo "Looking for additional files: find ... $more_bin_opts"

# $more_bin_opts holds unquoted "*.suffix" patterns. Pathname expansion is
# switched off so the shell hands them to find unchanged even if the current
# directory contains matching files.
set -f
more_bin_files=$(find icu -type f \( -name '*.zzz' $more_bin_opts \) -print)
set +f

echo "Restoring binary files by special paths ($bin_count) ..."
# Without pattern operands pax would re-extract the whole archive as 819 and
# undo the EBCDIC conversion, so only restore when something was found.
if [ -n "$more_bin_files" ]; then
    rm $more_bin_files
    pax -C 819 -rvf "$tar_file" $more_bin_files
fi
| |
| #**************************************************************************** |
| # Generate and run the configure script |
| #**************************************************************************** |
| |
echo ""
echo "Generating qsh compatible configure ..."
echo ""

# Write the converted script to a temporary file and swap it in only when
# sed succeeded; otherwise a failed conversion would replace configure with
# an empty or truncated file.
if sed -f icu/as_is/os400/convertConfigure.sed icu/source/configure > icu/source/configureTemp
then
    # rm is used here as in the rest of this script
    rm -f icu/source/configure
    mv icu/source/configureTemp icu/source/configure
    chmod 755 icu/source/configure
else
    echo "Could not convert icu/source/configure; leaving it unchanged." >&2
    rm -f icu/source/configureTemp
    exit 1
fi

echo ""
echo "$0 has completed extracting ICU from $tar_file - $bin_count binary files extracted."
| |