ICU-22095 Export ICU4X normalization data with tries only without Unicode sets
diff --git a/icu4c/source/tools/icuexportdata/icuexportdata.cpp b/icu4c/source/tools/icuexportdata/icuexportdata.cpp
index 328f5ee..b1f2df5 100644
--- a/icu4c/source/tools/icuexportdata/icuexportdata.cpp
+++ b/icu4c/source/tools/icuexportdata/icuexportdata.cpp
@@ -428,9 +428,7 @@
         status));
     handleError(status, basename);
 
-    if (!reference) {
-        usrc_writeUnicodeSet(f, uset, UPRV_TARGET_SYNTAX_TOML);
-    } else {
+    if (reference) {
         if (uset_contains(reference, 0xFF9E) || uset_contains(reference, 0xFF9F) || !uset_contains(reference, 0x0345)) {
             // NFD expectations don't hold. The set must not contain the half-width
             // kana voicing marks and must contain iota subscript.
@@ -484,6 +482,28 @@
     handleError(status, basename);
 }
 
+void writeNopCompositionPassThrough(const char* basename) { // Writes `first = 0x0` and a trie whose every value is 0xFF to the output file for `basename`.
+    IcuToolErrorCode status("icuexportdata: writeNopCompositionPassThrough");
+    FILE* f = prepareOutputFile(basename);
+    // `first` is hard-coded to 0 here; writePotentialCompositionPassThrough() derives it from a set instead.
+    fprintf(f, "first = 0x0\n");
+    // Nothing is ever set on this builder, so every lookup yields the initial value 0xFF (error value is also 0xFF).
+    LocalUMutableCPTriePointer builder(umutablecptrie_open(0xFF, 0xFF, status));
+    // NOTE(review): in writePotentialCompositionPassThrough() a set bit marks a code point outside the pass-through set, so 0xFF presumably means "nothing passes through" - confirm.
+    LocalUCPTriePointer utrie(umutablecptrie_buildImmutable(
+        builder.getAlias(),
+        trieType,
+        UCPTRIE_VALUE_BITS_8,
+        status));
+    handleError(status, basename);
+    // Serialize the immutable trie under a TOML [trie] table.
+    fprintf(f, "[trie]\n");
+    usrc_writeUCPTrie(f, "trie", utrie.getAlias(), UPRV_TARGET_SYNTAX_TOML);
+
+    fclose(f);
+    handleError(status, basename);
+}
+
 void writePotentialCompositionPassThrough(const char* basename, const Normalizer2* norm, const USet* decompositionStartsWithNonStarter, const USet* decompositionStartsWithBackwardCombiningStarter, USet* potentialPassthroughAndNotBackwardCombining) {
     IcuToolErrorCode status("icuexportdata: writePotentialCompositionPassThrough");
     FILE* f = prepareOutputFile(basename);
@@ -517,12 +537,46 @@
         }
     }
 
-    // The surrogate range forms a useless discontinuity. The code
-    // that reads from the set never looks up by surrage, so let's
-    // put the surrogate range in the set as a micro-optimization.
-    uset_addRange(potentialPassthroughAndNotBackwardCombining, 0xD800, 0xDFFF);
+    // There are fancier ways to do this, but let's keep things
+    // very simple: Deliberately not working this into the above
+    // loop and not extracting this from the inversion list
+    // directly.
+    for (UChar32 c = 0; c <= 0x10FFFF; ++c) {
+        if (!uset_contains(potentialPassthroughAndNotBackwardCombining, c)) {
+            fprintf(f, "first = 0x%X\n", c);
+            break;
+        }
+    }
 
-    usrc_writeUnicodeSet(f, potentialPassthroughAndNotBackwardCombining, UPRV_TARGET_SYNTAX_TOML);
+    // 8 bits per trie value, one bit per code point. Default is 0, which
+    // means pass-through. The lookup key isn't actually a UChar32 but a
+    // UChar32 divided by 8; that's still in range, so things work despite
+    // the data structure not being meant to be used like this.
+    LocalUMutableCPTriePointer builder(umutablecptrie_open(0, 0, status));
+
+    for (int32_t i = 0; i < ((0x10FFFF + 1)/8); ++i) {
+        uint32_t trieVal = 0;
+        for (int32_t j = 0; j < 8; ++j) {
+            UChar32 c = i*8 + j;
+            if (!uset_contains(potentialPassthroughAndNotBackwardCombining, c)) {
+                trieVal |= (1 << j);
+            }
+        }
+        if (trieVal) {
+            umutablecptrie_set(builder.getAlias(), UChar32(i), trieVal, status);
+        }
+    }
+
+    LocalUCPTriePointer utrie(umutablecptrie_buildImmutable(
+        builder.getAlias(),
+        trieType,
+        UCPTRIE_VALUE_BITS_8,
+        status));
+    handleError(status, basename);
+
+    fprintf(f, "[trie]\n");
+    usrc_writeUCPTrie(f, "trie", utrie.getAlias(), UPRV_TARGET_SYNTAX_TOML);
+
     fclose(f);
     handleError(status, basename);
 }
@@ -619,15 +673,29 @@
         bool startsWithNonStarter = u_getCombiningClass(utf32[0]);
         if (startsWithNonStarter) {
             uset_add(decompositionStartsWithNonStarter, c);
+            if (src != dst && !(c == 0x0340 || c == 0x0341 || c == 0x0343 || c == 0x0344 || c == 0x0F73 || c == 0x0F75 || c == 0x0F81 || c == 0xFF9E || c == 0xFF9F)) {
+                // A character whose decomposition starts with a non-starter and isn't the same as the character itself and isn't already hard-coded into ICU4X.
+                status.set(U_INTERNAL_PROGRAM_ERROR);
+                handleError(status, basename);
+            }
         } else if (uset_contains(backwardCombiningStarters, c)) {
             uset_add(decompositionStartsWithBackwardCombiningStarter, c);
         }
+        if (c != 2 && len == 1 && utf32[0] == 2) {
+            // 2 is reserved as a marker for a decomposition that starts with a non-starter.
+            status.set(U_INTERNAL_PROGRAM_ERROR);
+            handleError(status, basename);
+        }
         if (mainNormalizer != nfdNormalizer) {
             UnicodeString nfd;
             nfdNormalizer->normalize(src, nfd, status);
             if (dst == nfd) {
                 continue;
             }
+        } else if (startsWithNonStarter) {
+            // Insert a special marker
+            len = 1;
+            utf32[0] = 2; // magic value (1 is reserved for U+FDFA)
         } else {
             if (src == dst) {
                 continue;
@@ -681,24 +749,38 @@
                 }
             }
         }
-        if (startsWithNonStarter && !(c == 0x0340 || c == 0x0341 || c == 0x0343 || c == 0x0344 || c == 0x0F73 || c == 0x0F75 || c == 0x0F81 || c == 0xFF9E || c == 0xFF9F)) {
-            // A character whose decomposition starts with a non-starter and isn't the same as the character itself and isn't already hard-coded into ICU4X.
-            status.set(U_INTERNAL_PROGRAM_ERROR);
-            handleError(status, basename);
-        }
         if (len == 1 && utf32[0] <= 0xFFFF) {
             if (utf32[0] == 1) {
                 // 1 is reserved as a marker for the expansion of U+FDFA.
                 status.set(U_INTERNAL_PROGRAM_ERROR);
                 handleError(status, basename);
             }
-            pendingTrieInsertions.push_back({c, uint32_t(utf32[0]) << 16, FALSE});
+            // U+0345 is hard-coded in ICU4X
+            if (!(c == 0x0345 && utf32[0] == 0x03B9)) {
+                pendingTrieInsertions.push_back({c, uint32_t(utf32[0]) << 16, FALSE});
+            }
         } else if (len == 2 && utf32[0] <= 0xFFFF && utf32[1] <= 0xFFFF && !u_getCombiningClass(utf32[0]) && u_getCombiningClass(utf32[1])) {
+            for (int32_t i = 0; i < len; ++i) {
+                if (((utf32[i] == 0x0345) && (uprv_strcmp(basename, "uts46d") == 0)) || utf32[i] == 0xFF9E || utf32[i] == 0xFF9F) {
+                    // Assert that iota subscript and half-width voicing marks never occur in these
+                    // expansions in the normalization forms where they are special.
+                    printf("HER c: %X\n", c);
+                    status.set(U_INTERNAL_PROGRAM_ERROR);
+                    handleError(status, basename);
+                }
+            }
             pendingTrieInsertions.push_back({c, (uint32_t(utf32[0]) << 16) | uint32_t(utf32[1]), FALSE});
         } else {
             UBool supplementary = FALSE;
             UBool nonInitialStarter = FALSE;
             for (int32_t i = 0; i < len; ++i) {
+                if (((utf32[i] == 0x0345) && (uprv_strcmp(basename, "uts46d") == 0)) || utf32[i] == 0xFF9E || utf32[i] == 0xFF9F) {
+                    // Assert that iota subscript and half-width voicing marks never occur in these
+                    // expansions in the normalization forms where they are special.
+                    status.set(U_INTERNAL_PROGRAM_ERROR);
+                    handleError(status, basename);
+                }
+
                 if (utf32[i] > 0xFFFF) {
                     supplementary = TRUE;
                 }
@@ -1100,6 +1182,8 @@
     std::vector<uint16_t> storage16;
     std::vector<uint32_t> storage32;
 
+    // Note: the USets are not exported. They are only used to check that a new
+    // Unicode version doesn't violate expectations that are hard-coded in ICU4X.
     USet* nfdDecompositionStartsWithNonStarter = uset_openEmpty();
     USet* nfdDecompositionStartsWithBackwardCombiningStarter = uset_openEmpty();
     std::vector<PendingDescriptor> nfdPendingTrieInsertions;
@@ -1139,6 +1223,8 @@
     USet* uts46PotentialPassthroughAndNotBackwardCombining = uset_openEmpty();
     writePotentialCompositionPassThrough("uts46", nullptr, uts46DecompositionStartsWithNonStarter, uts46DecompositionStartsWithBackwardCombiningStarter, uts46PotentialPassthroughAndNotBackwardCombining);
 
+    writeNopCompositionPassThrough("passthroughnop");
+
     // Check that NFKC set has no characters that NFC doesn't also have.
     uset_removeAll(nfkcPotentialPassthroughAndNotBackwardCombining, nfcPotentialPassthroughAndNotBackwardCombining);
     if (!uset_isEmpty(nfkcPotentialPassthroughAndNotBackwardCombining)) {