ICU-21246 Handle kilogram SI prefix correctly

Fix kilogram parsing: ignore 'kilogram' as a stem, we have 'gram'.

Failures in the added unit test before the fix: withSIPrefix resulted
in 'microkilogram', and kilogram's prefix was considered to be
"ONE" (i.e. 10^0).
diff --git a/icu4c/source/i18n/measunit_extra.cpp b/icu4c/source/i18n/measunit_extra.cpp
index 0ae76b5..c702e13 100644
--- a/icu4c/source/i18n/measunit_extra.cpp
+++ b/icu4c/source/i18n/measunit_extra.cpp
@@ -14,6 +14,7 @@
 
 #include "charstr.h"
 #include "cmemory.h"
+#include "cstring.h"
 #include "measunit_impl.h"
 #include "resource.h"
 #include "uarrsort.h"
@@ -115,20 +116,37 @@
 };
 
 /**
- * A ResourceSink that collects table keys from a resource.
+ * A ResourceSink that collects simple unit identifiers from the keys of the
+ * convertUnits table into an array, and adds these values to a TrieBuilder,
+ * with associated values being their index into this array plus a specified
+ * offset, to a trie.
  *
- * This class is for use by ures_getAllItemsWithFallback. Example code:
+ * Example code:
  *
  *     UErrorCode status = U_ZERO_ERROR;
- *     const char* unitIdentifiers[200];
- *     TableKeysSink identifierSink(unitIdentifiers, 200);
+ *     BytesTrieBuilder b(status);
+ *     const char *unitIdentifiers[200];
+ *     SimpleUnitIdentifiersSink identifierSink(unitIdentifiers, 200, b, kTrieValueOffset);
  *     LocalUResourceBundlePointer unitsBundle(ures_openDirect(NULL, "units", &status));
  *     ures_getAllItemsWithFallback(unitsBundle.getAlias(), "convertUnits", identifierSink, status);
  */
-class TableKeysSink : public icu::ResourceSink {
+class SimpleUnitIdentifiersSink : public icu::ResourceSink {
   public:
-    explicit TableKeysSink(const char **out, int32_t outSize)
-        : outArray(out), outSize(outSize), outIndex(0) {
+    /**
+     * Constructor.
+     * @param out Array of char* to which the simple unit identifiers will be
+     *     saved.
+     * @param outSize The size of `out`.
+     * @param trieBuilder The trie builder to which the simple unit identifier
+     *     should be added. The trie builder must outlive this resource sink.
+     * @param trieValueOffset This is added to the index of the identifier in
+     *     the `out` array, before adding to `trieBuilder` as the value
+     *     associated with the identifier.
+     */
+    explicit SimpleUnitIdentifiersSink(const char **out, int32_t outSize, BytesTrieBuilder &trieBuilder,
+                                       int32_t trieValueOffset)
+        : outArray(out), outSize(outSize), trieBuilder(trieBuilder), trieValueOffset(trieValueOffset),
+          outIndex(0) {
     }
 
     /**
@@ -154,13 +172,25 @@
         for (int32_t i = 0; table.getKeyAndValue(i, key, value); ++i) {
             U_ASSERT(i < table.getSize());
             U_ASSERT(outIndex < outSize);
-            outArray[outIndex++] = key;
+            if (uprv_strcmp(key, "kilogram") == 0) {
+                // For parsing, we use "gram", the prefixless metric mass unit. We
+                // thus ignore the SI Base Unit of Mass: it exists due to being the
+                // mass conversion target unit, but not needed for MeasureUnit
+                // parsing.
+                continue;
+            }
+            outArray[outIndex] = key;
+            trieBuilder.add(key, trieValueOffset + outIndex, status);
+            outIndex++;
         }
     }
 
   private:
     const char **outArray;
     int32_t outSize;
+    BytesTrieBuilder &trieBuilder;
+    int32_t trieValueOffset;
+
     int32_t outIndex;
 };
 
@@ -228,6 +258,8 @@
         ures_getByKey(unitsBundle.getAlias(), "convertUnits", NULL, &status));
     if (U_FAILURE(status)) { return; }
 
+    // Allocate enough space: with identifierSink below skipping kilogram, we're
+    // probably allocating one more than needed.
     int32_t simpleUnitsCount = convertUnits.getAlias()->fSize;
     int32_t arrayMallocSize = sizeof(char *) * simpleUnitsCount;
     gSimpleUnits = static_cast<const char **>(uprv_malloc(arrayMallocSize));
@@ -237,11 +269,9 @@
     }
     uprv_memset(gSimpleUnits, 0, arrayMallocSize);
 
-    TableKeysSink identifierSink(gSimpleUnits, simpleUnitsCount);
+    // Populate gSimpleUnits and build the associated trie.
+    SimpleUnitIdentifiersSink identifierSink(gSimpleUnits, simpleUnitsCount, b, kSimpleUnitOffset);
     ures_getAllItemsWithFallback(unitsBundle.getAlias(), "convertUnits", identifierSink, status);
-    for (int i = 0; i < simpleUnitsCount; i++) {
-        b.add(gSimpleUnits[i], kSimpleUnitOffset + i, status);
-    }
 
     // Build the CharsTrie
     // TODO: Use SLOW or FAST here?
diff --git a/icu4c/source/test/intltest/measfmttest.cpp b/icu4c/source/test/intltest/measfmttest.cpp
index b562022..380246d 100644
--- a/icu4c/source/test/intltest/measfmttest.cpp
+++ b/icu4c/source/test/intltest/measfmttest.cpp
@@ -81,6 +81,7 @@
     void TestNumericTimeSomeSpecialFormats();
     void TestIdentifiers();
     void TestInvalidIdentifiers();
+    void TestKilogramIdentifier();
     void TestCompoundUnitOperations();
     void TestDimensionlessBehaviour();
     void Test21060_AddressSanitizerProblem();
@@ -206,6 +207,7 @@
     TESTCASE_AUTO(TestNumericTimeSomeSpecialFormats);
     TESTCASE_AUTO(TestIdentifiers);
     TESTCASE_AUTO(TestInvalidIdentifiers);
+    TESTCASE_AUTO(TestKilogramIdentifier);
     TESTCASE_AUTO(TestCompoundUnitOperations);
     TESTCASE_AUTO(TestDimensionlessBehaviour);
     TESTCASE_AUTO(Test21060_AddressSanitizerProblem);
@@ -3323,6 +3325,45 @@
     }
 }
 
+// Kilogram is a "base unit", although it's also "gram" with a kilo- prefix.
+// This tests that it is handled in the preferred manner.
+void MeasureFormatTest::TestKilogramIdentifier() {
+    IcuTestErrorCode status(*this, "TestKilogramIdentifier");
+
+    // SI unit of mass
+    MeasureUnit kilogram = MeasureUnit::forIdentifier("kilogram", status);
+    // Metric mass unit
+    MeasureUnit gram = MeasureUnit::forIdentifier("gram", status);
+    // Microgram: still a built-in type
+    MeasureUnit microgram = MeasureUnit::forIdentifier("microgram", status);
+    // Nanogram: not a built-in type at this time
+    MeasureUnit nanogram = MeasureUnit::forIdentifier("nanogram", status);
+    status.assertSuccess();
+
+    assertEquals("parsed kilogram equals built-in kilogram", MeasureUnit::getKilogram().getType(),
+                 kilogram.getType());
+    assertEquals("parsed kilogram equals built-in kilogram", MeasureUnit::getKilogram().getSubtype(),
+                 kilogram.getSubtype());
+    assertEquals("parsed gram equals built-in gram", MeasureUnit::getGram().getType(), gram.getType());
+    assertEquals("parsed gram equals built-in gram", MeasureUnit::getGram().getSubtype(),
+                 gram.getSubtype());
+    assertEquals("parsed microgram equals built-in microgram", MeasureUnit::getMicrogram().getType(),
+                 microgram.getType());
+    assertEquals("parsed microgram equals built-in microgram", MeasureUnit::getMicrogram().getSubtype(),
+                 microgram.getSubtype());
+    assertEquals("nanogram", "", nanogram.getType());
+    assertEquals("nanogram", "nanogram", nanogram.getIdentifier());
+
+    assertEquals("prefix of kilogram", UMEASURE_SI_PREFIX_KILO, kilogram.getSIPrefix(status));
+    assertEquals("prefix of gram", UMEASURE_SI_PREFIX_ONE, gram.getSIPrefix(status));
+    assertEquals("prefix of microgram", UMEASURE_SI_PREFIX_MICRO, microgram.getSIPrefix(status));
+    assertEquals("prefix of nanogram", UMEASURE_SI_PREFIX_NANO, nanogram.getSIPrefix(status));
+
+    MeasureUnit tmp = kilogram.withSIPrefix(UMEASURE_SI_PREFIX_MILLI, status);
+    assertEquals(UnicodeString("Kilogram + milli should be milligram, got: ") + tmp.getIdentifier(),
+                 MeasureUnit::getMilligram().getIdentifier(), tmp.getIdentifier());
+}
+
 void MeasureFormatTest::TestCompoundUnitOperations() {
     IcuTestErrorCode status(*this, "TestCompoundUnitOperations");