ICU-21035 add & use CharString::extract(dest buffer)

See #1253
diff --git a/icu4c/source/common/charstr.cpp b/icu4c/source/common/charstr.cpp
index 4878c73..318a185 100644
--- a/icu4c/source/common/charstr.cpp
+++ b/icu4c/source/common/charstr.cpp
@@ -20,6 +20,7 @@
 #include "cmemory.h"
 #include "cstring.h"
 #include "uinvchar.h"
+#include "ustr_imp.h"
 
 U_NAMESPACE_BEGIN
 
@@ -46,6 +47,19 @@
     return p;
 }
 
+int32_t CharString::extract(char *dest, int32_t capacity, UErrorCode &errorCode) const {
+    if (U_FAILURE(errorCode)) { return len; }
+    if (capacity < 0 || (capacity > 0 && dest == nullptr)) {
+        errorCode = U_ILLEGAL_ARGUMENT_ERROR;
+        return len;
+    }
+    const char *src = buffer.getAlias();
+    if (0 < len && len <= capacity && src != dest) {
+        uprv_memcpy(dest, src, len);
+    }
+    return u_terminateChars(dest, capacity, len, &errorCode);
+}
+
 CharString &CharString::copyFrom(const CharString &s, UErrorCode &errorCode) {
     if(U_SUCCESS(errorCode) && this!=&s && ensureCapacity(s.len+1, 0, errorCode)) {
         len=s.len;
diff --git a/icu4c/source/common/charstr.h b/icu4c/source/common/charstr.h
index 9400fbb..6619faa 100644
--- a/icu4c/source/common/charstr.h
+++ b/icu4c/source/common/charstr.h
@@ -87,6 +87,22 @@
      * The caller must uprv_free() the result.
      */
     char *cloneData(UErrorCode &errorCode) const;
+    /**
+     * Copies the contents of the string into dest.
+     * Checks if there is enough space in dest, extracts the entire string if possible,
+     * and NUL-terminates dest if possible.
+     *
+     * If the string fits into dest but cannot be NUL-terminated (length()==capacity),
+     * then the error code is set to U_STRING_NOT_TERMINATED_WARNING.
+     * If the string itself does not fit into dest (length()>capacity),
+     * then the error code is set to U_BUFFER_OVERFLOW_ERROR.
+     *
+     * @param dest Destination string buffer.
+     * @param capacity Size of the dest buffer (number of chars).
+     * @param errorCode ICU error code.
+     * @return length()
+     */
+    int32_t extract(char *dest, int32_t capacity, UErrorCode &errorCode) const;
 
     bool operator==(StringPiece other) const {
         return len == other.length() && (len == 0 || uprv_memcmp(data(), other.data(), len) == 0);
diff --git a/icu4c/source/common/icuplug.cpp b/icu4c/source/common/icuplug.cpp
index c6439cc..303bd44 100644
--- a/icu4c/source/common/icuplug.cpp
+++ b/icu4c/source/common/icuplug.cpp
@@ -782,8 +782,8 @@
     /* plugin_file is not used for processing - it is only used 
        so that uplug_getPluginFile() works (i.e. icuinfo)
     */
-    uprv_strncpy(plugin_file, pluginFile.data(), sizeof(plugin_file));
-        
+    pluginFile.extract(plugin_file, sizeof(plugin_file), *status);
+
 #if UPLUG_TRACE
     DBG((stderr, "pluginfile= %s len %d/%d\n", plugin_file, (int)strlen(plugin_file), (int)sizeof(plugin_file)));
 #endif
diff --git a/icu4c/source/common/loclikely.cpp b/icu4c/source/common/loclikely.cpp
index ea0d98c..e87b06e 100644
--- a/icu4c/source/common/loclikely.cpp
+++ b/icu4c/source/common/loclikely.cpp
@@ -464,18 +464,7 @@
         goto error;
     }
 
-    {
-        icu::CharString result = ulocimp_getLanguage(position, &position, *err);
-        if (U_FAILURE(*err)) {
-            goto error;
-        }
-
-        subtagLength = result.length();
-        if (subtagLength <= *langLength) {
-            uprv_memcpy(lang, result.data(), subtagLength);
-        }
-        u_terminateChars(lang, *langLength, subtagLength, err);
-    }
+    subtagLength = ulocimp_getLanguage(position, &position, *err).extract(lang, *langLength, *err);
 
     /*
      * Note that we explicit consider U_STRING_NOT_TERMINATED_WARNING
diff --git a/icu4c/source/common/uloc.cpp b/icu4c/source/common/uloc.cpp
index 94cbaab..45fab01 100644
--- a/icu4c/source/common/uloc.cpp
+++ b/icu4c/source/common/uloc.cpp
@@ -50,9 +50,6 @@
 #include "uassert.h"
 #include "charstr.h"
 
-#include <algorithm>
-#include <stdio.h> /* for sprintf */
-
 U_NAMESPACE_USE
 
 /* ### Declarations **************************************************/
@@ -892,7 +889,6 @@
     char* startSearchHere = NULL;
     char* keywordStart = NULL;
     CharString updatedKeysAndValues;
-    int32_t updatedKeysAndValuesLen;
     UBool handledInputKeyAndValue = FALSE;
     char keyValuePrefix = '@';
 
@@ -1072,18 +1068,10 @@
         return bufLen;
     }
 
-    updatedKeysAndValuesLen = updatedKeysAndValues.length();
-    /* needLen = length of the part before '@' + length of updated key-value part including '@' */
-    needLen = (int32_t)(startSearchHere - buffer) + updatedKeysAndValuesLen;
-    if(needLen >= bufferCapacity) {
-        *status = U_BUFFER_OVERFLOW_ERROR;
-        return needLen; /* no change */
-    }
-    if (updatedKeysAndValuesLen > 0) {
-        uprv_strncpy(startSearchHere, updatedKeysAndValues.data(), updatedKeysAndValuesLen);
-    }
-    buffer[needLen]=0;
-    return needLen;
+    // needLen = length of the part before '@'
+    needLen = (int32_t)(startSearchHere - buffer);
+    return needLen + updatedKeysAndValues.extract(
+                         startSearchHere, bufferCapacity - needLen, *status);
 }
 
 /* ### ID parsing implementation **************************************************/
@@ -1232,13 +1220,7 @@
                   char *script, int32_t scriptCapacity,
                   const char **pEnd) {
     ErrorCode status;
-    CharString result = ulocimp_getScript(localeID, pEnd, status);
-    if (status.isFailure()) {
-        return 0;
-    }
-    int32_t reslen = result.length();
-    uprv_memcpy(script, result.data(), std::min(reslen, scriptCapacity));
-    return reslen;
+    return ulocimp_getScript(localeID, pEnd, status).extract(script, scriptCapacity, status);
 }
 
 static CharString
@@ -1281,13 +1263,7 @@
                    char *country, int32_t countryCapacity,
                    const char **pEnd) {
     ErrorCode status;
-    CharString result = ulocimp_getCountry(localeID, pEnd, status);
-    if (status.isFailure()) {
-        return 0;
-    }
-    int32_t reslen = result.length();
-    uprv_memcpy(country, result.data(), std::min(reslen, countryCapacity));
-    return reslen;
+    return ulocimp_getCountry(localeID, pEnd, status).extract(country, countryCapacity, status);
 }
 
 /**
@@ -1744,16 +1720,7 @@
         localeID=uloc_getDefault();
     }
 
-    CharString result = ulocimp_getLanguage(localeID, NULL, *err);
-    if (U_FAILURE(*err)) {
-        return 0;
-    }
-
-    int32_t reslen = result.length();
-    if (reslen <= languageCapacity) {
-        uprv_memcpy(language, result.data(), reslen);
-    }
-    return u_terminateChars(language, languageCapacity, reslen, err);
+    return ulocimp_getLanguage(localeID, NULL, *err).extract(language, languageCapacity, *err);
 }
 
 U_CAPI int32_t U_EXPORT2
diff --git a/icu4c/source/common/uloc_tag.cpp b/icu4c/source/common/uloc_tag.cpp
index 5eed02c..8faac32 100644
--- a/icu4c/source/common/uloc_tag.cpp
+++ b/icu4c/source/common/uloc_tag.cpp
@@ -1413,7 +1413,7 @@
                         break;
                     }
 
-                    uprv_strcpy(pExtBuf, bcpValue);
+                    buf.extract(pExtBuf, resultCapacity, tmpStatus);
                     T_CString_toLowerCase(pExtBuf);
 
                     extBuf->append(pExtBuf, bcpValueLen, tmpStatus);
diff --git a/icu4c/source/i18n/rulebasedcollator.cpp b/icu4c/source/i18n/rulebasedcollator.cpp
index 60acf17..917482d 100644
--- a/icu4c/source/i18n/rulebasedcollator.cpp
+++ b/icu4c/source/i18n/rulebasedcollator.cpp
@@ -1600,10 +1600,7 @@
     appendSubtag(result, 'Z', subtag, length, errorCode);
 
     if(U_FAILURE(errorCode)) { return 0; }
-    if(result.length() <= capacity) {
-        uprv_memcpy(buffer, result.data(), result.length());
-    }
-    return u_terminateChars(buffer, capacity, result.length(), &errorCode);
+    return result.extract(buffer, capacity, errorCode);
 }
 
 UBool
diff --git a/icu4c/source/i18n/ucol_sit.cpp b/icu4c/source/i18n/ucol_sit.cpp
index 92f332d..4dc81ae 100644
--- a/icu4c/source/i18n/ucol_sit.cpp
+++ b/icu4c/source/i18n/ucol_sit.cpp
@@ -372,10 +372,7 @@
                     }
                     len += s->entries[i].length();
                 } else {
-                    len += s->entries[i].length();
-                    if(len < capacity) {
-                        uprv_strncat(destination,s->entries[i].data(), s->entries[i].length());
-                    }
+                    len += s->entries[i].extract(destination + len, capacity - len, *status);
                 }
             }
         }
diff --git a/icu4c/source/test/cintltst/cloctst.c b/icu4c/source/test/cintltst/cloctst.c
index 29f4c82..f87e518 100644
--- a/icu4c/source/test/cintltst/cloctst.c
+++ b/icu4c/source/test/cintltst/cloctst.c
@@ -2233,7 +2233,12 @@
         strcpy(buffer,kwSetTestCases[i].l);
         status = U_ZERO_ERROR;
         res = uloc_setKeywordValue(kwSetTestCases[i].k, kwSetTestCases[i].v, buffer, blen, &status);
-        if(status != U_BUFFER_OVERFLOW_ERROR) {
+        if(res == blen) {
+            if(status != U_STRING_NOT_TERMINATED_WARNING) {
+                log_err("expected not terminated warning on buffer %d got %s, len %d (%s + [%s=%s])\n", blen, u_errorName(status), res, kwSetTestCases[i].l, kwSetTestCases[i].k, kwSetTestCases[i].v);
+                return;
+            }
+        } else if(status != U_BUFFER_OVERFLOW_ERROR) {
             log_err("expected buffer overflow on buffer %d got %s, len %d (%s + [%s=%s])\n", blen, u_errorName(status), res, kwSetTestCases[i].l, kwSetTestCases[i].k, kwSetTestCases[i].v);
             return;
         }
diff --git a/icu4c/source/test/intltest/strtest.cpp b/icu4c/source/test/intltest/strtest.cpp
index 1185d31..54b5a19 100644
--- a/icu4c/source/test/intltest/strtest.cpp
+++ b/icu4c/source/test/intltest/strtest.cpp
@@ -810,6 +810,27 @@
                 "Long string over 40 characters to trigger heap allocation",
                 s3.data());
     }
+
+    {
+        // extract()
+        errorCode.reset();
+        CharString s("abc", errorCode);
+        char buffer[10];
+
+        s.extract(buffer, 10, errorCode);
+        assertEquals("abc.extract(10) success", U_ZERO_ERROR, errorCode.get());
+        assertEquals("abc.extract(10) output", "abc", buffer);
+
+        strcpy(buffer, "012345");
+        s.extract(buffer, 3, errorCode);
+        assertEquals("abc.extract(3) not terminated",
+                     U_STRING_NOT_TERMINATED_WARNING, errorCode.reset());
+        assertEquals("abc.extract(3) output", "abc345", buffer);
+
+        strcpy(buffer, "012345");
+        s.extract(buffer, 2, errorCode);
+        assertEquals("abc.extract(2) overflow", U_BUFFER_OVERFLOW_ERROR, errorCode.reset());
+    }
 }
 
 void
diff --git a/icu4c/source/tools/toolutil/pkg_genc.cpp b/icu4c/source/tools/toolutil/pkg_genc.cpp
index 8251128..17347ba 100644
--- a/icu4c/source/tools/toolutil/pkg_genc.cpp
+++ b/icu4c/source/tools/toolutil/pkg_genc.cpp
@@ -738,8 +738,8 @@
         exit(U_ILLEGAL_ARGUMENT_ERROR);
     }
 
-    uprv_strcpy(outFilename, outFilenameBuilder.data());
-    uprv_strcpy(entryName, entryNameBuilder.data());
+    outFilenameBuilder.extract(outFilename, outFilenameCapacity, status);
+    entryNameBuilder.extract(entryName, entryNameCapacity, status);
 }
 
 #ifdef CAN_GENERATE_OBJECTS