Android byte 数组转 String

    技术2024-12-16  16

    不知道什么原因,Android 的 String.java 的好多方法不能使用,抛出这样的异常:

    throw new UnsupportedOperationException("Use StringFactory instead.");

    但是呢,StringFactory.java 这个类却又是 @hide 的,无法直接调用,无奈拷贝出其中的方法,拎出来使用:

    import java.nio.ByteBuffer;
    import java.nio.CharBuffer;
    import java.nio.charset.Charset;
    import java.nio.charset.StandardCharsets;

    /**
     * Byte-array to {@link String} conversion helpers.
     *
     * <p>UTF-8 input is decoded by a hand-written decoder that replaces every ill-formed
     * sequence with U+FFFD; every other charset is delegated to {@link Charset#decode}.
     * Result strings are assembled through {@link StringBuilder} rather than through
     * {@code String} constructors.
     */
    public class StringUtils {
        /** U+FFFD, substituted for every maximal ill-formed UTF-8 subsequence. */
        private static final char REPLACEMENT_CHAR = (char) 0xfffd;

        /**
         * Number of continuation bytes that must follow a UTF-8 lead byte, indexed by
         * {@code leadByte & 0x3f} (i.e. for lead bytes 0xc0..0xff). A value of 0 marks a
         * byte that can never start a well-formed sequence (0xc0, 0xc1, 0xf5..0xff).
         */
        private static final int[] TABLE_UTF8_NEEDED = new int[]{
                // 0 1  2  3  4  5  6  7  8  9  a  b  c  d  e  f
                0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0xc0 - 0xcf
                1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0xd0 - 0xdf
                2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, // 0xe0 - 0xef
                3, 3, 3, 3, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0xf0 - 0xff
        };

        /**
         * Decodes the whole array as UTF-8.
         *
         * @param data bytes to decode
         * @return the decoded string
         * @throws NullPointerException if {@code data} is null
         */
        public static String newStringFromBytes(byte[] data) {
            if (data == null) {
                throw new NullPointerException("data == null");
            }
            return newStringFromBytes(data, 0, data.length);
        }

        /**
         * Decodes {@code byteCount} bytes starting at {@code offset} as UTF-8.
         *
         * @param data      bytes to decode
         * @param offset    index of the first byte to decode
         * @param byteCount number of bytes to decode
         * @return the decoded string
         * @throws NullPointerException      if {@code data} is null
         * @throws IndexOutOfBoundsException if the region does not lie inside {@code data}
         */
        public static String newStringFromBytes(byte[] data, int offset, int byteCount) {
            return newStringFromBytes(data, offset, byteCount, StandardCharsets.UTF_8);
        }

        /**
         * Decodes {@code byteCount} bytes starting at {@code offset} using {@code charset}.
         *
         * <p>Only UTF-8 is decoded inline; any other charset goes through
         * {@link Charset#decode(ByteBuffer)}.
         *
         * @param data      bytes to decode
         * @param offset    index of the first byte to decode
         * @param byteCount number of bytes to decode
         * @param charset   charset describing the encoding of {@code data}
         * @return the decoded string
         * @throws NullPointerException      if {@code data} or {@code charset} is null
         * @throws IndexOutOfBoundsException if the region does not lie inside {@code data}
         */
        public static String newStringFromBytes(byte[] data, int offset, int byteCount, Charset charset) {
            if (data == null) {
                throw new NullPointerException("data == null");
            }
            // (offset | byteCount) < 0 is true exactly when either value is negative.
            // Comparing against data.length - offset avoids overflow in offset + byteCount.
            if ((offset | byteCount) < 0 || byteCount > data.length - offset) {
                throw new IndexOutOfBoundsException("length=" + data.length
                        + "; regionStart=" + offset
                        + "; regionLength=" + byteCount);
            }
            if (charset == null) {
                throw new NullPointerException("charset == null");
            }

            // UTF-8 is decoded inline; everything else is delegated to the charset.
            if (charset.name().equals("UTF-8")) {
                return decodeUtf8(data, offset, byteCount);
            }

            CharBuffer decoded = charset.decode(ByteBuffer.wrap(data, offset, byteCount));
            // Append through the CharSequence view instead of decoded.array(): this copies
            // exactly the remaining chars and makes no assumption about the buffer being
            // array-backed, writable, or starting at array index 0.
            return new StringBuilder(decoded.remaining()).append(decoded).toString();
        }

        /**
         * Converts a UTF-8 byte region to a Java (UTF-16) string.
         *
         * <p>Implements the W3C recommended UTF-8 decoder
         * (https://www.w3.org/TR/encoding/#utf-8-decoder).
         *
         * <pre>
         * Well-formed UTF-8 byte sequences (Unicode, chapter 3, table 3-7):
         * Code points           First    Second   Third    Fourth
         * U+0000..U+007F        00..7F
         * U+0080..U+07FF        C2..DF   80..BF
         * U+0800..U+0FFF        E0       A0..BF   80..BF
         * U+1000..U+CFFF        E1..EC   80..BF   80..BF
         * U+D000..U+D7FF        ED       80..9F   80..BF
         * U+E000..U+FFFF        EE..EF   80..BF   80..BF
         * U+10000..U+3FFFF      F0       90..BF   80..BF   80..BF
         * U+40000..U+FFFFF      F1..F3   80..BF   80..BF   80..BF
         * U+100000..U+10FFFF    F4       80..8F   80..BF   80..BF
         * </pre>
         *
         * <p>Malformed input: each maximal subpart is replaced by a single U+FFFD. A maximal
         * subpart is the longest subsequence starting at an unconvertible offset that is
         * either the initial subsequence of a well-formed sequence, or a single byte.
         * Decoding restarts right after the replaced subpart. For example
         * {@code 41 C0 AF 41 F4 80 80 41} decodes to {@code "A\ufffd\ufffdA\ufffdA"}:
         * {@code C0} and {@code AF} can never be part of a well-formed sequence, while
         * {@code F4 80 80} is an incomplete prefix of {@code F4 80 80 80}.
         *
         * <p>The caller must already have validated that the region lies inside
         * {@code data}.
         */
        private static String decodeUtf8(byte[] data, int offset, int byteCount) {
            // The loop never emits more chars than it has consumed bytes, so byteCount
            // chars is always enough room.
            char[] v = new char[byteCount];
            int idx = offset;
            int last = offset + byteCount;
            int s = 0;

            int codePoint = 0;
            int utf8BytesSeen = 0;
            int utf8BytesNeeded = 0;
            // Allowed range for the next continuation byte; narrowed after certain lead
            // bytes to reject overlong forms, surrogates and values above U+10FFFF.
            int lowerBound = 0x80;
            int upperBound = 0xbf;

            while (idx < last) {
                int b = data[idx++] & 0xff;
                if (utf8BytesNeeded == 0) {
                    if ((b & 0x80) == 0) {
                        // ASCII char: 0xxxxxxx
                        v[s++] = (char) b;
                        continue;
                    }
                    if ((b & 0x40) == 0) {
                        // 10xxxxxx is illegal as first byte
                        v[s++] = REPLACEMENT_CHAR;
                        continue;
                    }

                    // 11xxxxxx
                    utf8BytesNeeded = TABLE_UTF8_NEEDED[b & 0x3f];
                    if (utf8BytesNeeded == 0) {
                        v[s++] = REPLACEMENT_CHAR;
                        continue;
                    }

                    // Payload bits of the lead byte, by utf8BytesNeeded:
                    // 1: b & 0x1f
                    // 2: b & 0x0f
                    // 3: b & 0x07
                    codePoint = b & (0x3f >> utf8BytesNeeded);
                    if (b == 0xe0) {
                        lowerBound = 0xa0;
                    } else if (b == 0xed) {
                        upperBound = 0x9f;
                    } else if (b == 0xf0) {
                        lowerBound = 0x90;
                    } else if (b == 0xf4) {
                        upperBound = 0x8f;
                    }
                } else {
                    if (b < lowerBound || b > upperBound) {
                        // The bytes seen so far are ill-formed. Substitute them with U+FFFD.
                        v[s++] = REPLACEMENT_CHAR;
                        codePoint = 0;
                        utf8BytesNeeded = 0;
                        utf8BytesSeen = 0;
                        lowerBound = 0x80;
                        upperBound = 0xbf;
                        // According to the Unicode Standard, "a UTF-8 conversion process is
                        // required to never consume well-formed subsequences as part of its
                        // error handling for ill-formed subsequences". The current byte
                        // could start a well-formed sequence, so step back and re-parse it.
                        idx--;
                        continue;
                    }

                    // Only the first continuation byte has a narrowed range.
                    lowerBound = 0x80;
                    upperBound = 0xbf;
                    codePoint = (codePoint << 6) | (b & 0x3f);
                    utf8BytesSeen++;
                    if (utf8BytesNeeded != utf8BytesSeen) {
                        continue;
                    }

                    // Encode chars from U+10000 up as surrogate pairs.
                    if (codePoint < 0x10000) {
                        v[s++] = (char) codePoint;
                    } else {
                        v[s++] = (char) ((codePoint >> 10) + 0xd7c0);
                        v[s++] = (char) ((codePoint & 0x3ff) + 0xdc00);
                    }

                    utf8BytesSeen = 0;
                    utf8BytesNeeded = 0;
                    codePoint = 0;
                }
            }

            // Input ended in the middle of a sequence: the incomplete prefix is ill-formed.
            if (utf8BytesNeeded != 0) {
                v[s++] = REPLACEMENT_CHAR;
            }

            // A local, exactly sized builder keeps no buffer alive after the call returns.
            return new StringBuilder(s).append(v, 0, s).toString();
        }
    }
    Processed: 0.016, SQL: 9