279 lines
		
	
	
		
			11 KiB
		
	
	
	
		
			C++
		
	
	
	
			
		
		
	
	
			279 lines
		
	
	
		
			11 KiB
		
	
	
	
		
			C++
		
	
	
	
| /*
 | |
|  * Copyright (C) 2008 The Android Open Source Project
 | |
|  *
 | |
|  * Licensed under the Apache License, Version 2.0 (the "License");
 | |
|  * you may not use this file except in compliance with the License.
 | |
|  * You may obtain a copy of the License at
 | |
|  *
 | |
|  *      http://www.apache.org/licenses/LICENSE-2.0
 | |
|  *
 | |
|  * Unless required by applicable law or agreed to in writing, software
 | |
|  * distributed under the License is distributed on an "AS IS" BASIS,
 | |
|  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 | |
|  * See the License for the specific language governing permissions and
 | |
|  * limitations under the License.
 | |
|  */
 | |
| 
 | |
| #include "java_lang_StringFactory.h"
 | |
| 
 | |
| #include "common_throws.h"
 | |
| #include "handle_scope-inl.h"
 | |
| #include "jni/jni_internal.h"
 | |
| #include "mirror/object-inl.h"
 | |
| #include "mirror/string-alloc-inl.h"
 | |
| #include "native_util.h"
 | |
| #include "nativehelper/jni_macros.h"
 | |
| #include "nativehelper/scoped_local_ref.h"
 | |
| #include "nativehelper/scoped_primitive_array.h"
 | |
| #include "scoped_fast_native_object_access-inl.h"
 | |
| #include "scoped_thread_state_change-inl.h"
 | |
| 
 | |
| namespace art {
 | |
| 
 | |
| static jstring StringFactory_newStringFromBytes(JNIEnv* env, jclass, jbyteArray java_data,
 | |
|                                                 jint high, jint offset, jint byte_count) {
 | |
|   ScopedFastNativeObjectAccess soa(env);
 | |
|   if (UNLIKELY(java_data == nullptr)) {
 | |
|     ThrowNullPointerException("data == null");
 | |
|     return nullptr;
 | |
|   }
 | |
|   StackHandleScope<1> hs(soa.Self());
 | |
|   Handle<mirror::ByteArray> byte_array(hs.NewHandle(soa.Decode<mirror::ByteArray>(java_data)));
 | |
|   int32_t data_size = byte_array->GetLength();
 | |
|   if ((offset | byte_count) < 0 || byte_count > data_size - offset) {
 | |
|     soa.Self()->ThrowNewExceptionF("Ljava/lang/StringIndexOutOfBoundsException;",
 | |
|                                    "length=%d; regionStart=%d; regionLength=%d", data_size,
 | |
|                                    offset, byte_count);
 | |
|     return nullptr;
 | |
|   }
 | |
|   gc::AllocatorType allocator_type = Runtime::Current()->GetHeap()->GetCurrentAllocator();
 | |
|   ObjPtr<mirror::String> result = mirror::String::AllocFromByteArray(soa.Self(),
 | |
|                                                                      byte_count,
 | |
|                                                                      byte_array,
 | |
|                                                                      offset,
 | |
|                                                                      high,
 | |
|                                                                      allocator_type);
 | |
|   return soa.AddLocalReference<jstring>(result);
 | |
| }
 | |
| 
 | |
| // The char array passed as `java_data` must not be a null reference.
 | |
| static jstring StringFactory_newStringFromChars(JNIEnv* env, jclass, jint offset,
 | |
|                                                 jint char_count, jcharArray java_data) {
 | |
|   DCHECK(java_data != nullptr);
 | |
|   ScopedFastNativeObjectAccess soa(env);
 | |
|   StackHandleScope<1> hs(soa.Self());
 | |
|   Handle<mirror::CharArray> char_array(hs.NewHandle(soa.Decode<mirror::CharArray>(java_data)));
 | |
|   gc::AllocatorType allocator_type = Runtime::Current()->GetHeap()->GetCurrentAllocator();
 | |
|   ObjPtr<mirror::String> result = mirror::String::AllocFromCharArray(soa.Self(),
 | |
|                                                                      char_count,
 | |
|                                                                      char_array,
 | |
|                                                                      offset,
 | |
|                                                                      allocator_type);
 | |
|   return soa.AddLocalReference<jstring>(result);
 | |
| }
 | |
| 
 | |
| static jstring StringFactory_newStringFromString(JNIEnv* env, jclass, jstring to_copy) {
 | |
|   ScopedFastNativeObjectAccess soa(env);
 | |
|   if (UNLIKELY(to_copy == nullptr)) {
 | |
|     ThrowNullPointerException("toCopy == null");
 | |
|     return nullptr;
 | |
|   }
 | |
|   StackHandleScope<1> hs(soa.Self());
 | |
|   Handle<mirror::String> string(hs.NewHandle(soa.Decode<mirror::String>(to_copy)));
 | |
|   gc::AllocatorType allocator_type = Runtime::Current()->GetHeap()->GetCurrentAllocator();
 | |
|   ObjPtr<mirror::String> result = mirror::String::AllocFromString(soa.Self(),
 | |
|                                                                   string->GetLength(),
 | |
|                                                                   string,
 | |
|                                                                   /*offset=*/ 0,
 | |
|                                                                   allocator_type);
 | |
|   return soa.AddLocalReference<jstring>(result);
 | |
| }
 | |
| 
 | |
| static jstring StringFactory_newStringFromUtf8Bytes(JNIEnv* env, jclass, jbyteArray java_data,
 | |
|                                                     jint offset, jint byte_count) {
 | |
|   // Local Define in here
 | |
|   static const jchar kReplacementChar = 0xfffd;
 | |
|   static const int kDefaultBufferSize = 256;
 | |
|   static const int kTableUtf8Needed[] = {
 | |
|     //      0  1  2  3  4  5  6  7  8  9  a  b  c  d  e  f
 | |
|     0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // 0xc0 - 0xcf
 | |
|     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,  // 0xd0 - 0xdf
 | |
|     2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,  // 0xe0 - 0xef
 | |
|     3, 3, 3, 3, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  // 0xf0 - 0xff
 | |
|   };
 | |
| 
 | |
|   ScopedFastNativeObjectAccess soa(env);
 | |
|   if (UNLIKELY(java_data == nullptr)) {
 | |
|     ThrowNullPointerException("data == null");
 | |
|     return nullptr;
 | |
|   }
 | |
| 
 | |
|   StackHandleScope<1> hs(soa.Self());
 | |
|   Handle<mirror::ByteArray> byte_array(hs.NewHandle(soa.Decode<mirror::ByteArray>(java_data)));
 | |
|   int32_t data_size = byte_array->GetLength();
 | |
|   if ((offset | byte_count) < 0 || byte_count > data_size - offset) {
 | |
|     soa.Self()->ThrowNewExceptionF("Ljava/lang/StringIndexOutOfBoundsException;",
 | |
|         "length=%d; regionStart=%d; regionLength=%d", data_size,
 | |
|         offset, byte_count);
 | |
|     return nullptr;
 | |
|   }
 | |
| 
 | |
|   /*
 | |
|    * This code converts a UTF-8 byte sequence to a Java String (UTF-16).
 | |
|    * It implements the W3C recommended UTF-8 decoder.
 | |
|    * https://www.w3.org/TR/encoding/#utf-8-decoder
 | |
|    *
 | |
|    * Unicode 3.2 Well-Formed UTF-8 Byte Sequences
 | |
|    * Code Points        First  Second Third Fourth
 | |
|    * U+0000..U+007F     00..7F
 | |
|    * U+0080..U+07FF     C2..DF 80..BF
 | |
|    * U+0800..U+0FFF     E0     A0..BF 80..BF
 | |
|    * U+1000..U+CFFF     E1..EC 80..BF 80..BF
 | |
|    * U+D000..U+D7FF     ED     80..9F 80..BF
 | |
|    * U+E000..U+FFFF     EE..EF 80..BF 80..BF
 | |
|    * U+10000..U+3FFFF   F0     90..BF 80..BF 80..BF
 | |
|    * U+40000..U+FFFFF   F1..F3 80..BF 80..BF 80..BF
 | |
|    * U+100000..U+10FFFF F4     80..8F 80..BF 80..BF
 | |
|    *
 | |
|    * Please refer to Unicode as the authority.
 | |
|    * p.126 Table 3-7 in http://www.unicode.org/versions/Unicode10.0.0/ch03.pdf
 | |
|    *
 | |
|    * Handling Malformed Input
 | |
|    * The maximal subpart should be replaced by a single U+FFFD. Maximal subpart is
 | |
|    * the longest code unit subsequence starting at an unconvertible offset that is either
 | |
|    * 1) the initial subsequence of a well-formed code unit sequence, or
 | |
|    * 2) a subsequence of length one:
 | |
|    * One U+FFFD should be emitted for every sequence of bytes that is an incomplete prefix
 | |
|    * of a valid sequence, and with the conversion to restart after the incomplete sequence.
 | |
|    *
 | |
|    * For example, in byte sequence "41 C0 AF 41 F4 80 80 41", the maximal subparts are
 | |
|    * "C0", "AF", and "F4 80 80". "F4 80 80" can be the initial subsequence of "F4 80 80 80",
 | |
|    * but "C0" can't be the initial subsequence of any well-formed code unit sequence.
 | |
|    * Thus, the output should be "A\ufffd\ufffdA\ufffdA".
 | |
|    *
 | |
|    * Please refer to section "Best Practices for Using U+FFFD." in
 | |
|    * http://www.unicode.org/versions/Unicode10.0.0/ch03.pdf
 | |
|    */
 | |
| 
 | |
|   // Initial value
 | |
|   jchar stack_buffer[kDefaultBufferSize];
 | |
|   std::unique_ptr<jchar[]> allocated_buffer;
 | |
|   jchar* v;
 | |
|   if (byte_count <= kDefaultBufferSize) {
 | |
|     v = stack_buffer;
 | |
|   } else {
 | |
|     allocated_buffer.reset(new jchar[byte_count]);
 | |
|     v = allocated_buffer.get();
 | |
|   }
 | |
| 
 | |
|   jbyte* d = byte_array->GetData();
 | |
|   DCHECK(d != nullptr);
 | |
| 
 | |
|   int idx = offset;
 | |
|   int last = offset + byte_count;
 | |
|   int s = 0;
 | |
| 
 | |
|   int code_point = 0;
 | |
|   int utf8_bytes_seen = 0;
 | |
|   int utf8_bytes_needed = 0;
 | |
|   int lower_bound = 0x80;
 | |
|   int upper_bound = 0xbf;
 | |
|   while (idx < last) {
 | |
|     int b = d[idx++] & 0xff;
 | |
|     if (utf8_bytes_needed == 0) {
 | |
|       if ((b & 0x80) == 0) {  // ASCII char. 0xxxxxxx
 | |
|         v[s++] = (jchar) b;
 | |
|         continue;
 | |
|       }
 | |
| 
 | |
|       if ((b & 0x40) == 0) {  // 10xxxxxx is illegal as first byte
 | |
|         v[s++] = kReplacementChar;
 | |
|         continue;
 | |
|       }
 | |
| 
 | |
|       // 11xxxxxx
 | |
|       int tableLookupIndex = b & 0x3f;
 | |
|       utf8_bytes_needed = kTableUtf8Needed[tableLookupIndex];
 | |
|       if (utf8_bytes_needed == 0) {
 | |
|         v[s++] = kReplacementChar;
 | |
|         continue;
 | |
|       }
 | |
| 
 | |
|       // utf8_bytes_needed
 | |
|       // 1: b & 0x1f
 | |
|       // 2: b & 0x0f
 | |
|       // 3: b & 0x07
 | |
|       code_point = b & (0x3f >> utf8_bytes_needed);
 | |
|       if (b == 0xe0) {
 | |
|         lower_bound = 0xa0;
 | |
|       } else if (b == 0xed) {
 | |
|         upper_bound = 0x9f;
 | |
|       } else if (b == 0xf0) {
 | |
|         lower_bound = 0x90;
 | |
|       } else if (b == 0xf4) {
 | |
|         upper_bound = 0x8f;
 | |
|       }
 | |
|     } else {
 | |
|       if (b < lower_bound || b > upper_bound) {
 | |
|         // The bytes seen are ill-formed. Substitute them with U+FFFD
 | |
|         v[s++] = kReplacementChar;
 | |
|         code_point = 0;
 | |
|         utf8_bytes_needed = 0;
 | |
|         utf8_bytes_seen = 0;
 | |
|         lower_bound = 0x80;
 | |
|         upper_bound = 0xbf;
 | |
|         /*
 | |
|          * According to the Unicode Standard,
 | |
|          * "a UTF-8 conversion process is required to never consume well-formed
 | |
|          * subsequences as part of its error handling for ill-formed subsequences"
 | |
|          * The current byte could be part of well-formed subsequences. Reduce the
 | |
|          * index by 1 to parse it in next loop.
 | |
|          */
 | |
|         idx--;
 | |
|         continue;
 | |
|       }
 | |
| 
 | |
|       lower_bound = 0x80;
 | |
|       upper_bound = 0xbf;
 | |
|       code_point = (code_point << 6) | (b & 0x3f);
 | |
|       utf8_bytes_seen++;
 | |
|       if (utf8_bytes_needed != utf8_bytes_seen) {
 | |
|         continue;
 | |
|       }
 | |
| 
 | |
|       // Encode chars from U+10000 up as surrogate pairs
 | |
|       if (code_point < 0x10000) {
 | |
|         v[s++] = (jchar) code_point;
 | |
|       } else {
 | |
|         v[s++] = (jchar) ((code_point >> 10) + 0xd7c0);
 | |
|         v[s++] = (jchar) ((code_point & 0x3ff) + 0xdc00);
 | |
|       }
 | |
| 
 | |
|       utf8_bytes_seen = 0;
 | |
|       utf8_bytes_needed = 0;
 | |
|       code_point = 0;
 | |
|     }
 | |
|   }
 | |
| 
 | |
|   // The bytes seen are ill-formed. Substitute them by U+FFFD
 | |
|   if (utf8_bytes_needed != 0) {
 | |
|     v[s++] = kReplacementChar;
 | |
|   }
 | |
| 
 | |
|   ObjPtr<mirror::String> result = mirror::String::AllocFromUtf16(soa.Self(), s, v);
 | |
|   return soa.AddLocalReference<jstring>(result);
 | |
| }
 | |
| 
 | |
| static JNINativeMethod gMethods[] = {
 | |
|   FAST_NATIVE_METHOD(StringFactory, newStringFromBytes, "([BIII)Ljava/lang/String;"),
 | |
|   FAST_NATIVE_METHOD(StringFactory, newStringFromChars, "(II[C)Ljava/lang/String;"),
 | |
|   FAST_NATIVE_METHOD(StringFactory, newStringFromString, "(Ljava/lang/String;)Ljava/lang/String;"),
 | |
|   FAST_NATIVE_METHOD(StringFactory, newStringFromUtf8Bytes, "([BII)Ljava/lang/String;"),
 | |
| };
 | |
| 
 | |
| void register_java_lang_StringFactory(JNIEnv* env) {
 | |
|   REGISTER_NATIVE_METHODS("java/lang/StringFactory");
 | |
| }
 | |
| 
 | |
| }  // namespace art
 |