upload android base code part3

2018-08-08 16:48:17 +08:00 · 2018-08-08 16:48:17 +08:00 · b9e30e05b1
commit b9e30e05b1
parent 71b83c22f1
15122 changed files with 2089659 additions and 0 deletions
--- a/android/dalvik/libdex/DexUtf.h
+++ b/android/dalvik/libdex/DexUtf.h
@ -0,0 +1,131 @@
+/*
+ * Copyright (C) 2011 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * Validate and manipulate MUTF-8 (modified UTF-8) encoded string data.
+ */
+
+#ifndef LIBDEX_DEXUTF_H_
+#define LIBDEX_DEXUTF_H_
+
+#include "DexFile.h"
+
+/*
+ * Retrieve the next UTF-16 code unit from a modified UTF-8 string.
+ *
+ * Advances "*pUtf8Ptr" to the start of the next character.
+ *
+ * WARNING: If a string is corrupted by dropping a '\0' in the middle
+ * of a 3-byte sequence, you can end up overrunning the buffer with
+ * reads (and possibly with the writes if the length was computed and
+ * cached before the damage). For performance reasons, this function
+ * assumes that the string being parsed is known to be valid (e.g., by
+ * already being verified). Most strings we process here are coming
+ * out of dex files or other internal translations, so the only real
+ * risk comes from the JNI NewStringUTF call.
+ */
+DEX_INLINE u2 dexGetUtf16FromUtf8(const char** pUtf8Ptr)
+{
+    const char* cursor = *pUtf8Ptr;
+    unsigned int lead = *cursor++;
+    unsigned int result = lead;     /* one-byte case: the byte itself */
+
+    if ((lead & 0x80) != 0) {
+        /* Multi-byte: a second byte is always consumed on this path. */
+        unsigned int second = *cursor++;
+        result = ((lead & 0x1f) << 6) | (second & 0x3f);
+
+        if ((lead & 0x20) != 0) {
+            /* Three bytes: 4 lead bits, then 6 + 6 continuation bits. */
+            unsigned int third = *cursor++;
+            result = ((lead & 0x0f) << 12) |
+                     ((second & 0x3f) << 6) |
+                     (third & 0x3f);
+        }
+    }
+
+    /* Store the advanced position back for the caller. */
+    *pUtf8Ptr = cursor;
+    return result;
+}
+
+/* Compare two '\0'-terminated modified UTF-8 strings, using Unicode
+ * code point values for comparison. This treats different encodings
+ * for the same code point as equivalent, except that only a real '\0'
+ * byte is considered the string terminator. The return value is as
+ * for strcmp(). */
+int dexUtf8Cmp(const char* s1, const char* s2);
+
+/* for dexIsValidMemberNameUtf8(), a bit vector indicating valid low ascii */
+extern u4 DEX_MEMBER_VALID_LOW_ASCII[4];
+
+/* Helper for dexIsValidMemberNameUtf8(); do not call directly. */
+bool dexIsValidMemberNameUtf8_0(const char** pUtf8Ptr);
+
+/* Return whether the pointed-at modified-UTF-8 encoded character is
+ * valid as part of a member name, updating the pointer to point past
+ * the consumed character. This will consume two encoded UTF-16 code
+ * points if the character is encoded as a surrogate pair. Also, if
+ * this function returns false, then the given pointer may only have
+ * been partially advanced. */
+DEX_INLINE bool dexIsValidMemberNameUtf8(const char** pUtf8Ptr) {
+    u1 c = (u1) **pUtf8Ptr;
+    if (c <= 0x7f) {
+        // Low ASCII: look the character up in the 4-word bit table. The
+        // bits above the low five select the word, the low five the bit.
+        u4 wordIdx = c >> 5;
+        u4 bitIdx = c & 0x1f;
+        (*pUtf8Ptr)++;
+        // bitIdx reaches 31 (e.g. for '_', 0x5f). Build the mask from an
+        // unsigned 1: shifting a signed 1 by 31 overflows int, which is UB.
+        return (DEX_MEMBER_VALID_LOW_ASCII[wordIdx] & (1u << bitIdx)) != 0;
+    }
+
+    // Multibyte encoded character: hand off to the non-inline helper.
+    return dexIsValidMemberNameUtf8_0(pUtf8Ptr);
+}
+
+/* Return whether the given string is a valid field or method name. */
+bool dexIsValidMemberName(const char* s);
+
+/* Return whether the given string is a valid type descriptor. */
+bool dexIsValidTypeDescriptor(const char* s);
+
+/* Return whether the given string is a valid internal-form class
+ * name, with components separated either by dots or slashes as
+ * specified. A class name is like a type descriptor, except that it
+ * can't name a primitive type (including void). In terms of syntax,
+ * the form is either (a) the name of the class without adornment
+ * (that is, not bracketed by "L" and ";"); or (b) identical to the
+ * type descriptor syntax for array types. */
+bool dexIsValidClassName(const char* s, bool dotSeparator);
+
+/* Return whether the given string is a valid reference descriptor. This
+ * is true if dexIsValidTypeDescriptor() returns true and the descriptor
+ * is for a class or array and not a primitive type. */
+bool dexIsReferenceDescriptor(const char* s);
+
+/* Return whether the given string is a valid class descriptor. This
+ * is true if dexIsValidTypeDescriptor() returns true and the descriptor
+ * is for a class and not an array or primitive type. */
+bool dexIsClassDescriptor(const char* s);
+
+/* Return whether the given string is a valid field type descriptor. This
+ * is true if dexIsValidTypeDescriptor() returns true and the descriptor
+ * is for anything but "void". */
+bool dexIsFieldDescriptor(const char* s);
+
+#endif  // LIBDEX_DEXUTF_H_