301 lines
		
	
	
		
			14 KiB
		
	
	
	
		
			C++
		
	
	
	
			
		
		
	
	
			301 lines
		
	
	
		
			14 KiB
		
	
	
	
		
			C++
		
	
	
	
| // Copyright 2021 Code Intelligence GmbH
 | |
| //
 | |
| // Licensed under the Apache License, Version 2.0 (the "License");
 | |
| // you may not use this file except in compliance with the License.
 | |
| // You may obtain a copy of the License at
 | |
| //
 | |
| //      http://www.apache.org/licenses/LICENSE-2.0
 | |
| //
 | |
| // Unless required by applicable law or agreed to in writing, software
 | |
| // distributed under the License is distributed on an "AS IS" BASIS,
 | |
| // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 | |
| // See the License for the specific language governing permissions and
 | |
| // limitations under the License.
 | |
| 
 | |
| #include "fuzzed_data_provider.h"
 | |
| 
 | |
| #include <cstddef>
 | |
| #include <cstdint>
 | |
| #include <random>
 | |
| #include <sstream>
 | |
| #include <string>
 | |
| #include <vector>
 | |
| 
 | |
| #include "fuzz_target_runner.h"
 | |
| #include "gflags/gflags.h"
 | |
| #include "gtest/gtest.h"
 | |
| #include "jvm_tooling.h"
 | |
| #include "tools/cpp/runfiles/runfiles.h"
 | |
| 
 | |
| DECLARE_string(cp);
 | |
| DECLARE_string(jvm_args);
 | |
| DECLARE_string(instrumentation_excludes);
 | |
| 
 | |
| DECLARE_string(target_class);
 | |
| DECLARE_string(target_args);
 | |
| 
 | |
| namespace jazzer {
 | |
| 
 | |
| std::pair<std::string, std::size_t> FixUpModifiedUtf8(const uint8_t* pos,
 | |
|                                                       std::size_t max_bytes,
 | |
|                                                       jint max_length,
 | |
|                                                       bool ascii_only,
 | |
|                                                       bool stop_on_backslash);
 | |
| 
 | |
| std::pair<std::string, std::size_t> FixUpRemainingModifiedUtf8(
 | |
|     const std::string& str, bool ascii_only, bool stop_on_backslash) {
 | |
|   return FixUpModifiedUtf8(reinterpret_cast<const uint8_t*>(str.c_str()),
 | |
|                            str.length(), std::numeric_limits<jint>::max(),
 | |
|                            ascii_only, stop_on_backslash);
 | |
| }
 | |
| 
 | |
// Work around the fact that size_t is unsigned long on Linux and unsigned long
// long on Windows: the `_z` suffix produces a std::size_t on every platform,
// so expected byte counts can be written as e.g. `6_z`.
//
// Written without whitespace between `""` and the suffix because the spaced
// form is deprecated in newer C++ standards. The conversion is spelled out
// explicitly and the operator is constexpr so the literal can also be used in
// constant expressions.
constexpr std::size_t operator""_z(unsigned long long x) {
  return static_cast<std::size_t>(x);
}
 | |
| 
 | |
| using namespace std::literals::string_literals;
 | |
| TEST(FixUpModifiedUtf8Test, FullUtf8_ContinueOnBackslash) {
 | |
|   EXPECT_EQ(std::make_pair("jazzer"s, 6_z),
 | |
|             FixUpRemainingModifiedUtf8("jazzer"s, false, false));
 | |
|   EXPECT_EQ(std::make_pair("ja\xC0\x80zzer"s, 7_z),
 | |
|             FixUpRemainingModifiedUtf8("ja\0zzer"s, false, false));
 | |
|   EXPECT_EQ(std::make_pair("ja\xC0\x80\xC0\x80zzer"s, 8_z),
 | |
|             FixUpRemainingModifiedUtf8("ja\0\0zzer"s, false, false));
 | |
|   EXPECT_EQ(std::make_pair("ja\\zzer"s, 7_z),
 | |
|             FixUpRemainingModifiedUtf8("ja\\zzer"s, false, false));
 | |
|   EXPECT_EQ(std::make_pair("ja\\\\zzer"s, 8_z),
 | |
|             FixUpRemainingModifiedUtf8("ja\\\\zzer"s, false, false));
 | |
|   EXPECT_EQ(std::make_pair("ۧ"s, 5_z),
 | |
|             FixUpRemainingModifiedUtf8(u8"ۧ"s, false, false));
 | |
| }
 | |
| 
 | |
| TEST(FixUpModifiedUtf8Test, AsciiOnly_ContinueOnBackslash) {
 | |
|   EXPECT_EQ(std::make_pair("jazzer"s, 6_z),
 | |
|             FixUpRemainingModifiedUtf8("jazzer"s, true, false));
 | |
|   EXPECT_EQ(std::make_pair("ja\xC0\x80zzer"s, 7_z),
 | |
|             FixUpRemainingModifiedUtf8("ja\0zzer"s, true, false));
 | |
|   EXPECT_EQ(std::make_pair("ja\xC0\x80\xC0\x80zzer"s, 8_z),
 | |
|             FixUpRemainingModifiedUtf8("ja\0\0zzer"s, true, false));
 | |
|   EXPECT_EQ(std::make_pair("ja\\zzer"s, 7_z),
 | |
|             FixUpRemainingModifiedUtf8("ja\\zzer"s, true, false));
 | |
|   EXPECT_EQ(std::make_pair("ja\\\\zzer"s, 8_z),
 | |
|             FixUpRemainingModifiedUtf8("ja\\\\zzer"s, true, false));
 | |
|   EXPECT_EQ(std::make_pair("\x62\x02\x2C\x43\x1F"s, 5_z),
 | |
|             FixUpRemainingModifiedUtf8(u8"ۧ"s, true, false));
 | |
| }
 | |
| 
 | |
| TEST(FixUpModifiedUtf8Test, FullUtf8_StopOnBackslash) {
 | |
|   EXPECT_EQ(std::make_pair("jazzer"s, 6_z),
 | |
|             FixUpRemainingModifiedUtf8("jazzer"s, false, true));
 | |
|   EXPECT_EQ(std::make_pair("ja\xC0\x80zzer"s, 7_z),
 | |
|             FixUpRemainingModifiedUtf8("ja\0zzer"s, false, true));
 | |
|   EXPECT_EQ(std::make_pair("ja\xC0\x80\xC0\x80zzer"s, 8_z),
 | |
|             FixUpRemainingModifiedUtf8("ja\0\0zzer"s, false, true));
 | |
|   EXPECT_EQ(std::make_pair("ja"s, 4_z),
 | |
|             FixUpRemainingModifiedUtf8("ja\\zzer"s, false, true));
 | |
|   EXPECT_EQ(std::make_pair("ja\\zzer"s, 8_z),
 | |
|             FixUpRemainingModifiedUtf8("ja\\\\zzer"s, false, true));
 | |
| }
 | |
| 
 | |
| TEST(FixUpModifiedUtf8Test, AsciiOnly_StopOnBackslash) {
 | |
|   EXPECT_EQ(std::make_pair("jazzer"s, 6_z),
 | |
|             FixUpRemainingModifiedUtf8("jazzer"s, true, true));
 | |
|   EXPECT_EQ(std::make_pair("ja\xC0\x80zzer"s, 7_z),
 | |
|             FixUpRemainingModifiedUtf8("ja\0zzer"s, true, true));
 | |
|   EXPECT_EQ(std::make_pair("ja\xC0\x80\xC0\x80zzer"s, 8_z),
 | |
|             FixUpRemainingModifiedUtf8("ja\0\0zzer"s, true, true));
 | |
|   EXPECT_EQ(std::make_pair("ja"s, 4_z),
 | |
|             FixUpRemainingModifiedUtf8("ja\\zzer"s, true, true));
 | |
|   EXPECT_EQ(std::make_pair("ja\\zzer"s, 8_z),
 | |
|             FixUpRemainingModifiedUtf8("ja\\\\zzer"s, true, true));
 | |
| }
 | |
| 
 | |
| class FuzzedDataProviderTest : public ::testing::Test {
 | |
|  protected:
 | |
|   // After DestroyJavaVM() no new JVM instance can be created in the same
 | |
|   // process, so we set up a single JVM instance for this test binary which gets
 | |
|   // destroyed after all tests in this test suite have finished.
 | |
|   static void SetUpTestCase() {
 | |
|     FLAGS_instrumentation_excludes = "**";
 | |
|     using ::bazel::tools::cpp::runfiles::Runfiles;
 | |
|     Runfiles* runfiles = Runfiles::CreateForTest();
 | |
|     FLAGS_cp = runfiles->Rlocation(FLAGS_cp);
 | |
| 
 | |
|     jvm_ = std::make_unique<JVM>("test_executable");
 | |
|   }
 | |
| 
 | |
|   static void TearDownTestCase() { jvm_.reset(nullptr); }
 | |
| 
 | |
|   static std::unique_ptr<JVM> jvm_;
 | |
| };
 | |
| 
 | |
| std::unique_ptr<JVM> FuzzedDataProviderTest::jvm_ = nullptr;
 | |
| 
 | |
// see testdata/test/FuzzTargetWithDataProvider.java for the implementation
// of the fuzz target that asserts that the correct values are received from
// the data provider.
// The trailing comments name the data provider call each group of bytes is
// meant for and the value that call is expected to produce.
const uint8_t kInput[] = {
    // Bytes read from the start
    0x01, 0x02,  // consumeBytes(2): {0x01, 0x02}

    'j', 'a', 'z', 'z', 'e', 'r',   // consumeString(6): "jazzer"
    'j', 'a', 0x00, 'z', 'e', 'r',  // consumeString(6): "ja\u0000zer"
    // UTF-8 for U+20AC (euro sign) followed by U+00DF (sharp s).
    0xE2, 0x82, 0xAC, 0xC3, 0x9F,  // consumeString(2): "\u20AC\u00DF"

    'j', 'a', 'z', 'z', 'e', 'r',   // consumeAsciiString(6): "jazzer"
    'j', 'a', 0x00, 'z', 'e', 'r',  // consumeAsciiString(6): "ja\u0000zer"
    0xE2, 0x82, 0xAC, 0xC3,
    0x9F,  // consumeAsciiString(5): "\u0062\u0002\u002C\u0043\u001F"

    false, false, true, false,
    true,  // consumeBooleans(5): { false, false, true, false, true }
    // NOTE(review): read as little-endian, the first eight bytes give
    // 0x0123456789ABDCEF (second byte is 0xDC, not 0xCD) -- confirm against
    // the value asserted by the Java fuzz target.
    0xEF, 0xDC, 0xAB, 0x89, 0x67, 0x45, 0x23, 0x01, 0x10, 0x32, 0x54, 0x76,
    0x98, 0xBA, 0xDC,
    0xFE,  // consumeLongs(2): { 0x0123456789ABDCEF, 0xFEDCBA9876543210 }

    0x78, 0x56, 0x34, 0x12,  // consumeInts(3): { 0x12345678 }
    0x56, 0x34, 0x12,        // consumeLong():

    // Bytes read from the end
    0x02, 0x03, 0x02, 0x04,  // 4x pickValue in array with five elements

    0x12, 0x34, 0x56, 0x78, 0x90, 0x12, 0x34, 0x56,
    0x78,  // consumed but unused by consumeDouble()
    10,    // -max for next consumeDouble
    0x12, 0x34, 0x56, 0x78, 0x90, 0x12, 0x34, 0x56,
    0x78,  // consumed but unused by consumeDouble()
    9,     // max for next consumeDouble
    0x12, 0x34, 0x56, 0x78, 0x90, 0x12, 0x34, 0x56,
    0x78,  // consumed but unused by consumeDouble()
    8,     // -min for next consumeDouble
    0x12, 0x34, 0x56, 0x78, 0x90, 0x12, 0x34, 0x56,
    0x78,  // consumed but unused by consumeDouble()
    7,     // min for next consumeDouble
    0x12, 0x34, 0x56, 0x78, 0x90, 0x12, 0x34, 0x56,
    0x78,  // consumed but unused by consumeDouble()
    6,     // -denorm_min for next consumeDouble
    0x12, 0x34, 0x56, 0x78, 0x90, 0x12, 0x34, 0x56,
    0x78,  // consumed but unused by consumeDouble()
    5,     // denorm_min for next consumeDouble
    0x12, 0x34, 0x56, 0x78, 0x90, 0x12, 0x34, 0x56,
    0x78,  // consumed but unused by consumeDouble()
    4,     // NaN for next consumeDouble
    0x12, 0x34, 0x56, 0x78, 0x90, 0x12, 0x34, 0x56,
    0x78,  // consumed but unused by consumeDouble()
    3,     // -infinity for next consumeDouble
    0x12, 0x34, 0x56, 0x78, 0x90, 0x12, 0x34, 0x56,
    0x78,  // consumed but unused by consumeDouble()
    2,     // infinity for next consumeDouble
    0x12, 0x34, 0x56, 0x78, 0x90, 0x12, 0x34, 0x56,
    0x78,  // consumed but unused by consumeDouble()
    1,     // -0.0 for next consumeDouble
    0x12, 0x34, 0x56, 0x78, 0x90, 0x12, 0x34, 0x56,
    0x78,  // consumed but unused by consumeDouble()
    0,     // 0.0 for next consumeDouble

    0x12, 0x34, 0x56, 0x78, 0x90,  // consumed but unused by consumeFloat()
    10,                            // -max for next consumeFloat
    0x12, 0x34, 0x56, 0x78, 0x90,  // consumed but unused by consumeFloat()
    9,                             // max for next consumeFloat
    0x12, 0x34, 0x56, 0x78, 0x90,  // consumed but unused by consumeFloat()
    8,                             // -min for next consumeFloat
    0x12, 0x34, 0x56, 0x78, 0x90,  // consumed but unused by consumeFloat()
    7,                             // min for next consumeFloat
    0x12, 0x34, 0x56, 0x78, 0x90,  // consumed but unused by consumeFloat()
    6,                             // -denorm_min for next consumeFloat
    0x12, 0x34, 0x56, 0x78, 0x90,  // consumed but unused by consumeFloat()
    5,                             // denorm_min for next consumeFloat
    0x12, 0x34, 0x56, 0x78, 0x90,  // consumed but unused by consumeFloat()
    4,                             // NaN for next consumeFloat
    0x12, 0x34, 0x56, 0x78, 0x90,  // consumed but unused by consumeFloat()
    3,                             // -infinity for next consumeFloat
    0x12, 0x34, 0x56, 0x78, 0x90,  // consumed but unused by consumeFloat()
    2,                             // infinity for next consumeFloat
    0x12, 0x34, 0x56, 0x78, 0x90,  // consumed but unused by consumeFloat()
    1,                             // -0.0 for next consumeFloat
    0x12, 0x34, 0x56, 0x78, 0x90,  // consumed but unused by consumeFloat()
    0,                             // 0.0 for next consumeFloat

    0x88, 0xAB, 0x61, 0xCB, 0x32, 0xEB, 0x30,
    0xF9,  // consumeDouble(13.37, 31.337): 30.859126145478349 (small range)
    0x51, 0xF6, 0x1F,
    0x3A,  // consumeFloat(123.0, 777.0): 271.49084 (small range)
    0x11, 0x4D, 0xFD, 0x54, 0xD6, 0x3D, 0x43, 0x73,
    0x39,  // consumeRegularDouble(): 8.0940194040236032e+307
    0x16, 0xCF, 0x3D, 0x29, 0x4A,  // consumeRegularFloat(): -2.8546307e+38

    0x61, 0xCB, 0x32, 0xEB, 0x30, 0xF9, 0x51,
    0xF6,                    // consumeProbabilityDouble(): 0.96218831486039413
    0x1F, 0x3A, 0x11, 0x4D,  // consumeProbabilityFloat(): 0.30104411
    0xFD, 0x54, 0xD6, 0x3D, 0x43, 0x73, 0x39,
    0x16,                    // consumeProbabilityDouble(): 0.086814121166605432
    0xCF, 0x3D, 0x29, 0x4A,  // consumeProbabilityFloat(): 0.28969181

    0x01,  // consumeInt(0x12345678, 0x12345679): 0x12345679
    0x78,  // consumeInt(-0x12345678, -0x12345600): -0x12345600
    0x78, 0x56, 0x34, 0x12,  // consumeInt(): 0x12345678

    0x02,  // consumeByte(0x12, 0x22): 0x14
    0x7F,  // consumeByte(): 0x7F

    0x01,  // consumeBool(): true
};
 | |
| 
 | |
| TEST_F(FuzzedDataProviderTest, FuzzTargetWithDataProvider) {
 | |
|   FLAGS_target_class = "test/FuzzTargetWithDataProvider";
 | |
|   FLAGS_target_args = "";
 | |
|   FuzzTargetRunner fuzz_target_runner(*jvm_);
 | |
| 
 | |
|   ASSERT_EQ(RunResult::kOk, fuzz_target_runner.Run(kInput, sizeof(kInput)));
 | |
| }
 | |
| 
 | |
| constexpr std::size_t kValidModifiedUtf8NumRuns = 10000;
 | |
| constexpr std::size_t kValidModifiedUtf8NumBytes = 100000;
 | |
| constexpr uint32_t kValidModifiedUtf8Seed = 0x12345678;
 | |
| 
 | |
| TEST_F(FuzzedDataProviderTest, InvalidModifiedUtf8AfterFixup) {
 | |
|   auto modified_utf8_validator = jvm_->FindClass("test.ModifiedUtf8Encoder");
 | |
|   ASSERT_NE(nullptr, modified_utf8_validator);
 | |
|   auto string_to_modified_utf_bytes = jvm_->GetStaticMethodID(
 | |
|       modified_utf8_validator, "encode", "(Ljava/lang/String;)[B");
 | |
|   ASSERT_NE(nullptr, string_to_modified_utf_bytes);
 | |
|   auto& env = jvm_->GetEnv();
 | |
|   auto random_bytes = std::vector<uint8_t>(kValidModifiedUtf8NumBytes);
 | |
|   auto random = std::mt19937(kValidModifiedUtf8Seed);
 | |
|   for (bool ascii_only : {false, true}) {
 | |
|     for (bool stop_on_backslash : {false, true}) {
 | |
|       for (std::size_t i = 0; i < kValidModifiedUtf8NumRuns; ++i) {
 | |
|         std::generate(random_bytes.begin(), random_bytes.end(), random);
 | |
|         std::string fixed_string;
 | |
|         std::tie(fixed_string, std::ignore) = FixUpModifiedUtf8(
 | |
|             random_bytes.data(), random_bytes.size(),
 | |
|             std::numeric_limits<jint>::max(), ascii_only, stop_on_backslash);
 | |
| 
 | |
|         jstring jni_fixed_string = env.NewStringUTF(fixed_string.c_str());
 | |
|         auto jni_roundtripped_bytes = (jbyteArray)env.CallStaticObjectMethod(
 | |
|             modified_utf8_validator, string_to_modified_utf_bytes,
 | |
|             jni_fixed_string);
 | |
|         ASSERT_FALSE(env.ExceptionCheck());
 | |
|         env.DeleteLocalRef(jni_fixed_string);
 | |
|         jint roundtripped_bytes_length =
 | |
|             env.GetArrayLength(jni_roundtripped_bytes);
 | |
|         jbyte* roundtripped_bytes =
 | |
|             env.GetByteArrayElements(jni_roundtripped_bytes, nullptr);
 | |
|         auto roundtripped_string =
 | |
|             std::string(reinterpret_cast<char*>(roundtripped_bytes),
 | |
|                         roundtripped_bytes_length);
 | |
|         env.ReleaseByteArrayElements(jni_roundtripped_bytes, roundtripped_bytes,
 | |
|                                      JNI_ABORT);
 | |
|         env.DeleteLocalRef(jni_roundtripped_bytes);
 | |
| 
 | |
|         // Verify that the bytes obtained from running our modified UTF-8 fix-up
 | |
|         // function remain unchanged when turned into a Java string and
 | |
|         // reencoded into modified UTF-8. This will only happen if the our
 | |
|         // fix-up function indeed returned valid modified UTF-8.
 | |
|         ASSERT_EQ(fixed_string, roundtripped_string);
 | |
|       }
 | |
|     }
 | |
|   }
 | |
| }
 | |
| }  // namespace jazzer
 |