// Copyright 2021 Code Intelligence GmbH // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. #include "jvm_tooling.h" #include #include #include #include #include #include "absl/strings/str_format.h" #include "absl/strings/str_join.h" #include "absl/strings/str_replace.h" #include "absl/strings/str_split.h" #include "coverage_tracker.h" #include "gflags/gflags.h" #include "glog/logging.h" #include "libfuzzer_callbacks.h" #include "signal_handler.h" #include "tools/cpp/runfiles/runfiles.h" #include "utils.h" DEFINE_string(cp, ".", "the classpath to use for fuzzing. Behaves analogously to java's " "-cp (separator is ':' on Linux/macOS and ';' on Windows, escape " "it with '\\')."); DEFINE_string(jvm_args, "", "arguments passed to the JVM (separator is ':' on Linux/macOS " "and ';' on Windows, escape it with '\\')"); DEFINE_string(additional_jvm_args, "", "additional arguments passed to the JVM (separator is ':' on " "Linux/macOS and ';' on Windows). Use this option to set further " "JVM args that should not " "interfere with those provided via --jvm_args."); DEFINE_string(agent_path, "", "location of the fuzzing instrumentation agent"); // Arguments that are passed to the instrumentation agent. // The instrumentation agent takes arguments in the form // =,=,... To not expose this // format to the user the available options are defined here as flags and // combined during the initialization of the JVM. 
// Each of the following string flags is forwarded to the instrumentation agent
// as a "<flag name>=<flag value>" pair when it is non-empty (see
// agentArgsFromFlags).
DEFINE_string(instrumentation_includes, "",
              "list of glob patterns for classes that will be instrumented for "
              "fuzzing. Separated by colon \":\"");
DEFINE_string(instrumentation_excludes, "",
              "list of glob patterns for classes that will not be instrumented "
              "for fuzzing. Separated by colon \":\"");
DEFINE_string(custom_hook_includes, "",
              "list of glob patterns for classes that will only be "
              "instrumented using custom hooks. Separated by colon \":\"");
DEFINE_string(custom_hook_excludes, "",
              "list of glob patterns for classes that will not be instrumented "
              "using custom hooks. Separated by colon \":\"");
DEFINE_string(custom_hooks, "",
              "list of classes containing custom instrumentation hooks. "
              "Separated by colon \":\"");
DEFINE_string(
    trace, "",
    "list of instrumentation to perform separated by colon \":\". "
    "Available options are cov, cmp, div, gep, all. These options "
    "correspond to the \"-fsanitize-coverage=trace-*\" flags in clang.");
DEFINE_string(
    id_sync_file, "",
    "path to a file that should be used to synchronize coverage IDs "
    "between parallel fuzzing processes. Defaults to a temporary file "
    "created for this purpose if running in parallel.");
DEFINE_string(
    dump_classes_dir, "",
    "path to a directory in which Jazzer should dump the instrumented classes");

// Controls whether the instrumentation agent is attached to the JVM at all:
// the -javaagent option is only added when this flag is true.
DEFINE_bool(hooks, true,
            "Use JVM hooks to provide coverage information to the fuzzer. The "
            "fuzzer uses the coverage information to perform smarter input "
            "selection and mutation. If set to false no "
            "coverage information will be processed. This can be useful for "
            "running a regression test on non-instrumented bytecode.");

// Platform-specific separator for multi-valued arguments. It is a macro rather
// than a constant because it is concatenated with adjacent string literals
// (e.g. "\\" ARG_SEPARATOR) at compile time.
#ifdef _WIN32
#define ARG_SEPARATOR ";"
#else
#define ARG_SEPARATOR ":"
#endif

// Called by the agent when
// com.code_intelligence.jazzer.instrumentor.ClassInstrumentor is initialized.
// This only happens when FLAGS_hooks is true.
extern "C" JNIEXPORT jint JNICALL JNI_OnLoad_jazzer_initialize(JavaVM *vm, void *) { if (!FLAGS_hooks) { LOG(ERROR) << "JNI_OnLoad_jazzer_initialize called with --nohooks"; exit(1); } JNIEnv *env = nullptr; jint result = vm->GetEnv(reinterpret_cast(&env), JNI_VERSION_1_8); if (result != JNI_OK) { LOG(FATAL) << "Failed to get JNI environment"; exit(1); } jazzer::registerFuzzerCallbacks(*env); jazzer::CoverageTracker::Setup(*env); jazzer::SignalHandler::Setup(*env); return JNI_VERSION_1_8; } namespace { constexpr auto kAgentBazelRunfilesPath = "jazzer/agent/jazzer_agent_deploy.jar"; constexpr auto kAgentFileName = "jazzer_agent_deploy.jar"; constexpr const char kExceptionUtilsClassName[] = "com/code_intelligence/jazzer/runtime/ExceptionUtils"; } // namespace namespace jazzer { void DumpJvmStackTraces() { JavaVM *vm; jsize num_vms; JNI_GetCreatedJavaVMs(&vm, 1, &num_vms); if (num_vms != 1) { return; } JNIEnv *env = nullptr; if (vm->AttachCurrentThread(reinterpret_cast(&env), nullptr) != JNI_OK) { return; } jclass exceptionUtils = env->FindClass(kExceptionUtilsClassName); if (env->ExceptionCheck()) { env->ExceptionDescribe(); return; } jmethodID dumpStack = env->GetStaticMethodID(exceptionUtils, "dumpAllStackTraces", "()V"); if (env->ExceptionCheck()) { env->ExceptionDescribe(); return; } env->CallStaticVoidMethod(exceptionUtils, dumpStack); if (env->ExceptionCheck()) { env->ExceptionDescribe(); return; } // Do not detach as we may be the main thread (but the JVM exits anyway). } std::string dirFromFullPath(const std::string &path) { const auto pos = path.rfind(kPathSeparator); if (pos != std::string::npos) { return path.substr(0, pos); } return ""; } // getInstrumentorAgentPath searches for the fuzzing instrumentation agent and // returns the location if it is found. Otherwise it calls exit(0). std::string getInstrumentorAgentPath(const std::string &executable_path) { // User provided agent location takes precedence. 
if (!FLAGS_agent_path.empty()) { if (std::ifstream(FLAGS_agent_path).good()) return FLAGS_agent_path; LOG(ERROR) << "Could not find " << kAgentFileName << " at \"" << FLAGS_agent_path << "\""; exit(1); } // First check if we are running inside the Bazel tree and use the agent // runfile. { using bazel::tools::cpp::runfiles::Runfiles; std::string error; std::unique_ptr runfiles( Runfiles::Create(executable_path, &error)); if (runfiles != nullptr) { auto bazel_path = runfiles->Rlocation(kAgentBazelRunfilesPath); if (!bazel_path.empty() && std::ifstream(bazel_path).good()) return bazel_path; } } // If the agent is not in the bazel path we look next to the jazzer_driver // binary. const auto dir = dirFromFullPath(executable_path); auto agent_path = absl::StrFormat("%s%c%s", dir, kPathSeparator, kAgentFileName); if (std::ifstream(agent_path).good()) return agent_path; LOG(ERROR) << "Could not find " << kAgentFileName << ". Please provide " "the pathname via the --agent_path flag."; exit(1); } std::string agentArgsFromFlags() { std::vector args; for (const auto &flag_pair : std::vector>{ // {, } {"instrumentation_includes", FLAGS_instrumentation_includes}, {"instrumentation_excludes", FLAGS_instrumentation_excludes}, {"custom_hooks", FLAGS_custom_hooks}, {"custom_hook_includes", FLAGS_custom_hook_includes}, {"custom_hook_excludes", FLAGS_custom_hook_excludes}, {"trace", FLAGS_trace}, {"id_sync_file", FLAGS_id_sync_file}, {"dump_classes_dir", FLAGS_dump_classes_dir}, }) { if (!flag_pair.second.empty()) { args.push_back(flag_pair.first + "=" + flag_pair.second); } } return absl::StrJoin(args, ","); } // Splits a string at the ARG_SEPARATOR unless it is escaped with a backslash. // Backslash itself can be escaped with another backslash. std::vector splitEscaped(const std::string &str) { // Protect \\ and \ against splitting. 
const std::string BACKSLASH_BACKSLASH_REPLACEMENT = "%%JAZZER_BACKSLASH_BACKSLASH_REPLACEMENT%%"; const std::string BACKSLASH_SEPARATOR_REPLACEMENT = "%%JAZZER_BACKSLASH_SEPARATOR_REPLACEMENT%%"; std::string protected_str = absl::StrReplaceAll(str, {{"\\\\", BACKSLASH_BACKSLASH_REPLACEMENT}}); protected_str = absl::StrReplaceAll( protected_str, {{"\\" ARG_SEPARATOR, BACKSLASH_SEPARATOR_REPLACEMENT}}); std::vector parts = absl::StrSplit(protected_str, ARG_SEPARATOR); std::transform(parts.begin(), parts.end(), parts.begin(), [&BACKSLASH_SEPARATOR_REPLACEMENT, &BACKSLASH_BACKSLASH_REPLACEMENT](const std::string &part) { return absl::StrReplaceAll( part, { {BACKSLASH_SEPARATOR_REPLACEMENT, ARG_SEPARATOR}, {BACKSLASH_BACKSLASH_REPLACEMENT, "\\"}, }); }); return parts; } JVM::JVM(const std::string &executable_path) { // combine class path from command line flags and JAVA_FUZZER_CLASSPATH env // variable std::string class_path = absl::StrFormat("-Djava.class.path=%s", FLAGS_cp); const auto class_path_from_env = std::getenv("JAVA_FUZZER_CLASSPATH"); if (class_path_from_env) { class_path += absl::StrFormat(ARG_SEPARATOR "%s", class_path_from_env); } class_path += absl::StrFormat(ARG_SEPARATOR "%s", getInstrumentorAgentPath(executable_path)); LOG(INFO) << "got class path " << class_path; std::vector options; options.push_back( JavaVMOption{.optionString = const_cast(class_path.c_str())}); // Set the maximum heap size to a value that is slightly smaller than // libFuzzer's default rss_limit_mb. This prevents erroneous oom reports. options.push_back(JavaVMOption{.optionString = (char *)"-Xmx1800m"}); options.push_back(JavaVMOption{.optionString = (char *)"-enableassertions"}); // Preserve and emit stack trace information even on hot paths. // This may hurt performance, but also helps find flaky bugs. options.push_back( JavaVMOption{.optionString = (char *)"-XX:-OmitStackTraceInFastThrow"}); // Optimize GC for high throughput rather than low latency. 
options.push_back(JavaVMOption{.optionString = (char *)"-XX:+UseParallelGC"}); // add additional jvm options set through command line flags std::vector jvm_args; if (!FLAGS_jvm_args.empty()) { jvm_args = splitEscaped(FLAGS_jvm_args); } for (const auto &arg : jvm_args) { options.push_back( JavaVMOption{.optionString = const_cast(arg.c_str())}); } std::vector additional_jvm_args; if (!FLAGS_additional_jvm_args.empty()) { additional_jvm_args = splitEscaped(FLAGS_additional_jvm_args); } for (const auto &arg : additional_jvm_args) { options.push_back( JavaVMOption{.optionString = const_cast(arg.c_str())}); } std::string agent_jvm_arg; if (FLAGS_hooks) { agent_jvm_arg = absl::StrFormat("-javaagent:%s=%s", getInstrumentorAgentPath(executable_path), agentArgsFromFlags()); options.push_back(JavaVMOption{ .optionString = const_cast(agent_jvm_arg.c_str())}); } JavaVMInitArgs jvm_init_args = {.version = JNI_VERSION_1_8, .nOptions = (int)options.size(), .options = options.data(), .ignoreUnrecognized = JNI_FALSE}; auto ret = JNI_CreateJavaVM(&jvm_, (void **)&env_, &jvm_init_args); if (ret != JNI_OK) { throw std::runtime_error( absl::StrFormat("JNI_CreateJavaVM returned code %d", ret)); } } JNIEnv &JVM::GetEnv() const { return *env_; } JVM::~JVM() { jvm_->DestroyJavaVM(); } jclass JVM::FindClass(std::string class_name) const { auto &env = GetEnv(); std::replace(class_name.begin(), class_name.end(), '.', '/'); const auto ret = env.FindClass(class_name.c_str()); if (ret == nullptr) { if (env.ExceptionCheck()) { env.ExceptionDescribe(); throw std::runtime_error( absl::StrFormat("Could not find class %s", class_name)); } else { throw std::runtime_error(absl::StrFormat( "Java class '%s' not found without exception", class_name)); } } return ret; } jmethodID JVM::GetStaticMethodID(jclass jclass, const std::string &jmethod, const std::string &signature, bool is_required) const { auto &env = GetEnv(); const auto ret = env.GetStaticMethodID(jclass, jmethod.c_str(), signature.c_str()); if 
(ret == nullptr) { if (is_required) { if (env.ExceptionCheck()) { env.ExceptionDescribe(); } throw std::runtime_error( absl::StrFormat("Static method '%s' not found", jmethod)); } else { LOG(INFO) << "did not find method " << jmethod << " with signature " << signature; env.ExceptionClear(); } } return ret; } jmethodID JVM::GetMethodID(jclass jclass, const std::string &jmethod, const std::string &signature) const { auto &env = GetEnv(); const auto ret = env.GetMethodID(jclass, jmethod.c_str(), signature.c_str()); if (ret == nullptr) { if (env.ExceptionCheck()) { env.ExceptionDescribe(); } throw std::runtime_error(absl::StrFormat("Method '%s' not found", jmethod)); } return ret; } jfieldID JVM::GetStaticFieldID(jclass class_id, const std::string &field_name, const std::string &type) const { auto &env = GetEnv(); const auto ret = env.GetStaticFieldID(class_id, field_name.c_str(), type.c_str()); if (ret == nullptr) { if (env.ExceptionCheck()) { env.ExceptionDescribe(); } throw std::runtime_error( absl::StrFormat("Field '%s' not found", field_name)); } return ret; } ExceptionPrinter::ExceptionPrinter(JVM &jvm) : jvm_(jvm), string_writer_class_(jvm.FindClass("java/io/StringWriter")), string_writer_constructor_( jvm.GetMethodID(string_writer_class_, "", "()V")), string_writer_to_string_method_(jvm.GetMethodID( string_writer_class_, "toString", "()Ljava/lang/String;")), print_writer_class_(jvm.FindClass("java/io/PrintWriter")), print_writer_constructor_(jvm.GetMethodID(print_writer_class_, "", "(Ljava/io/Writer;)V")) { auto throwable_class = jvm.FindClass("java/lang/Throwable"); print_stack_trace_method_ = jvm.GetMethodID( throwable_class, "printStackTrace", "(Ljava/io/PrintWriter;)V"); if (FLAGS_hooks) { exception_utils_ = jvm.FindClass(kExceptionUtilsClassName); compute_dedup_token_method_ = jvm.GetStaticMethodID( exception_utils_, "computeDedupToken", "(Ljava/lang/Throwable;)J"); preprocess_throwable_method_ = jvm.GetStaticMethodID(exception_utils_, 
"preprocessThrowable", "(Ljava/lang/Throwable;)Ljava/lang/Throwable;"); } } // The JNI way of writing: // StringWriter stringWriter = new StringWriter(); // PrintWriter printWriter = new PrintWriter(stringWriter); // e.printStackTrace(printWriter); // return stringWriter.toString(); std::string ExceptionPrinter::getStackTrace(jthrowable exception) const { auto &env = jvm_.GetEnv(); if (exception == nullptr) { return ""; } auto string_writer = env.NewObject(string_writer_class_, string_writer_constructor_); if (string_writer == nullptr) { env.ExceptionDescribe(); return ""; } auto print_writer = env.NewObject(print_writer_class_, print_writer_constructor_, string_writer); if (print_writer == nullptr) { env.ExceptionDescribe(); return ""; } env.CallVoidMethod(exception, print_stack_trace_method_, print_writer); env.DeleteLocalRef(print_writer); if (env.ExceptionCheck()) { env.ExceptionDescribe(); return ""; } auto exception_string_object = reinterpret_cast( env.CallObjectMethod(string_writer, string_writer_to_string_method_)); env.DeleteLocalRef(string_writer); if (env.ExceptionCheck()) { env.ExceptionDescribe(); return ""; } auto char_pointer = env.GetStringUTFChars(exception_string_object, nullptr); std::string exception_string(char_pointer); env.ReleaseStringUTFChars(exception_string_object, char_pointer); env.DeleteLocalRef(exception_string_object); return exception_string; } jthrowable ExceptionPrinter::preprocessException(jthrowable exception) const { if (exception == nullptr) return nullptr; auto &env = jvm_.GetEnv(); if (!FLAGS_hooks || !preprocess_throwable_method_) return exception; auto processed_exception = (jthrowable)(env.CallStaticObjectMethod( exception_utils_, preprocess_throwable_method_, exception)); if (env.ExceptionCheck()) { env.ExceptionDescribe(); return exception; } return processed_exception; } jlong ExceptionPrinter::computeDedupToken(jthrowable exception) const { auto &env = jvm_.GetEnv(); if (!FLAGS_hooks || exception == nullptr || 
compute_dedup_token_method_ == nullptr) return 0; const auto dedup_token = env.CallStaticLongMethod( exception_utils_, compute_dedup_token_method_, exception); if (env.ExceptionCheck()) { env.ExceptionDescribe(); return 0; } return dedup_token; } } // namespace jazzer