diff --git a/CMakeLists.txt b/CMakeLists.txt index 6b7d1288e0..e23d46555a 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -244,6 +244,23 @@ else() option(ENABLE_SCE "enables Script Check Engine - an alternative checking engine that lets you use executables instead of OVAL for checks" ON) endif() +# ---------- FUZZING +option(ENABLE_FUZZING "build libFuzzer harnesses (fuzz/) and instrument the library with libFuzzer + ASan/UBSan. Requires a Clang toolchain." OFF) +if(ENABLE_FUZZING) + if(NOT CMAKE_C_COMPILER_ID MATCHES "Clang") + message(FATAL_ERROR "ENABLE_FUZZING requires Clang (libFuzzer). Re-run cmake with CC=clang CXX=clang++.") + endif() + # Instrument the whole library (and everything else) for libFuzzer coverage + # and catch memory/UB errors at runtime. fuzzer-no-link adds the coverage + # instrumentation without pulling libFuzzer's main() into every object; + # the harness target adds -fsanitize=fuzzer to get the driver. + set(OSCAP_FUZZING_FLAGS "-fsanitize=fuzzer-no-link,address,undefined -fno-omit-frame-pointer -g") + set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${OSCAP_FUZZING_FLAGS}") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OSCAP_FUZZING_FLAGS}") + set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -fsanitize=address,undefined") + set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -fsanitize=address,undefined") +endif() + # ---------- OVAL FEATURE SWITCHES option(ENABLE_PROBES "build OVAL probes - each probe implements an OVAL test" TRUE) @@ -635,6 +652,9 @@ endif() add_subdirectory("compat") add_subdirectory("src") +if(ENABLE_FUZZING) + add_subdirectory("fuzz") +endif() add_subdirectory("utils") add_subdirectory("docs") add_subdirectory("dist") diff --git a/fuzz/.gitignore b/fuzz/.gitignore new file mode 100644 index 0000000000..21df6746be --- /dev/null +++ b/fuzz/.gitignore @@ -0,0 +1,23 @@ +# libFuzzer run artifacts (these patterns match anywhere; the curated +# regression inputs under reproducers/ are re-included below) +crash-* +oom-* 
+leak-* +timeout-* +*.profraw + +# Always keep the curated regression corpus, even though some are named crash-* +!reproducers/ +!reproducers/** + +# run-all.sh outputs +findings/ +logs/ +*.work/ + +# Fuzzing corpora are large (seeded from tests/, then grown by libFuzzer) and +# regenerable; they are not committed. Regression inputs live in reproducers/. +corpus/ +corpus_xccdf/ +corpus_arf/ +corpus_tailoring/ diff --git a/fuzz/CMakeLists.txt b/fuzz/CMakeLists.txt new file mode 100644 index 0000000000..2d9bb8e6c0 --- /dev/null +++ b/fuzz/CMakeLists.txt @@ -0,0 +1,44 @@ +# libFuzzer harnesses for SCAP parsing / processing. +# +# Enabled with -DENABLE_FUZZING=ON. Requires a Clang toolchain (libFuzzer ships +# with clang). When enabled, the whole library is compiled with the libFuzzer +# coverage instrumentation plus AddressSanitizer/UndefinedBehaviorSanitizer (set +# from the top-level CMakeLists), and each harness is linked with +# -fsanitize=fuzzer to pull in the libFuzzer driver/main. + +set(FUZZ_INCLUDE_DIRS + "${CMAKE_CURRENT_SOURCE_DIR}" + "${CMAKE_SOURCE_DIR}/src/common/public" + "${CMAKE_SOURCE_DIR}/src/source/public" + "${CMAKE_SOURCE_DIR}/src/DS/public" + "${CMAKE_SOURCE_DIR}/src/XCCDF/public" + "${CMAKE_SOURCE_DIR}/src/XCCDF_POLICY/public" + "${CMAKE_SOURCE_DIR}/src/CPE/public" + "${CMAKE_SOURCE_DIR}/src/OVAL/public" + "${LIBXML2_INCLUDE_DIR}" +) + +# add_fuzzer( ) builds one libFuzzer executable linked against the +# instrumented library. 
+function(add_fuzzer name source) + add_executable(${name} ${source}) + target_include_directories(${name} PRIVATE ${FUZZ_INCLUDE_DIRS}) + target_link_libraries(${name} PRIVATE openscap) + target_compile_options(${name} PRIVATE -fsanitize=fuzzer) + target_link_options(${name} PRIVATE -fsanitize=fuzzer) +endfunction() + +add_fuzzer(scap_parse_fuzzer scap_parse_fuzzer.c) # dispatch-by-type parser +add_fuzzer(xccdf_policy_fuzzer xccdf_policy_fuzzer.c) # XCCDF policy/profile layer +add_fuzzer(validate_fuzzer validate_fuzzer.c) # XSD + Schematron validation +add_fuzzer(arf_fuzzer arf_fuzzer.c) # ARF / result data stream (RDS) +add_fuzzer(xccdf_tailoring_fuzzer xccdf_tailoring_fuzzer.c) # XCCDF tailoring + +# Convenience target to build them all: `cmake --build . --target fuzzers` +add_custom_target(fuzzers DEPENDS + scap_parse_fuzzer + xccdf_policy_fuzzer + validate_fuzzer + arf_fuzzer + xccdf_tailoring_fuzzer +) diff --git a/fuzz/README.md b/fuzz/README.md new file mode 100644 index 0000000000..2ce2332d7d --- /dev/null +++ b/fuzz/README.md @@ -0,0 +1,138 @@ +# OpenSCAP fuzzers + +[libFuzzer](https://llvm.org/docs/LibFuzzer.html) harnesses that exercise the +SCAP file processing code paths (parse / resolve / validate). Requires a Clang +toolchain (libFuzzer ships with Clang). 
+ +## Available harnesses + +| Binary | Entry point | Corpus dir | +|--------|-------------|------------| +| `scap_parse_fuzzer` | `oscap_source_get_scap_type()` then the matching importer (DS, ARF, XCCDF, all OVAL kinds, CPE) | `corpus/` | +| `xccdf_policy_fuzzer` | `xccdf_policy_model_new()` + `build_all_useful_policies()` + `xccdf_policy_resolve()` | `corpus_xccdf/` | +| `validate_fuzzer` | `oscap_source_validate()` + `oscap_source_validate_schematron()` | `corpus/` | +| `arf_fuzzer` | `ds_rds_session_*` — build the RDS index, walk reports/assets, extract reports | `corpus_arf/` | +| `xccdf_tailoring_fuzzer`| `xccdf_tailoring_import_source()` against an embedded benchmark | `corpus_tailoring/` | + +Each harness is one `*_fuzzer.c` file in this directory. Corpora are seeded from +`tests/` and grown by the fuzzer; they are git-ignored (regenerable). + +## Build + +```sh +mkdir -p build && cd build +CC=clang CXX=clang++ cmake .. -DENABLE_FUZZING=ON -DENABLE_PROBES=OFF -DENABLE_SCE=OFF +cmake --build . --target fuzzers -j"$(nproc)" # builds all harnesses +``` + +`ENABLE_FUZZING` instruments the whole library with +`-fsanitize=fuzzer-no-link,address,undefined` and links each harness with +`-fsanitize=fuzzer`. (`-DENABLE_PROBES=OFF -DENABLE_SCE=OFF` just trims the build.) 
+ +## Run the fuzz tests + +Recommended sanitizer environment (LeakSanitizer is noisy on inputs the parser +intentionally rejects mid-parse; UBSan `halt_on_error=0` keeps benign +function-pointer-cast reports from aborting): + +```sh +export ASAN_OPTIONS=detect_leaks=0 UBSAN_OPTIONS=halt_on_error=0 +``` + +One harness on its corpus: + +```sh +cd build +./fuzz/scap_parse_fuzzer -max_len=65536 ../fuzz/corpus +./fuzz/xccdf_policy_fuzzer -max_len=65536 ../fuzz/corpus_xccdf +# validate_fuzzer needs the bundled schemas: +OSCAP_SCHEMA_PATH=$(pwd)/../schemas ./fuzz/validate_fuzzer -max_len=65536 ../fuzz/corpus +``` + +All harnesses in parallel (libFuzzer `-fork` mode; a crash/OOM/timeout in one +input is recorded and fuzzing continues). `run-all.sh` sets the sanitizer +options and `OSCAP_SCHEMA_PATH` automatically: + +```sh +fuzz/run-all.sh 3600 # duration in seconds; one fork child per harness +FORK=4 fuzz/run-all.sh 28800 # 4 fork children per harness +``` + +Findings land in `fuzz/findings/<harness>/` (`crash-`/`oom-`/`timeout-`/`leak-`), +per-harness logs in `fuzz/logs/<harness>.log`; a per-harness summary is printed +at the end. Both dirs are git-ignored. + +## Coverage + +Build a second, coverage-instrumented tree, replay the corpus, and report with +`llvm-cov`: + +```sh +mkdir -p build-cov && cd build-cov +CC=clang CXX=clang++ cmake .. -DENABLE_FUZZING=ON -DENABLE_PROBES=OFF -DENABLE_SCE=OFF \ + -DCMAKE_C_FLAGS="-fprofile-instr-generate -fcoverage-mapping" \ + -DCMAKE_CXX_FLAGS="-fprofile-instr-generate -fcoverage-mapping" \ + -DCMAKE_EXE_LINKER_FLAGS="-fprofile-instr-generate -fcoverage-mapping" \ + -DCMAKE_SHARED_LINKER_FLAGS="-fprofile-instr-generate -fcoverage-mapping" +cmake --build . 
--target fuzzers -j"$(nproc)" + +# Replay the corpus (-runs=0 just executes the inputs, no fuzzing): +LLVM_PROFILE_FILE=cov.profraw ASAN_OPTIONS=detect_leaks=0 \ + ./fuzz/scap_parse_fuzzer -runs=0 ../fuzz/corpus + +llvm-profdata merge -sparse cov.profraw -o cov.profdata +# The library lives in a shared object, so pass it with -object: +llvm-cov report ./fuzz/scap_parse_fuzzer -object ./src/libopenscap.so* \ + -instr-profile=cov.profdata +# Per-file/line detail: +llvm-cov show ./fuzz/scap_parse_fuzzer -object ./src/libopenscap.so* \ + -instr-profile=cov.profdata ../src/OVAL/oval_parser.c +``` + +Merge several `*.profraw` (one per harness, via different `LLVM_PROFILE_FILE`) +before `report` to get combined coverage, and pass each harness with its own +`-object` to `llvm-cov`. + +## Replay / debug a crash + +A crashing input is written as `crash-<hash>` (or `oom-<hash>`/`timeout-<hash>`) in the +working dir, or under `fuzz/findings/<harness>/` when using `run-all.sh`. +Curated regression inputs are in `fuzz/reproducers/`. + +Replay one input through the harness that produced it — the ASan report +(stack trace, fault address, allocation site) prints to stderr: + +```sh +cd build +ASAN_OPTIONS=detect_leaks=0 UBSAN_OPTIONS=halt_on_error=0 \ + ./fuzz/scap_parse_fuzzer ./crash-<hash> +# validate_fuzzer also needs: OSCAP_SCHEMA_PATH=$(pwd)/../schemas +``` + +> **Note:** a small number of reproducers (`crash-oval-set-mixed-type-double-free` +> and `crash-sds-index-checklist-null-strcmp`) trigger a UBSan +> wrong-function-pointer error rather than an ASan SEGV. Use +> `UBSAN_OPTIONS=halt_on_error=1` instead of `halt_on_error=0` for those. 
+ +Under a debugger — make ASan/UBSan abort into the debugger on the faulting frame: + +```sh +ASAN_OPTIONS=abort_on_error=1:detect_leaks=0 UBSAN_OPTIONS=halt_on_error=1 \ + gdb --args ./fuzz/scap_parse_fuzzer ./crash-<hash> +(gdb) run +(gdb) bt # backtrace at the crash +# (lldb works the same: lldb -- ./fuzz/<harness> ./crash-<hash>; run; bt) +``` + +Useful extras: +- Symbolized ASan traces need `llvm-symbolizer` on `PATH` (set + `ASAN_SYMBOLIZER_PATH=$(command -v llvm-symbolizer)` if needed). +- Minimize a crash to the smallest triggering input: + `./fuzz/<harness> -minimize_crash=1 -exact_artifact_path=min ./crash-<hash>`. +- Replay all regression inputs (run from `build/`): + ```sh + for f in ../fuzz/reproducers/*; do + ASAN_OPTIONS=detect_leaks=0 UBSAN_OPTIONS=halt_on_error=0 \ + ./fuzz/scap_parse_fuzzer "$f" >/dev/null 2>&1 || echo "triggered: $f" + done + ``` diff --git a/fuzz/arf_fuzzer.c b/fuzz/arf_fuzzer.c new file mode 100644 index 0000000000..19a0c29ce5 --- /dev/null +++ b/fuzz/arf_fuzzer.c @@ -0,0 +1,81 @@ +/* + * libFuzzer harness for ARF / Result Data Stream (RDS) parsing + * (src/DS/rds.c, src/DS/rds_index.c, src/DS/ds_rds_session.c). + * + * ARF (Asset Reporting Format) result files are the output side of a scan and + * are routinely passed around and re-ingested, so their parser is a real attack + * surface. The base scap_parse_fuzzer only builds the RDS session and its + * index; this harness goes further and walks that index (reports, assets, + * report-requests) and extracts the embedded reports, which is what drives the + * bulk of the RDS parsing code. 
+ * + * Pipeline: + * ds_rds_session_new_from_source() open the ARF + * ds_rds_session_get_rds_idx() build & return the RDS index + * walk reports / assets / report-requests via the index iterators + * ds_rds_session_select_report(NULL) extract+parse the first report + * ds_rds_session_select_report_request(NULL) + */ + +#include +#include + +#include "fuzz_common.h" +#include "oscap_source.h" +#include "scap_ds.h" +#include "ds_rds_session.h" + +static void walk_index(struct rds_index *idx) +{ + if (idx == NULL) { + return; + } + + struct rds_report_index_iterator *rit = rds_index_get_reports(idx); + while (rds_report_index_iterator_has_more(rit)) { + struct rds_report_index *r = rds_report_index_iterator_next(rit); + rds_report_index_get_id(r); + } + rds_report_index_iterator_free(rit); + + struct rds_report_request_index_iterator *qit = rds_index_get_report_requests(idx); + while (rds_report_request_index_iterator_has_more(qit)) { + struct rds_report_request_index *q = rds_report_request_index_iterator_next(qit); + rds_report_request_index_get_id(q); + } + rds_report_request_index_iterator_free(qit); + + struct rds_asset_index_iterator *ait = rds_index_get_assets(idx); + while (rds_asset_index_iterator_has_more(ait)) { + struct rds_asset_index *a = rds_asset_index_iterator_next(ait); + struct rds_report_index_iterator *arit = rds_asset_index_get_reports(a); + while (rds_report_index_iterator_has_more(arit)) { + rds_report_index_iterator_next(arit); + } + rds_report_index_iterator_free(arit); + } + rds_asset_index_iterator_free(ait); +} + +int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) +{ + FUZZ_INIT(); + + struct oscap_source *source = + oscap_source_new_from_memory((const char *)data, size, "fuzz-arf.xml"); + if (source == NULL) { + return 0; + } + + struct ds_rds_session *session = ds_rds_session_new_from_source(source); + if (session != NULL) { + walk_index(ds_rds_session_get_rds_idx(session)); + // Returned sources are owned by the session; do 
not free them. + ds_rds_session_select_report(session, NULL); + ds_rds_session_select_report_request(session, NULL); + ds_rds_session_free(session); + } + + oscap_source_free(source); + return 0; +} diff --git a/fuzz/fuzz_common.h b/fuzz/fuzz_common.h new file mode 100644 index 0000000000..4cd8bcefe4 --- /dev/null +++ b/fuzz/fuzz_common.h @@ -0,0 +1,34 @@ +/* + * Shared setup for the OpenSCAP fuzz harnesses. + */ +#ifndef OPENSCAP_FUZZ_COMMON_H +#define OPENSCAP_FUZZ_COMMON_H + +#include +#include + +#include "oscap.h" + +/* + * One-time process initialization. Silences libxml2's error reporting (it would + * otherwise print a parse error to stderr for every malformed input, which both + * slows fuzzing down and buries real sanitizer reports) and initializes the + * library. Call from the top of LLVMFuzzerTestOneInput guarded by a static flag. + */ +static inline void fuzz_init_once(void) +{ + xmlSetGenericErrorFunc(NULL, NULL); + xmlSetStructuredErrorFunc(NULL, NULL); + oscap_init(); +} + +#define FUZZ_INIT() \ + do { \ + static int _fuzz_inited = 0; \ + if (!_fuzz_inited) { \ + fuzz_init_once(); \ + _fuzz_inited = 1; \ + } \ + } while (0) + +#endif /* OPENSCAP_FUZZ_COMMON_H */ diff --git a/fuzz/reproducers/crash-oval-set-mixed-type-double-free b/fuzz/reproducers/crash-oval-set-mixed-type-double-free new file mode 100644 index 0000000000..91253777b8 --- /dev/null +++ b/fuzz/reproducers/crash-oval-set-mixed-type-double-free @@ -0,0 +1 @@ +o:1 \ No newline at end of file diff --git a/fuzz/reproducers/crash-oval-state-version-null-atoi b/fuzz/reproducers/crash-oval-state-version-null-atoi new file mode 100644 index 0000000000..511b5023c1 --- /dev/null +++ b/fuzz/reproducers/crash-oval-state-version-null-atoi @@ -0,0 +1 @@ + \ No newline at end of file diff --git a/fuzz/reproducers/crash-oval-varmodel-duplicate-id-null-frame b/fuzz/reproducers/crash-oval-varmodel-duplicate-id-null-frame new file mode 100644 index 0000000000..190f25879c --- /dev/null +++ 
b/fuzz/reproducers/crash-oval-varmodel-duplicate-id-null-frame @@ -0,0 +1 @@ + \ No newline at end of file diff --git a/fuzz/reproducers/crash-rds-asset-missing-id-strcmp b/fuzz/reproducers/crash-rds-asset-missing-id-strcmp new file mode 100644 index 0000000000..9f34612277 --- /dev/null +++ b/fuzz/reproducers/crash-rds-asset-missing-id-strcmp @@ -0,0 +1,7 @@ + + + + +a + + diff --git a/fuzz/reproducers/crash-rds-isabout-null-asset b/fuzz/reproducers/crash-rds-isabout-null-asset new file mode 100644 index 0000000000..f993a58d7a --- /dev/null +++ b/fuzz/reproducers/crash-rds-isabout-null-asset @@ -0,0 +1,7 @@ + + + + +b + + diff --git a/fuzz/reproducers/crash-rds-relationship-missing-type b/fuzz/reproducers/crash-rds-relationship-missing-type new file mode 100644 index 0000000000..1f88fb0337 --- /dev/null +++ b/fuzz/reproducers/crash-rds-relationship-missing-type @@ -0,0 +1 @@ + \ No newline at end of file diff --git a/fuzz/reproducers/crash-rds-report-missing-id-htable b/fuzz/reproducers/crash-rds-report-missing-id-htable new file mode 100644 index 0000000000..de9095882f --- /dev/null +++ b/fuzz/reproducers/crash-rds-report-missing-id-htable @@ -0,0 +1,3 @@ + + + diff --git a/fuzz/reproducers/crash-rds-select-report-null-index b/fuzz/reproducers/crash-rds-select-report-null-index new file mode 100644 index 0000000000..5cd0969adb --- /dev/null +++ b/fuzz/reproducers/crash-rds-select-report-null-index @@ -0,0 +1 @@ + \ No newline at end of file diff --git a/fuzz/reproducers/crash-schematron-table-no-sentinel-oob b/fuzz/reproducers/crash-schematron-table-no-sentinel-oob new file mode 100644 index 0000000000..d3e39163e6 --- /dev/null +++ b/fuzz/reproducers/crash-schematron-table-no-sentinel-oob @@ -0,0 +1 @@ + \ No newline at end of file diff --git a/fuzz/reproducers/crash-sds-catalog-cycle-recursion-oom b/fuzz/reproducers/crash-sds-catalog-cycle-recursion-oom new file mode 100644 index 0000000000..e4210fd06f --- /dev/null +++ 
b/fuzz/reproducers/crash-sds-catalog-cycle-recursion-oom @@ -0,0 +1,10 @@ + + + + + + + + + + diff --git a/fuzz/reproducers/crash-sds-component-id-use-after-free b/fuzz/reproducers/crash-sds-component-id-use-after-free new file mode 100644 index 0000000000..b7aab96684 --- /dev/null +++ b/fuzz/reproducers/crash-sds-component-id-use-after-free @@ -0,0 +1 @@ + diff --git a/fuzz/reproducers/crash-sds-component-missing-id-strcmp b/fuzz/reproducers/crash-sds-component-missing-id-strcmp new file mode 100644 index 0000000000..0013aed775 --- /dev/null +++ b/fuzz/reproducers/crash-sds-component-missing-id-strcmp @@ -0,0 +1 @@ + \ No newline at end of file diff --git a/fuzz/reproducers/crash-sds-index-checklist-null-strcmp b/fuzz/reproducers/crash-sds-index-checklist-null-strcmp new file mode 100644 index 0000000000..6681f04185 --- /dev/null +++ b/fuzz/reproducers/crash-sds-index-checklist-null-strcmp @@ -0,0 +1,6 @@ + + + + + + diff --git a/fuzz/reproducers/crash-sds-index-select-null-index b/fuzz/reproducers/crash-sds-index-select-null-index new file mode 100644 index 0000000000..557d22a5cd --- /dev/null +++ b/fuzz/reproducers/crash-sds-index-select-null-index @@ -0,0 +1 @@ + \ No newline at end of file diff --git a/fuzz/reproducers/crash-tailoring-malformed-profile-null b/fuzz/reproducers/crash-tailoring-malformed-profile-null new file mode 100644 index 0000000000..801848e209 --- /dev/null +++ b/fuzz/reproducers/crash-tailoring-malformed-profile-null @@ -0,0 +1 @@ + \ No newline at end of file diff --git a/fuzz/reproducers/crash-xccdf-null-testresult-add b/fuzz/reproducers/crash-xccdf-null-testresult-add new file mode 100644 index 0000000000..214aacddc6 --- /dev/null +++ b/fuzz/reproducers/crash-xccdf-null-testresult-add @@ -0,0 +1 @@ + \ No newline at end of file diff --git a/fuzz/reproducers/crash-xccdf-platform-missing-idref b/fuzz/reproducers/crash-xccdf-platform-missing-idref new file mode 100644 index 0000000000..469871fba2 --- /dev/null +++ 
b/fuzz/reproducers/crash-xccdf-platform-missing-idref @@ -0,0 +1 @@ + \ No newline at end of file diff --git a/fuzz/reproducers/crash-xccdf-policy-cyclic-extends-via-creation b/fuzz/reproducers/crash-xccdf-policy-cyclic-extends-via-creation new file mode 100644 index 0000000000..2a192dd986 --- /dev/null +++ b/fuzz/reproducers/crash-xccdf-policy-cyclic-extends-via-creation @@ -0,0 +1 @@ +tt diff --git a/fuzz/reproducers/crash-xccdf-policy-cyclic-profile-extends b/fuzz/reproducers/crash-xccdf-policy-cyclic-profile-extends new file mode 100644 index 0000000000..97e5c69423 --- /dev/null +++ b/fuzz/reproducers/crash-xccdf-policy-cyclic-profile-extends @@ -0,0 +1 @@ + diff --git a/fuzz/reproducers/crash-xccdf-resolve-warning-list-oob b/fuzz/reproducers/crash-xccdf-resolve-warning-list-oob new file mode 100644 index 0000000000..26827da51d --- /dev/null +++ b/fuzz/reproducers/crash-xccdf-resolve-warning-list-oob @@ -0,0 +1 @@ +ttw diff --git a/fuzz/reproducers/hang-cpe-generator-parse-eof b/fuzz/reproducers/hang-cpe-generator-parse-eof new file mode 100644 index 0000000000..a283e8452f --- /dev/null +++ b/fuzz/reproducers/hang-cpe-generator-parse-eof @@ -0,0 +1 @@ + \ No newline at end of file diff --git a/fuzz/reproducers/hang-cpe-item-parse-eof b/fuzz/reproducers/hang-cpe-item-parse-eof new file mode 100644 index 0000000000..a283e8452f --- /dev/null +++ b/fuzz/reproducers/hang-cpe-item-parse-eof @@ -0,0 +1 @@ + \ No newline at end of file diff --git a/fuzz/reproducers/hang-cpe-platform-parse-eof b/fuzz/reproducers/hang-cpe-platform-parse-eof new file mode 100644 index 0000000000..02b4edf3ba --- /dev/null +++ b/fuzz/reproducers/hang-cpe-platform-parse-eof @@ -0,0 +1 @@ + \ No newline at end of file diff --git a/fuzz/reproducers/hang-cpe-testexpr-invalid-operator b/fuzz/reproducers/hang-cpe-testexpr-invalid-operator new file mode 100644 index 0000000000..73346189fc --- /dev/null +++ b/fuzz/reproducers/hang-cpe-testexpr-invalid-operator @@ -0,0 +1 @@ + \ No newline at end of 
file diff --git a/fuzz/reproducers/hang-rds-index-parse-nonadvancing b/fuzz/reproducers/hang-rds-index-parse-nonadvancing new file mode 100644 index 0000000000..953efc6f90 --- /dev/null +++ b/fuzz/reproducers/hang-rds-index-parse-nonadvancing @@ -0,0 +1 @@ + \ No newline at end of file diff --git a/fuzz/reproducers/hang-xccdf-profile-remarks-nonremark-child b/fuzz/reproducers/hang-xccdf-profile-remarks-nonremark-child new file mode 100644 index 0000000000..4c5f24a5fb --- /dev/null +++ b/fuzz/reproducers/hang-xccdf-profile-remarks-nonremark-child @@ -0,0 +1 @@ + diff --git a/fuzz/run-all.sh b/fuzz/run-all.sh new file mode 100755 index 0000000000..0016059915 --- /dev/null +++ b/fuzz/run-all.sh @@ -0,0 +1,137 @@ +#!/usr/bin/env bash +# +# Drive all OpenSCAP libFuzzer harnesses in parallel for an extended run. +# +# Each harness fuzzes its own corpus, writes crash/oom/timeout artifacts under +# findings/<harness>/, and logs to logs/<harness>.log. New interesting inputs +# are added back into the harness's corpus directory so progress carries over +# between runs. +# +# Usage: +# fuzz/run-all.sh [duration_seconds] +# +# Environment overrides: +# BUILD build directory containing fuzz/ (default: ./build) +# SCHEMAS OSCAP_SCHEMA_PATH for validate_fuzzer (default: ./schemas) +# FORK libFuzzer -fork child processes per harness (default: 1) +# MAXLEN -max_len (default: 65536) +# RSS -rss_limit_mb (default: 4096) +# UNITTMO -timeout (per-input, seconds) (default: 25) +# +# Runs in -fork mode so a crash/OOM/timeout in one input does not stop the run: +# libFuzzer recycles the child, records the artifact, and keeps fuzzing. This +# also bounds memory (children are restarted), which matters for validate_fuzzer +# whose libxml2/libxslt caches grow across inputs. +# +# Examples: +# fuzz/run-all.sh 3600 # one hour, one process each +# FORK=4 fuzz/run-all.sh 28800 # 8h, 4 fork children per harness +# +set -u + +# Resolve repo root from this script's location so it works from anywhere. 
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +ROOT="$(cd "${SCRIPT_DIR}/.." && pwd)" + +DURATION="${1:-3600}" +BUILD="${BUILD:-${ROOT}/build}" +SCHEMAS="${SCHEMAS:-${ROOT}/schemas}" +FORK="${FORK:-1}" +MAXLEN="${MAXLEN:-65536}" +RSS="${RSS:-4096}" +UNITTMO="${UNITTMO:-25}" + +FUZZ_DIR="${ROOT}/fuzz" +OUT="${FUZZ_DIR}/findings" +LOGS="${FUZZ_DIR}/logs" + +# harness corpus-dir +HARNESSES=( + "scap_parse_fuzzer:corpus" + "xccdf_policy_fuzzer:corpus_xccdf" + "validate_fuzzer:corpus" + "arf_fuzzer:corpus_arf" + "xccdf_tailoring_fuzzer:corpus_tailoring" +) + +# Sanitizer runtime options shared by every harness: +# - detect_leaks=0: the parsers intentionally abandon allocations on rejected +# input; leak reports would drown out memory-safety crashes. +# - halt_on_error=0: the OVAL code has many benign function-pointer-cast UBSan +# reports that would otherwise abort the run. +export ASAN_OPTIONS="detect_leaks=0:abort_on_error=1:${ASAN_OPTIONS:-}" +export UBSAN_OPTIONS="halt_on_error=0:print_stacktrace=1:${UBSAN_OPTIONS:-}" + +PIDS=() +cleanup() { echo; echo "[run-all] stopping…"; kill ${PIDS[@]+"${PIDS[@]}"} 2>/dev/null; } +trap cleanup INT TERM + +echo "[run-all] duration=${DURATION}s fork/harness=${FORK} build=${BUILD}" +mkdir -p "${LOGS}" + +for entry in "${HARNESSES[@]}"; do + name="${entry%%:*}" + corpus="${FUZZ_DIR}/${entry##*:}" + bin="${BUILD}/fuzz/${name}" + + if [[ ! -x "${bin}" ]]; then + echo "[run-all] SKIP ${name}: not built (run: cmake --build '${BUILD}' --target fuzzers)" + continue + fi + mkdir -p "${OUT}/${name}" "${corpus}" + + # validate_fuzzer needs the bundled schemas to reach the deep schema code. + schema_env=(env) # never empty: expanding an empty array trips `set -u` on bash < 4.4 + [[ "${name}" == "validate_fuzzer" ]] && schema_env=(env "OSCAP_SCHEMA_PATH=${SCHEMAS}") + + # -fork mode writes its own fuzz-.log files into the cwd, so give each + # harness its own working directory under logs/. 
+ workdir="${LOGS}/${name}.work" + mkdir -p "${workdir}" + + echo "[run-all] launch ${name} (corpus: ${entry##*:})" + ( + cd "${workdir}" || exit 1 + "${schema_env[@]}" "${bin}" \ + -fork="${FORK}" \ + -ignore_crashes=1 -ignore_ooms=1 -ignore_timeouts=1 \ + -max_total_time="${DURATION}" \ + -max_len="${MAXLEN}" \ + -rss_limit_mb="${RSS}" \ + -timeout="${UNITTMO}" \ + -print_final_stats=1 \ + -artifact_prefix="${OUT}/${name}/" \ + "${corpus}" + ) > "${LOGS}/${name}.log" 2>&1 & + PIDS+=("$!") +done + +if [[ ${#PIDS[@]} -eq 0 ]]; then + echo "[run-all] nothing to run." + exit 1 +fi + +echo "[run-all] ${#PIDS[@]} harness(es) running; logs in ${LOGS}/" +wait "${PIDS[@]}" +trap - INT TERM + +echo +echo "[run-all] ===== summary =====" +total=0 +for entry in "${HARNESSES[@]}"; do + name="${entry%%:*}" + # crash-/oom-/leak-/timeout- artifacts indicate findings; ignore corpus units. + mapfile -t finds < <(find "${OUT}/${name}" -maxdepth 1 -type f \ + \( -name 'crash-*' -o -name 'oom-*' -o -name 'leak-*' -o -name 'timeout-*' \) 2>/dev/null) + n=${#finds[@]} + total=$((total + n)) + if [[ ${n} -gt 0 ]]; then + echo " ${name}: ${n} finding(s)" + for f in "${finds[@]}"; do echo " ${f}"; done + else + echo " ${name}: clean" + fi +done +echo "[run-all] total findings: ${total}" +echo "[run-all] reproduce with: (validate_fuzzer needs OSCAP_SCHEMA_PATH=${SCHEMAS})" +exit 0 diff --git a/fuzz/scap_parse_fuzzer.c b/fuzz/scap_parse_fuzzer.c new file mode 100644 index 0000000000..5b390530d2 --- /dev/null +++ b/fuzz/scap_parse_fuzzer.c @@ -0,0 +1,187 @@ +/* + * libFuzzer harness for OpenSCAP SCAP file parsing. + * + * This harness feeds arbitrary bytes into the OpenSCAP parsing pipeline the + * same way an application would when it loads a SCAP file from disk: + * + * 1. wrap the bytes in an oscap_source (the library's single entry point for + * "here is a SCAP document"), + * 2. let the library sniff the document type, and + * 3. 
dispatch to the matching importer (data stream, XCCDF, OVAL, CPE, ...). + * + * Every importer ultimately drives the XML reader and the type-specific + * deserialization code, which is where parser crashes / segfaults live. + * + * Build it with the project's ENABLE_FUZZING CMake option (see fuzz/README.md), + * which compiles the whole library with the libFuzzer + AddressSanitizer + * instrumentation and links this file with -fsanitize=fuzzer. + */ + +#include +#include + +#include "fuzz_common.h" +#include "oscap_source.h" +#include "scap_ds.h" +#include "ds_sds_session.h" +#include "ds_rds_session.h" +#include "xccdf_benchmark.h" +#include "cpe_dict.h" +#include "cpe_lang.h" +#include "oval_definitions.h" +#include "oval_variables.h" +#include "oval_system_characteristics.h" +#include "oval_results.h" +#include "oval_directives.h" + +/* Exercise the source data stream / result data stream code paths. */ +static void fuzz_datastream(struct oscap_source *source) +{ + struct ds_sds_session *session = ds_sds_session_new_from_source(source); + if (session != NULL) { + /* NULL ids -> let the session guess; this walks the index, + * the catalogue and extracts/parses the selected components. 
*/ + ds_sds_session_select_checklist(session, NULL, NULL, NULL); + ds_sds_session_free(session); + } +} + +static void fuzz_arf(struct oscap_source *source) +{ + struct ds_rds_session *session = ds_rds_session_new_from_source(source); + if (session != NULL) { + ds_rds_session_get_rds_idx(session); + ds_rds_session_free(session); + } +} + +static void fuzz_xccdf(struct oscap_source *source) +{ + struct xccdf_benchmark *benchmark = xccdf_benchmark_import_source(source); + if (benchmark != NULL) { + xccdf_benchmark_free(benchmark); + } +} + +static void fuzz_oval_definitions(struct oscap_source *source) +{ + struct oval_definition_model *model = oval_definition_model_import_source(source); + if (model != NULL) { + oval_definition_model_free(model); + } +} + +static void fuzz_oval_variables(struct oscap_source *source) +{ + struct oval_variable_model *model = oval_variable_model_import_source(source); + if (model != NULL) { + oval_variable_model_free(model); + } +} + +static void fuzz_oval_syschar(struct oscap_source *source) +{ + struct oval_definition_model *defs = oval_definition_model_new(); + struct oval_syschar_model *model = oval_syschar_model_new(defs); + if (model != NULL) { + oval_syschar_model_import_source(model, source); + oval_syschar_model_free(model); + } + oval_definition_model_free(defs); +} + +static void fuzz_oval_results(struct oscap_source *source) +{ + struct oval_definition_model *defs = oval_definition_model_new(); + struct oval_results_model *model = oval_results_model_new(defs, NULL); + if (model != NULL) { + oval_results_model_import_source(model, source); + oval_results_model_free(model); + } + oval_definition_model_free(defs); +} + +static void fuzz_oval_directives(struct oscap_source *source) +{ + struct oval_directives_model *model = oval_directives_model_new(); + if (model != NULL) { + oval_directives_model_import_source(model, source); + oval_directives_model_free(model); + } +} + +static void fuzz_cpe_dict(struct oscap_source *source) 
+{ + struct cpe_dict_model *model = cpe_dict_model_import_source(source); + if (model != NULL) { + cpe_dict_model_free(model); + } +} + +static void fuzz_cpe_lang(struct oscap_source *source) +{ + struct cpe_lang_model *model = cpe_lang_model_import_source(source); + if (model != NULL) { + cpe_lang_model_free(model); + } +} + +int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) +{ + FUZZ_INIT(); + + /* oscap_source copies the buffer, so the const input is never mutated. */ + struct oscap_source *source = + oscap_source_new_from_memory((const char *)data, size, "fuzz.xml"); + if (source == NULL) { + return 0; + } + + /* Determining the type already parses the document far enough to read the + * root element and namespaces, so this alone exercises a lot of code. */ + oscap_document_type_t type = oscap_source_get_scap_type(source); + + switch (type) { + case OSCAP_DOCUMENT_SDS: + fuzz_datastream(source); + break; + case OSCAP_DOCUMENT_ARF: + fuzz_arf(source); + break; + case OSCAP_DOCUMENT_XCCDF: + case OSCAP_DOCUMENT_XCCDF_TAILORING: + fuzz_xccdf(source); + break; + case OSCAP_DOCUMENT_OVAL_DEFINITIONS: + fuzz_oval_definitions(source); + break; + case OSCAP_DOCUMENT_OVAL_VARIABLES: + fuzz_oval_variables(source); + break; + case OSCAP_DOCUMENT_OVAL_SYSCHAR: + fuzz_oval_syschar(source); + break; + case OSCAP_DOCUMENT_OVAL_RESULTS: + fuzz_oval_results(source); + break; + case OSCAP_DOCUMENT_OVAL_DIRECTIVES: + fuzz_oval_directives(source); + break; + case OSCAP_DOCUMENT_CPE_DICTIONARY: + fuzz_cpe_dict(source); + break; + case OSCAP_DOCUMENT_CPE_LANGUAGE: + fuzz_cpe_lang(source); + break; + case OSCAP_DOCUMENT_UNKNOWN: + default: + /* Unknown type: still try the data stream importer, which is the + * most complex parser and does its own validation. This keeps + * coverage high even when type detection bails out early. 
*/ + fuzz_datastream(source); + break; + } + + oscap_source_free(source); + return 0; +} diff --git a/fuzz/validate_fuzzer.c b/fuzz/validate_fuzzer.c new file mode 100644 index 0000000000..560ecd0039 --- /dev/null +++ b/fuzz/validate_fuzzer.c @@ -0,0 +1,44 @@ +/* + * libFuzzer harness for SCAP document *validation* (src/source/schematron.c, + * src/source/xslt.c and the libxml2 XSD validation path). + * + * `oscap ds sds-validate` and friends are a very common entry point and a + * distinct chunk of code from the object-model importers: XSD schema validation + * and Schematron (implemented as an XSLT transform). Both are driven here. + * + * Validation needs the bundled XML schemas. At runtime point the harness at + * them with the OSCAP_SCHEMA_PATH environment variable, e.g. + * + * OSCAP_SCHEMA_PATH=/schemas ./validate_fuzzer corpus + * + * Without it, schema lookups simply fail early (still exercises the dispatch / + * type-detection and error handling, just not the deep schema code). + */ + +#include +#include + +#include "fuzz_common.h" +#include "oscap_source.h" + +int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) +{ + FUZZ_INIT(); + + struct oscap_source *source = + oscap_source_new_from_memory((const char *)data, size, "fuzz-validate.xml"); + if (source == NULL) { + return 0; + } + + // Determining the type selects which schema(s) the validator will use. + if (oscap_source_get_scap_type(source) != OSCAP_DOCUMENT_UNKNOWN) { + // XSD validation: parses the document and walks the schema grammar. + oscap_source_validate(source, NULL, NULL); + // Schematron validation: compiles and applies the Schematron XSLT. 
+ oscap_source_validate_schematron(source); + } + + oscap_source_free(source); + return 0; +} diff --git a/fuzz/xccdf_policy_fuzzer.c b/fuzz/xccdf_policy_fuzzer.c new file mode 100644 index 0000000000..ecabaeaaae --- /dev/null +++ b/fuzz/xccdf_policy_fuzzer.c @@ -0,0 +1,65 @@ +/* + * libFuzzer harness for the XCCDF *policy* layer (src/XCCDF_POLICY). + * + * The base scap_parse_fuzzer only parses an XCCDF benchmark into its object + * model. This harness goes one step further and drives the policy model, which + * is the code that resolves profiles, applies selectors, binds values and + * performs text substitution. None of that is reached by plain parsing, yet it + * runs purely on parsed content (no OVAL probes / no system access), so it is a + * good fuzzing target. + * + * Pipeline: + * xccdf_benchmark_import_source() parse the benchmark + * xccdf_policy_model_new() build a policy model (takes + * ownership of the benchmark) + * xccdf_policy_model_build_all_useful_policies() + * instantiate a policy per profile, + * resolving selectors & refinements + * xccdf_policy_resolve() for each policy resolve the selected items + */ + +#include +#include + +#include "fuzz_common.h" +#include "oscap_source.h" +#include "xccdf_benchmark.h" +#include "xccdf_policy.h" + +int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) +{ + FUZZ_INIT(); + + struct oscap_source *source = + oscap_source_new_from_memory((const char *)data, size, "fuzz-xccdf.xml"); + if (source == NULL) { + return 0; + } + + struct xccdf_benchmark *benchmark = xccdf_benchmark_import_source(source); + if (benchmark == NULL) { + oscap_source_free(source); + return 0; + } + + // xccdf_policy_model_new takes ownership of the benchmark and frees it in + // xccdf_policy_model_free, so we must not free the benchmark separately. 
+ struct xccdf_policy_model *model = xccdf_policy_model_new(benchmark); + if (model != NULL) { + xccdf_policy_model_build_all_useful_policies(model); + + struct xccdf_policy_iterator *it = xccdf_policy_model_get_policies(model); + while (xccdf_policy_iterator_has_more(it)) { + struct xccdf_policy *policy = xccdf_policy_iterator_next(it); + xccdf_policy_resolve(policy); + } + xccdf_policy_iterator_free(it); + + xccdf_policy_model_free(model); + } else { + xccdf_benchmark_free(benchmark); + } + + oscap_source_free(source); + return 0; +} diff --git a/fuzz/xccdf_tailoring_fuzzer.c b/fuzz/xccdf_tailoring_fuzzer.c new file mode 100644 index 0000000000..686c3bb152 --- /dev/null +++ b/fuzz/xccdf_tailoring_fuzzer.c @@ -0,0 +1,80 @@ +/* + * libFuzzer harness for XCCDF tailoring parsing (src/XCCDF/tailoring.c). + * + * A tailoring file customizes an existing benchmark (overriding profiles, + * selecting/deselecting rules, refining values). It is parsed *against* a + * benchmark, so this harness imports one small fixed benchmark at startup and + * then feeds every fuzzer input to xccdf_tailoring_import_source() as the + * tailoring document. This reaches the tailoring parser and its profile / + * selector / value-refinement handling, which plain benchmark parsing skips. + */ + +#include +#include + +#include "fuzz_common.h" +#include "oscap_source.h" +#include "xccdf_benchmark.h" + +// A minimal but valid XCCDF 1.2 benchmark with a profile, a value and a rule so +// that tailoring documents have something to extend / select / refine. +static const char BASE_BENCHMARK[] = + "\n" + "\n" + " draft\n" + " 1.0\n" + " \n" + " base profile\n" + "