From 8c84e6802c0e23503bfe655dadcdc4a15de7373a Mon Sep 17 00:00:00 2001 From: Kristofer Karlsson Date: Thu, 28 May 2026 09:00:48 +0000 Subject: [PATCH 01/30] t3070: skip ls-files tests with backslash patterns on Windows MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit On Windows (MINGW), backslashes in pathspecs are silently converted to forward slashes (directory separators), which changes the glob semantics. This causes 36 test failures in t3070-wildmatch when the "via ls-files" variants test patterns containing backslash escapes (e.g. '\[ab]', '[\-_]', '[A-\\]'). The wildmatch function itself handles these patterns correctly — only the ls-files code path fails because pathspec parsing converts the backslashes before they reach the glob matcher. Skip these ls-files tests on platforms where BSLASHPSPEC is not set, which is the existing prereq that captures exactly this semantic: "backslashes in pathspec are not directory separators." Signed-off-by: Kristofer Karlsson Signed-off-by: Junio C Hamano --- t/t3070-wildmatch.sh | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/t/t3070-wildmatch.sh b/t/t3070-wildmatch.sh index 655bb1a0f21031..33941222189663 100755 --- a/t/t3070-wildmatch.sh +++ b/t/t3070-wildmatch.sh @@ -99,6 +99,13 @@ match_with_ls_files() { match_function=$4 ls_files_args=$5 + prereqs=EXPENSIVE_ON_WINDOWS + case "$pattern" in + *\\*) + prereqs="$prereqs,BSLASHPSPEC" + ;; + esac + match_stdout_stderr_cmp=" tr -d '\0' actual && test_must_be_empty actual.err && @@ -108,36 +115,36 @@ match_with_ls_files() { then if test -e .git/created_test_file then - test_expect_success EXPENSIVE_ON_WINDOWS "$match_function (via ls-files): match dies on '$pattern' '$text'" " + test_expect_success $prereqs "$match_function (via ls-files): match dies on '$pattern' '$text'" " printf '%s' '$text' >expect && test_must_fail git$ls_files_args ls-files -z -- '$pattern' " else - test_expect_failure 
EXPENSIVE_ON_WINDOWS "$match_function (via ls-files): match skip '$pattern' '$text'" 'false' + test_expect_failure $prereqs "$match_function (via ls-files): match skip '$pattern' '$text'" 'false' fi elif test "$match_expect" = 1 then if test -e .git/created_test_file then - test_expect_success EXPENSIVE_ON_WINDOWS "$match_function (via ls-files): match '$pattern' '$text'" " + test_expect_success $prereqs "$match_function (via ls-files): match '$pattern' '$text'" " printf '%s' '$text' >expect && git$ls_files_args ls-files -z -- '$pattern' >actual.raw 2>actual.err && $match_stdout_stderr_cmp " else - test_expect_failure EXPENSIVE_ON_WINDOWS "$match_function (via ls-files): match skip '$pattern' '$text'" 'false' + test_expect_failure $prereqs "$match_function (via ls-files): match skip '$pattern' '$text'" 'false' fi elif test "$match_expect" = 0 then if test -e .git/created_test_file then - test_expect_success EXPENSIVE_ON_WINDOWS "$match_function (via ls-files): no match '$pattern' '$text'" " + test_expect_success $prereqs "$match_function (via ls-files): no match '$pattern' '$text'" " >expect && git$ls_files_args ls-files -z -- '$pattern' >actual.raw 2>actual.err && $match_stdout_stderr_cmp " else - test_expect_failure EXPENSIVE_ON_WINDOWS "$match_function (via ls-files): no match skip '$pattern' '$text'" 'false' + test_expect_failure $prereqs "$match_function (via ls-files): no match skip '$pattern' '$text'" 'false' fi else test_expect_success "PANIC: Test framework error. Unknown matches value $match_expect" 'false' From 1ec041bebb46159562c4beeb2e6980284e0f9a28 Mon Sep 17 00:00:00 2001 From: Michael Montalbo Date: Thu, 28 May 2026 19:21:45 +0000 Subject: [PATCH 02/30] doc: clarify that --word-diff operates on line-level hunks The --word-diff documentation describes the output modes and word-regex mechanics but does not explain that word-diff operates within the hunks produced by the line-level diff rather than performing an independent word-stream comparison. 
This can surprise users when the line-level alignment causes word-level changes to appear even though the words in both files are identical. Add an implementation note explaining the two-stage relationship and that the output may change if Git acquires a different implementation in the future. Signed-off-by: Michael Montalbo Signed-off-by: Junio C Hamano --- Documentation/diff-options.adoc | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/Documentation/diff-options.adoc b/Documentation/diff-options.adoc index 9cdad6f72a0c7d..88b724b8c6dba4 100644 --- a/Documentation/diff-options.adoc +++ b/Documentation/diff-options.adoc @@ -455,6 +455,14 @@ endif::git-diff[] + Note that despite the name of the first mode, color is used to highlight the changed parts in all modes if enabled. ++ +The `--word-diff` option operates by taking the same line-by-line +diff that is produced without the option and computing +word-by-word changes within each hunk. This may produce a +larger diff than a dedicated word-diff tool would. If Git +acquires a different implementation in the future, the output +may change. Note that this is similar to the `--diff-algorithm` +option, which may also change the output. `--word-diff-regex=<regex>`:: Use _<regex>_ to decide what a word is, instead of considering From 558057cf4f43ea3b28c5e0b1b2250cab362f1a6a Mon Sep 17 00:00:00 2001 From: Michael Montalbo Date: Thu, 28 May 2026 20:47:44 +0000 Subject: [PATCH 03/30] revision: move -L setup before output_format-to-diff derivation The line_level_traverse block sets a default DIFF_FORMAT_PATCH when no output format has been explicitly requested. This default must be visible to the "Did the user ask for any diff output?" check that derives revs->diff from revs->diffopt.output_format. Currently the -L block runs after that derivation, so revs->diff stays 0 when no explicit format is given.
This does not matter yet because log_tree_commit() short-circuits into line_log_print() before consulting revs->diff, but the next commit will route -L through the normal log_tree_diff() path, which checks revs->diff. Move the block above the derivation so the default DIFF_FORMAT_PATCH is in place when revs->diff is computed. No behavior change on its own. Signed-off-by: Michael Montalbo Signed-off-by: Junio C Hamano --- revision.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/revision.c b/revision.c index 599b3a66c369ca..4a8e24bc38d572 100644 --- a/revision.c +++ b/revision.c @@ -3112,6 +3112,14 @@ int setup_revisions(int argc, const char **argv, struct rev_info *revs, struct s object_context_release(&oc); } + if (revs->line_level_traverse) { + if (want_ancestry(revs)) + revs->limited = 1; + revs->topo_order = 1; + if (!revs->diffopt.output_format) + revs->diffopt.output_format = DIFF_FORMAT_PATCH; + } + /* Did the user ask for any diff output? Run the diff! */ if (revs->diffopt.output_format & ~DIFF_FORMAT_NO_OUTPUT) revs->diff = 1; @@ -3125,14 +3133,6 @@ int setup_revisions(int argc, const char **argv, struct rev_info *revs, struct s if (revs->diffopt.objfind) revs->simplify_history = 0; - if (revs->line_level_traverse) { - if (want_ancestry(revs)) - revs->limited = 1; - revs->topo_order = 1; - if (!revs->diffopt.output_format) - revs->diffopt.output_format = DIFF_FORMAT_PATCH; - } - if (revs->topo_order && !generation_numbers_enabled(the_repository)) revs->limited = 1; From 42d960748efa79a31e72cc36d983aca244dc167e Mon Sep 17 00:00:00 2001 From: Michael Montalbo Date: Thu, 28 May 2026 20:47:45 +0000 Subject: [PATCH 04/30] line-log: integrate -L output with the standard log-tree pipeline `git log -L` has bypassed log_tree_diff() and log_tree_diff_flush() since the feature was introduced, short-circuiting from log_tree_commit() directly into line_log_print(). 
This skips the no_free save/restore (noted in a NEEDSWORK comment added by f8781bfda3), the always_show_header fallback, show_diff_of_diff(), and diff_free() cleanup. Restructure so that -L flows through log_tree_diff() -> log_tree_diff_flush(), the same path used by the normal single-parent and merge diff codepaths: - Rename line_log_print() to line_log_queue_pairs() and strip it down to just queuing pre-computed filepairs. The show_log(), separator, diffcore_std(), and diff_flush() calls are removed since log_tree_diff_flush() handles all of those. - In log_tree_diff(), call line_log_queue_pairs() then log_tree_diff_flush(), mirroring the diff_tree_oid() + flush pattern used by the single-parent and merge codepaths. - Remove the early return in log_tree_commit() that is no longer needed now that -L output flows through log_tree_diff() and log_tree_diff_flush(); this restores no_free save/restore, always_show_header, and diff_free() cleanup. Because show_log() is now deferred until after diffcore_std() inside log_tree_diff_flush(), pickaxe (-S, -G, --find-object) and --diff-filter now properly suppress commits when all pairs are filtered out. The blank-line separator between commit header and diff changes slightly: the old code printed one unconditionally, while log_tree_diff_flush() only emits one for verbose headers. This matches the rest of log output. Also reject --full-diff, which is not yet supported with -L: the filepairs are pre-computed during the history walk and scoped to tracked line ranges, so there is currently no full-tree diff to fall back to for display. Update tests accordingly. 
Signed-off-by: Michael Montalbo Signed-off-by: Junio C Hamano --- line-log.c | 30 ++++------- line-log.h | 2 +- log-tree.c | 10 ++-- revision.c | 6 ++- t/t4211-line-log.sh | 53 ++++++++++++++----- t/t4211/sha1/expect.parallel-change-f-to-main | 1 - .../sha256/expect.parallel-change-f-to-main | 1 - 7 files changed, 60 insertions(+), 43 deletions(-) diff --git a/line-log.c b/line-log.c index 858a899cd2a61d..7ee55b05cc5077 100644 --- a/line-log.c +++ b/line-log.c @@ -13,7 +13,6 @@ #include "revision.h" #include "xdiff-interface.h" #include "strbuf.h" -#include "log-tree.h" #include "line-log.h" #include "setup.h" #include "strvec.h" @@ -1004,29 +1003,18 @@ static int process_all_files(struct line_log_data **range_out, return changed; } -int line_log_print(struct rev_info *rev, struct commit *commit) +void line_log_queue_pairs(struct rev_info *rev, struct commit *commit) { - show_log(rev); - if (!(rev->diffopt.output_format & DIFF_FORMAT_NO_OUTPUT)) { - struct line_log_data *range = lookup_line_range(rev, commit); - struct line_log_data *r; - const char *prefix = diff_line_prefix(&rev->diffopt); - - fprintf(rev->diffopt.file, "%s\n", prefix); - - for (r = range; r; r = r->next) { - if (r->pair) { - struct diff_filepair *p = - diff_filepair_dup(r->pair); - p->line_ranges = &r->ranges; - diff_q(&diff_queued_diff, p); - } - } + struct line_log_data *range = lookup_line_range(rev, commit); + struct line_log_data *r; - diffcore_std(&rev->diffopt); - diff_flush(&rev->diffopt); + for (r = range; r; r = r->next) { + if (r->pair) { + struct diff_filepair *p = diff_filepair_dup(r->pair); + p->line_ranges = &r->ranges; + diff_q(&diff_queued_diff, p); + } } - return 1; } static int bloom_filter_check(struct rev_info *rev, diff --git a/line-log.h b/line-log.h index 04a6ea64d3d68f..99e1755ce3d568 100644 --- a/line-log.h +++ b/line-log.h @@ -46,7 +46,7 @@ int line_log_filter(struct rev_info *rev); int line_log_process_ranges_arbitrary_commit(struct rev_info *rev, struct commit 
*commit); -int line_log_print(struct rev_info *rev, struct commit *commit); +void line_log_queue_pairs(struct rev_info *rev, struct commit *commit); void line_log_free(struct rev_info *rev); diff --git a/log-tree.c b/log-tree.c index 7e048701d0c5b4..88b3019293b725 100644 --- a/log-tree.c +++ b/log-tree.c @@ -1105,6 +1105,12 @@ static int log_tree_diff(struct rev_info *opt, struct commit *commit, struct log if (!all_need_diff && !opt->merges_need_diff) return 0; + if (opt->line_level_traverse) { + line_log_queue_pairs(opt, commit); + log_tree_diff_flush(opt); + return !opt->loginfo; + } + parse_commit_or_die(commit); oid = get_commit_tree_oid(commit); @@ -1179,10 +1185,6 @@ int log_tree_commit(struct rev_info *opt, struct commit *commit) opt->loginfo = &log; opt->diffopt.no_free = 1; - /* NEEDSWORK: no restoring of no_free? Why? */ - if (opt->line_level_traverse) - return line_log_print(opt, commit); - if (opt->track_linear && !opt->linear && !opt->reverse_output_stage) fprintf(opt->diffopt.file, "\n%s\n", opt->break_bar); shown = log_tree_diff(opt, commit, &log); diff --git a/revision.c b/revision.c index 4a8e24bc38d572..c903f7a1b4c4c8 100644 --- a/revision.c +++ b/revision.c @@ -3179,8 +3179,10 @@ int setup_revisions(int argc, const char **argv, struct rev_info *revs, struct s die(_("the option '%s' requires '%s'"), "--grep-reflog", "--walk-reflogs"); if (revs->line_level_traverse && - (revs->diffopt.output_format & ~(DIFF_FORMAT_PATCH | DIFF_FORMAT_NO_OUTPUT))) - die(_("-L does not yet support diff formats besides -p and -s")); + (revs->full_diff || + (revs->diffopt.output_format & + ~(DIFF_FORMAT_PATCH | DIFF_FORMAT_NO_OUTPUT)))) + die(_("-L does not yet support the requested diff format")); if (revs->expand_tabs_in_log < 0) revs->expand_tabs_in_log = revs->expand_tabs_in_log_default; diff --git a/t/t4211-line-log.sh b/t/t4211-line-log.sh index aaf197d2edc4d8..e3937138a94055 100755 --- a/t/t4211-line-log.sh +++ b/t/t4211-line-log.sh @@ -368,7 +368,6 @@ 
test_expect_success '-L diff output includes index and new file mode' ' test_expect_success '-L with --word-diff' ' cat >expect <<-\EOF && - diff --git a/file.c b/file.c --- a/file.c +++ b/file.c @@ -377,7 +376,6 @@ test_expect_success '-L with --word-diff' ' { return [-F2;-]{+F2 + 2;+} } - diff --git a/file.c b/file.c new file mode 100644 --- /dev/null @@ -433,7 +431,6 @@ test_expect_success 'show line-log with graph' ' null_blob=$(test_oid zero | cut -c1-7) && qz_to_tab_space >expect <<-EOF && * $head_oid Modify func2() in file.c - |Z | diff --git a/file.c b/file.c | index $head_blob_old..$head_blob_new 100644 | --- a/file.c @@ -445,7 +442,6 @@ test_expect_success 'show line-log with graph' ' | + return F2 + 2; | } * $root_oid Add func1() and func2() in file.c - ZZ diff --git a/file.c b/file.c new file mode 100644 index $null_blob..$root_blob @@ -494,23 +490,17 @@ test_expect_success '-L --find-object does not crash with merge and rename' ' --find-object=$(git rev-parse HEAD:file) >actual ' -# Commit-level filtering with pickaxe does not yet work for -L. -# show_log() prints the commit header before diffcore_std() runs -# pickaxe, so commits cannot be suppressed even when no diff pairs -# survive filtering. Fixing this would require deferring show_log() -# until after diffcore_std(), which is a larger restructuring of the -# log-tree output pipeline. 
-test_expect_failure '-L -G should filter commits by pattern' ' +test_expect_success '-L -G should filter commits by pattern' ' git log --format="%s" --no-patch -L 1,1:file -G "nomatch" >actual && test_must_be_empty actual ' -test_expect_failure '-L -S should filter commits by pattern' ' +test_expect_success '-L -S should filter commits by pattern' ' git log --format="%s" --no-patch -L 1,1:file -S "nomatch" >actual && test_must_be_empty actual ' -test_expect_failure '-L --find-object should filter commits by object' ' +test_expect_success '-L --find-object should filter commits by object' ' git log --format="%s" --no-patch -L 1,1:file \ --find-object=$ZERO_OID >actual && test_must_be_empty actual @@ -711,4 +701,41 @@ test_expect_success '-L with -G filters to diff-text matches' ' grep "F2 + 2" actual ' +test_expect_success '-L with --diff-filter=M excludes root commit' ' + git checkout parent-oids && + git log -L:func2:file.c --diff-filter=M --format=%s --no-patch >actual && + # Root commit is an Add (A), not a Modify (M), so it should + # be excluded; only the modification commit remains. + echo "Modify func2() in file.c" >expect && + test_cmp expect actual +' + +test_expect_success '-L with --diff-filter=A shows only root commit' ' + git checkout parent-oids && + git log -L:func2:file.c --diff-filter=A --format=%s --no-patch >actual && + echo "Add func1() and func2() in file.c" >expect && + test_cmp expect actual +' + +test_expect_success '-L with -S suppresses non-matching commits' ' + git checkout parent-oids && + git log -L:func2:file.c -S "F2 + 2" --format=%s --no-patch >actual && + # Only the commit that changes the count of "F2 + 2" should appear. 
+ echo "Modify func2() in file.c" >expect && + test_cmp expect actual +' + +test_expect_success '--full-diff is not yet supported with -L' ' + test_must_fail git log -L1,24:b.c --full-diff 2>err && + test_grep "does not yet support" err +' + +test_expect_success '-L --oneline has no extra blank line before diff' ' + git checkout parent-oids && + git log --oneline -L:func2:file.c -1 >actual && + # Oneline header on line 1, diff starts immediately on line 2 + sed -n 2p actual >line2 && + test_grep "^diff --git" line2 +' + test_done diff --git a/t/t4211/sha1/expect.parallel-change-f-to-main b/t/t4211/sha1/expect.parallel-change-f-to-main index 65a8cc673a6fca..6d7a20103631cc 100644 --- a/t/t4211/sha1/expect.parallel-change-f-to-main +++ b/t/t4211/sha1/expect.parallel-change-f-to-main @@ -5,7 +5,6 @@ Date: Fri Apr 12 16:16:24 2013 +0200 Merge across the rename - commit 6ce3c4ff690136099bb17e1a8766b75764726ea7 Author: Thomas Rast Date: Thu Feb 28 10:49:50 2013 +0100 diff --git a/t/t4211/sha256/expect.parallel-change-f-to-main b/t/t4211/sha256/expect.parallel-change-f-to-main index 3178989253a885..c93e03bef40544 100644 --- a/t/t4211/sha256/expect.parallel-change-f-to-main +++ b/t/t4211/sha256/expect.parallel-change-f-to-main @@ -5,7 +5,6 @@ Date: Fri Apr 12 16:16:24 2013 +0200 Merge across the rename - commit 4f7a58195a92c400e28a2354328587f1ff14fb77f5cf894536f17ccbc72931b9 Author: Thomas Rast Date: Thu Feb 28 10:49:50 2013 +0100 From 4b5d8a0163fe4e9a4ac074f407e0599ba27acf68 Mon Sep 17 00:00:00 2001 From: Michael Montalbo Date: Thu, 28 May 2026 20:47:46 +0000 Subject: [PATCH 05/30] line-log: allow non-patch diff formats with -L Now that -L flows through log_tree_diff_flush() and diff_flush(), metadata-only diff formats work because they only read filepair fields (status, mode, path, oid) already set on the pre-computed pairs. Expand the allowlist in setup_revisions() to also accept --raw, --name-only, --name-status, and --summary. 
Diff stat formats (--stat, --numstat, --shortstat, --dirstat) remain blocked because they call compute_diffstat() on full blob content and would show whole-file statistics rather than range-scoped ones. Signed-off-by: Michael Montalbo Signed-off-by: Junio C Hamano --- Documentation/line-range-options.adoc | 10 +++--- revision.c | 4 ++- t/t4211-line-log.sh | 47 +++++++++++++++++++++++++-- 3 files changed, 54 insertions(+), 7 deletions(-) diff --git a/Documentation/line-range-options.adoc b/Documentation/line-range-options.adoc index ecb2c79fb9bde8..72f639b5e79ea4 100644 --- a/Documentation/line-range-options.adoc +++ b/Documentation/line-range-options.adoc @@ -8,12 +8,14 @@ give zero or one positive revision arguments, and _<start>_ and _<end>_ (or _<funcname>_) must exist in the starting revision. You can specify this option more than once. Implies `--patch`. - Patch output can be suppressed using `--no-patch`, but other diff formats - (namely `--raw`, `--numstat`, `--shortstat`, `--dirstat`, `--summary`, - `--name-only`, `--name-status`, `--check`) are not currently implemented. + Patch output can be suppressed using `--no-patch`. + Non-patch diff formats `--raw`, `--name-only`, `--name-status`, + and `--summary` are supported. Diff stat formats + (`--stat`, `--numstat`, `--shortstat`, `--dirstat`) are not + currently implemented. + Patch formatting options such as `--word-diff`, `--color-moved`, `--no-prefix`, and whitespace options (`-w`, `-b`) are supported, -as are pickaxe options (`-S`, `-G`). +as are pickaxe options (`-S`, `-G`) and `--diff-filter`.
+ include::line-range-format.adoc[] diff --git a/revision.c b/revision.c index c903f7a1b4c4c8..f26fc1f4d5e48e 100644 --- a/revision.c +++ b/revision.c @@ -3181,7 +3181,9 @@ int setup_revisions(int argc, const char **argv, struct rev_info *revs, struct s if (revs->line_level_traverse && (revs->full_diff || (revs->diffopt.output_format & - ~(DIFF_FORMAT_PATCH | DIFF_FORMAT_NO_OUTPUT)))) + ~(DIFF_FORMAT_PATCH | DIFF_FORMAT_NO_OUTPUT | + DIFF_FORMAT_RAW | DIFF_FORMAT_NAME | + DIFF_FORMAT_NAME_STATUS | DIFF_FORMAT_SUMMARY)))) die(_("-L does not yet support the requested diff format")); if (revs->expand_tabs_in_log < 0) diff --git a/t/t4211-line-log.sh b/t/t4211-line-log.sh index e3937138a94055..ca4eb7bbc713ef 100755 --- a/t/t4211-line-log.sh +++ b/t/t4211-line-log.sh @@ -155,8 +155,45 @@ test_expect_success '-p shows the default patch output' ' test_cmp expect actual ' -test_expect_success '--raw is forbidden' ' - test_must_fail git log -L1,24:b.c --raw +test_expect_success '--raw shows mode, oid, status and path' ' + git log -L1,24:b.c --raw --format= >actual && + test_grep "^:100644 100644 [0-9a-f]\{7\} [0-9a-f]\{7\} M b.c$" actual && + test_grep ! "^diff --git" actual && + test_grep ! "^@@" actual +' + +test_expect_success '--name-only shows path' ' + git log -L1,24:b.c --name-only --format= >actual && + test_grep "^b.c$" actual && + test_grep ! "^diff --git" actual && + test_grep ! "^@@" actual +' + +test_expect_success '--name-status shows status and path' ' + git log -L1,24:b.c --name-status --format= >actual && + test_grep "^M b.c$" actual && + test_grep ! "^diff --git" actual && + test_grep ! 
"^@@" actual +' + +test_expect_success '--stat is not yet supported with -L' ' + test_must_fail git log -L1,24:b.c --stat 2>err && + test_grep "does not yet support" err +' + +test_expect_success '--numstat is not yet supported with -L' ' + test_must_fail git log -L1,24:b.c --numstat 2>err && + test_grep "does not yet support" err +' + +test_expect_success '--shortstat is not yet supported with -L' ' + test_must_fail git log -L1,24:b.c --shortstat 2>err && + test_grep "does not yet support" err +' + +test_expect_success '--dirstat is not yet supported with -L' ' + test_must_fail git log -L1,24:b.c --dirstat 2>err && + test_grep "does not yet support" err ' test_expect_success 'setup for checking fancy rename following' ' @@ -738,4 +775,10 @@ test_expect_success '-L --oneline has no extra blank line before diff' ' test_grep "^diff --git" line2 ' +test_expect_success '--summary shows new file on root commit' ' + git checkout parent-oids && + git log -L:func2:file.c --summary --format= >actual && + test_grep "create mode 100644 file.c" actual +' + test_done From b8cda126b4e0fbfd514b26dec4ee8a1c6849abe9 Mon Sep 17 00:00:00 2001 From: Sebastien Tardif Date: Thu, 28 May 2026 02:56:54 +0000 Subject: [PATCH 06/30] daemon: fix IPv6 address corruption in lookup_hostname() getaddrinfo() is called with AF_UNSPEC hints, so it may return IPv6 results. However, the code unconditionally casts ai_addr to sockaddr_in and passes AF_INET to inet_ntop(). On IPv6-only hosts, this reads from the wrong struct offset, producing garbage IP addresses. Fix this by checking ai_family and extracting the address pointer into a local variable before calling inet_ntop() once with the correct family. Die on unexpected address families. 
Signed-off-by: Sebastien Tardif Signed-off-by: Junio C Hamano --- daemon.c | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/daemon.c b/daemon.c index 0a7b1aae447912..80fa0226d89f03 100644 --- a/daemon.c +++ b/daemon.c @@ -674,9 +674,20 @@ static void lookup_hostname(struct hostinfo *hi) gai = getaddrinfo(hi->hostname.buf, NULL, &hints, &ai); if (!gai) { - struct sockaddr_in *sin_addr = (void *)ai->ai_addr; + void *addr; + + if (ai->ai_family == AF_INET) { + struct sockaddr_in *sa = (void *)ai->ai_addr; + addr = &sa->sin_addr; + } else if (ai->ai_family == AF_INET6) { + struct sockaddr_in6 *sa6 = (void *)ai->ai_addr; + addr = &sa6->sin6_addr; + } else { + die("unexpected address family: %d", + ai->ai_family); + } - inet_ntop(AF_INET, &sin_addr->sin_addr, + inet_ntop(ai->ai_family, addr, addrbuf, sizeof(addrbuf)); strbuf_addstr(&hi->ip_address, addrbuf); From 30c8fda1ab6d55d3b0129bb1686c23bf06cd5b0d Mon Sep 17 00:00:00 2001 From: Sebastien Tardif Date: Thu, 28 May 2026 02:56:55 +0000 Subject: [PATCH 07/30] daemon: fix IPv6 address truncation in ip2str() The sockaddr struct size (ai_addrlen) is passed as the output buffer size to inet_ntop(). For IPv6, sizeof(sockaddr_in6) is 28 bytes but INET6_ADDRSTRLEN is 46, so long IPv6 addresses are silently truncated. Fix this by passing sizeof(ip) instead, which is the actual size of the destination buffer. Drop the now-unused len parameter from ip2str() and update all callers. 
Signed-off-by: Sebastien Tardif Signed-off-by: Junio C Hamano --- daemon.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/daemon.c b/daemon.c index 80fa0226d89f03..103c08d868d5de 100644 --- a/daemon.c +++ b/daemon.c @@ -947,7 +947,7 @@ struct socketlist { size_t alloc; }; -static const char *ip2str(int family, struct sockaddr *sin, socklen_t len) +static const char *ip2str(int family, struct sockaddr *sin) { #ifdef NO_IPV6 static char ip[INET_ADDRSTRLEN]; @@ -958,11 +958,11 @@ static const char *ip2str(int family, struct sockaddr *sin, socklen_t len) switch (family) { #ifndef NO_IPV6 case AF_INET6: - inet_ntop(family, &((struct sockaddr_in6*)sin)->sin6_addr, ip, len); + inet_ntop(family, &((struct sockaddr_in6*)sin)->sin6_addr, ip, sizeof(ip)); break; #endif case AF_INET: - inet_ntop(family, &((struct sockaddr_in*)sin)->sin_addr, ip, len); + inet_ntop(family, &((struct sockaddr_in*)sin)->sin_addr, ip, sizeof(ip)); break; default: xsnprintf(ip, sizeof(ip), ""); @@ -1019,14 +1019,14 @@ static int setup_named_sock(char *listen_addr, int listen_port, struct socketlis if (bind(sockfd, ai->ai_addr, ai->ai_addrlen) < 0) { logerror("Could not bind to %s: %s", - ip2str(ai->ai_family, ai->ai_addr, ai->ai_addrlen), + ip2str(ai->ai_family, ai->ai_addr), strerror(errno)); close(sockfd); continue; /* not fatal */ } if (listen(sockfd, 5) < 0) { logerror("Could not listen to %s: %s", - ip2str(ai->ai_family, ai->ai_addr, ai->ai_addrlen), + ip2str(ai->ai_family, ai->ai_addr), strerror(errno)); close(sockfd); continue; /* not fatal */ @@ -1080,7 +1080,7 @@ static int setup_named_sock(char *listen_addr, int listen_port, struct socketlis if ( bind(sockfd, (struct sockaddr *)&sin, sizeof sin) < 0 ) { logerror("Could not bind to %s: %s", - ip2str(AF_INET, (struct sockaddr *)&sin, sizeof(sin)), + ip2str(AF_INET, (struct sockaddr *)&sin), strerror(errno)); close(sockfd); return 0; @@ -1088,7 +1088,7 @@ static int setup_named_sock(char *listen_addr, int 
listen_port, struct socketlis if (listen(sockfd, 5) < 0) { logerror("Could not listen to %s: %s", - ip2str(AF_INET, (struct sockaddr *)&sin, sizeof(sin)), + ip2str(AF_INET, (struct sockaddr *)&sin), strerror(errno)); close(sockfd); return 0; From 422a5bf57575a8c5d06faedfd77376501917e22c Mon Sep 17 00:00:00 2001 From: Sebastien Tardif Date: Thu, 28 May 2026 02:56:56 +0000 Subject: [PATCH 08/30] daemon: guard NULL REMOTE_PORT in execute() logging REMOTE_ADDR and REMOTE_PORT are both set by the same code path in handle(), so when the existing REMOTE_ADDR check passes, REMOTE_PORT is guaranteed to be non-NULL. Guard REMOTE_PORT as well so that a future change that breaks this invariant does not pass NULL to printf's %s, which is undefined behavior. Signed-off-by: Sebastien Tardif Signed-off-by: Junio C Hamano --- daemon.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/daemon.c b/daemon.c index 103c08d868d5de..78cca8673fdb34 100644 --- a/daemon.c +++ b/daemon.c @@ -753,7 +753,7 @@ static int execute(void) struct strvec env = STRVEC_INIT; if (addr) - loginfo("Connection from %s:%s", addr, port); + loginfo("Connection from %s:%s", addr, port ? port : "?"); set_keep_alive(0); alarm(init_timeout ? init_timeout : timeout); From 514f039c9052c23047c310f911ba8c0c2e74a1c7 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Mon, 1 Jun 2026 10:20:24 +0200 Subject: [PATCH 09/30] odb/source-loose: move loose source into "odb/" subsystem In subsequent patches we'll be turning `struct odb_source_loose` into a proper `struct odb_source`. As a first step towards this goal, move its struct out of "object-file.c" and into "odb/source-loose.c". This detaches the implementation of the loose object source from the generic object file code, following the same convention already used by the "files" and "in-memory" sources. No functional changes are intended. 
Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- Makefile | 1 + meson.build | 1 + object-file.c | 8 -------- object-file.h | 21 +-------------------- odb/source-loose.c | 10 ++++++++++ odb/source-loose.h | 34 ++++++++++++++++++++++++++++++++++ 6 files changed, 47 insertions(+), 28 deletions(-) create mode 100644 odb/source-loose.c create mode 100644 odb/source-loose.h diff --git a/Makefile b/Makefile index a43b8ee0674df8..01356235c3e11d 100644 --- a/Makefile +++ b/Makefile @@ -1217,6 +1217,7 @@ LIB_OBJS += odb.o LIB_OBJS += odb/source.o LIB_OBJS += odb/source-files.o LIB_OBJS += odb/source-inmemory.o +LIB_OBJS += odb/source-loose.o LIB_OBJS += odb/streaming.o LIB_OBJS += odb/transaction.o LIB_OBJS += oid-array.o diff --git a/meson.build b/meson.build index 664d8313295a26..c85e5988351b1f 100644 --- a/meson.build +++ b/meson.build @@ -405,6 +405,7 @@ libgit_sources = [ 'odb/source.c', 'odb/source-files.c', 'odb/source-inmemory.c', + 'odb/source-loose.c', 'odb/streaming.c', 'odb/transaction.c', 'oid-array.c', diff --git a/object-file.c b/object-file.c index 90f995d0000bf6..641bd9c0799dec 100644 --- a/object-file.c +++ b/object-file.c @@ -2205,14 +2205,6 @@ struct odb_transaction *odb_transaction_files_begin(struct odb_source *source) return &transaction->base; } -struct odb_source_loose *odb_source_loose_new(struct odb_source *source) -{ - struct odb_source_loose *loose; - CALLOC_ARRAY(loose, 1); - loose->source = source; - return loose; -} - void odb_source_loose_free(struct odb_source_loose *loose) { if (!loose) diff --git a/object-file.h b/object-file.h index 5241b8dd5c564d..1d8312cf7f9ff9 100644 --- a/object-file.h +++ b/object-file.h @@ -4,6 +4,7 @@ #include "git-zlib.h" #include "object.h" #include "odb.h" +#include "odb/source-loose.h" struct index_state; @@ -20,26 +21,6 @@ struct object_info; struct odb_read_stream; struct odb_source; -struct odb_source_loose { - struct odb_source *source; - - /* - * Used to store the results of readdir(3) 
calls when we are OK - * sacrificing accuracy due to races for speed. That includes - * object existence with OBJECT_INFO_QUICK, as well as - * our search for unique abbreviated hashes. Don't use it for tasks - * requiring greater accuracy! - * - * Be sure to call odb_load_loose_cache() before using. - */ - uint32_t subdir_seen[8]; /* 256 bits */ - struct oidtree *cache; - - /* Map between object IDs for loose objects. */ - struct loose_object_map *map; -}; - -struct odb_source_loose *odb_source_loose_new(struct odb_source *source); void odb_source_loose_free(struct odb_source_loose *loose); /* Reprepare the loose source by emptying the loose object cache. */ diff --git a/odb/source-loose.c b/odb/source-loose.c new file mode 100644 index 00000000000000..b944d2181324ce --- /dev/null +++ b/odb/source-loose.c @@ -0,0 +1,10 @@ +#include "git-compat-util.h" +#include "odb/source-loose.h" + +struct odb_source_loose *odb_source_loose_new(struct odb_source *source) +{ + struct odb_source_loose *loose; + CALLOC_ARRAY(loose, 1); + loose->source = source; + return loose; +} diff --git a/odb/source-loose.h b/odb/source-loose.h new file mode 100644 index 00000000000000..8b4bac77ea39e8 --- /dev/null +++ b/odb/source-loose.h @@ -0,0 +1,34 @@ +#ifndef ODB_SOURCE_LOOSE_H +#define ODB_SOURCE_LOOSE_H + +#include "odb/source.h" + +struct object_database; +struct oidtree; + +/* + * An object database source that stores its objects in loose format, one + * file per object. This source is part of the files source. + */ +struct odb_source_loose { + struct odb_source *source; + + /* + * Used to store the results of readdir(3) calls when we are OK + * sacrificing accuracy due to races for speed. That includes + * object existence with OBJECT_INFO_QUICK, as well as + * our search for unique abbreviated hashes. Don't use it for tasks + * requiring greater accuracy! + * + * Be sure to call odb_load_loose_cache() before using. 
+ */ + uint32_t subdir_seen[8]; /* 256 bits */ + struct oidtree *cache; + + /* Map between object IDs for loose objects. */ + struct loose_object_map *map; +}; + +struct odb_source_loose *odb_source_loose_new(struct odb_source *source); + +#endif From 1d451ba6fec076d357abf62607b97f585283030a Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Mon, 1 Jun 2026 10:20:25 +0200 Subject: [PATCH 10/30] odb/source-loose: store pointer to "files" instead of generic source The `struct odb_source_loose` holds a pointer to its owning parent source. The way that Git is currently structured, this parent is always the "files" source. In subsequent commits we're going to detangle that so that the "loose" source doesn't have any owning parent source at all so that it can be used as a completely standalone source. Detangling this mess is somewhat intricate though, and is made even more intricate because it's not always clear which kind of source one is holding at a specific point in time -- either the parent "files" source, or the child "loose" source. Make this relationship more explicit by storing a pointer to the "files" source instead of storing a pointer to a generic `struct odb_source`. This will help make subsequent steps a bit clearer. Note that this is a temporary step, only. At the end of this series we will have dropped the parent pointer completely. 
Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- object-file.c | 4 ++-- odb/source-files.c | 2 +- odb/source-loose.c | 4 ++-- odb/source-loose.h | 5 +++-- 4 files changed, 8 insertions(+), 7 deletions(-) diff --git a/object-file.c b/object-file.c index 641bd9c0799dec..7a1908bfc05cb5 100644 --- a/object-file.c +++ b/object-file.c @@ -178,7 +178,7 @@ static int open_loose_object(struct odb_source_loose *loose, static struct strbuf buf = STRBUF_INIT; int fd; - *path = odb_loose_path(loose->source, &buf, oid); + *path = odb_loose_path(&loose->files->base, &buf, oid); fd = git_open(*path); if (fd >= 0) return fd; @@ -189,7 +189,7 @@ static int open_loose_object(struct odb_source_loose *loose, static int quick_has_loose(struct odb_source_loose *loose, const struct object_id *oid) { - return !!oidtree_contains(odb_source_loose_cache(loose->source, oid), oid); + return !!oidtree_contains(odb_source_loose_cache(&loose->files->base, oid), oid); } /* diff --git a/odb/source-files.c b/odb/source-files.c index b5abd20e971e78..185cc6903e35f2 100644 --- a/odb/source-files.c +++ b/odb/source-files.c @@ -264,7 +264,7 @@ struct odb_source_files *odb_source_files_new(struct object_database *odb, CALLOC_ARRAY(files, 1); odb_source_init(&files->base, odb, ODB_SOURCE_FILES, path, local); - files->loose = odb_source_loose_new(&files->base); + files->loose = odb_source_loose_new(files); files->packed = packfile_store_new(&files->base); files->base.free = odb_source_files_free; diff --git a/odb/source-loose.c b/odb/source-loose.c index b944d2181324ce..c9e7414814814d 100644 --- a/odb/source-loose.c +++ b/odb/source-loose.c @@ -1,10 +1,10 @@ #include "git-compat-util.h" #include "odb/source-loose.h" -struct odb_source_loose *odb_source_loose_new(struct odb_source *source) +struct odb_source_loose *odb_source_loose_new(struct odb_source_files *files) { struct odb_source_loose *loose; CALLOC_ARRAY(loose, 1); - loose->source = source; + loose->files = files; return loose; } 
diff --git a/odb/source-loose.h b/odb/source-loose.h index 8b4bac77ea39e8..bf61e767c8aab4 100644 --- a/odb/source-loose.h +++ b/odb/source-loose.h @@ -3,6 +3,7 @@ #include "odb/source.h" +struct odb_source_files; struct object_database; struct oidtree; @@ -11,7 +12,7 @@ struct oidtree; * file per object. This source is part of the files source. */ struct odb_source_loose { - struct odb_source *source; + struct odb_source_files *files; /* * Used to store the results of readdir(3) calls when we are OK @@ -29,6 +30,6 @@ struct odb_source_loose { struct loose_object_map *map; }; -struct odb_source_loose *odb_source_loose_new(struct odb_source *source); +struct odb_source_loose *odb_source_loose_new(struct odb_source_files *files); #endif From ead691927b05dbbd2655db9a7183d5fcb935bf3b Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Mon, 1 Jun 2026 10:20:26 +0200 Subject: [PATCH 11/30] odb/source-loose: start converting to a proper `struct odb_source` Start converting `struct odb_source_loose` into a proper pluggable `struct odb_source` by embedding the base struct and assigning it the new `ODB_SOURCE_LOOSE` type. Furthermore, wire up lifecycle management of this source by implementing the `free` callback and taking ownership of the chdir notifications. Note that the loose source is not yet functional as a standalone `struct odb_source`, as it's missing all of the callback implementations. These will be wired up in subsequent commits. 
Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- object-file.c | 17 ----------------- object-file.h | 2 -- odb/source-files.c | 2 +- odb/source-loose.c | 45 +++++++++++++++++++++++++++++++++++++++++++++ odb/source-loose.h | 14 ++++++++++++++ odb/source.h | 3 +++ 6 files changed, 63 insertions(+), 20 deletions(-) diff --git a/object-file.c b/object-file.c index 7a1908bfc05cb5..977d959d333166 100644 --- a/object-file.c +++ b/object-file.c @@ -2041,14 +2041,6 @@ static struct oidtree *odb_source_loose_cache(struct odb_source *source, return files->loose->cache; } -static void odb_source_loose_clear_cache(struct odb_source_loose *loose) -{ - oidtree_clear(loose->cache); - FREE_AND_NULL(loose->cache); - memset(&loose->subdir_seen, 0, - sizeof(loose->subdir_seen)); -} - void odb_source_loose_reprepare(struct odb_source *source) { struct odb_source_files *files = odb_source_files_downcast(source); @@ -2205,15 +2197,6 @@ struct odb_transaction *odb_transaction_files_begin(struct odb_source *source) return &transaction->base; } -void odb_source_loose_free(struct odb_source_loose *loose) -{ - if (!loose) - return; - odb_source_loose_clear_cache(loose); - loose_object_map_clear(&loose->map); - free(loose); -} - struct odb_loose_read_stream { struct odb_read_stream base; git_zstream z; diff --git a/object-file.h b/object-file.h index 1d8312cf7f9ff9..02c9680980ab0f 100644 --- a/object-file.h +++ b/object-file.h @@ -21,8 +21,6 @@ struct object_info; struct odb_read_stream; struct odb_source; -void odb_source_loose_free(struct odb_source_loose *loose); - /* Reprepare the loose source by emptying the loose object cache. 
*/ void odb_source_loose_reprepare(struct odb_source *source); diff --git a/odb/source-files.c b/odb/source-files.c index 185cc6903e35f2..ccc637311b9c21 100644 --- a/odb/source-files.c +++ b/odb/source-files.c @@ -27,7 +27,7 @@ static void odb_source_files_free(struct odb_source *source) { struct odb_source_files *files = odb_source_files_downcast(source); chdir_notify_unregister(NULL, odb_source_files_reparent, files); - odb_source_loose_free(files->loose); + odb_source_free(&files->loose->base); packfile_store_free(files->packed); odb_source_release(&files->base); free(files); diff --git a/odb/source-loose.c b/odb/source-loose.c index c9e7414814814d..92e18f5adb2b89 100644 --- a/odb/source-loose.c +++ b/odb/source-loose.c @@ -1,10 +1,55 @@ #include "git-compat-util.h" +#include "abspath.h" +#include "chdir-notify.h" +#include "loose.h" +#include "odb.h" +#include "odb/source-files.h" #include "odb/source-loose.h" +#include "oidtree.h" + +void odb_source_loose_clear_cache(struct odb_source_loose *loose) +{ + oidtree_clear(loose->cache); + FREE_AND_NULL(loose->cache); + memset(&loose->subdir_seen, 0, + sizeof(loose->subdir_seen)); +} + +static void odb_source_loose_reparent(const char *name UNUSED, + const char *old_cwd, + const char *new_cwd, + void *cb_data) +{ + struct odb_source_loose *loose = cb_data; + char *path = reparent_relative_path(old_cwd, new_cwd, + loose->base.path); + free(loose->base.path); + loose->base.path = path; +} + +static void odb_source_loose_free(struct odb_source *source) +{ + struct odb_source_loose *loose = odb_source_loose_downcast(source); + odb_source_loose_clear_cache(loose); + loose_object_map_clear(&loose->map); + chdir_notify_unregister(NULL, odb_source_loose_reparent, loose); + odb_source_release(&loose->base); + free(loose); +} struct odb_source_loose *odb_source_loose_new(struct odb_source_files *files) { struct odb_source_loose *loose; + CALLOC_ARRAY(loose, 1); + odb_source_init(&loose->base, files->base.odb, 
ODB_SOURCE_LOOSE, + files->base.path, files->base.local); loose->files = files; + + loose->base.free = odb_source_loose_free; + + if (!is_absolute_path(loose->base.path)) + chdir_notify_register(NULL, odb_source_loose_reparent, loose); + return loose; } diff --git a/odb/source-loose.h b/odb/source-loose.h index bf61e767c8aab4..bd989f0728e622 100644 --- a/odb/source-loose.h +++ b/odb/source-loose.h @@ -12,6 +12,7 @@ struct oidtree; * file per object. This source is part of the files source. */ struct odb_source_loose { + struct odb_source base; struct odb_source_files *files; /* @@ -32,4 +33,17 @@ struct odb_source_loose { struct odb_source_loose *odb_source_loose_new(struct odb_source_files *files); +/* + * Cast the given object database source to the loose backend. This will cause + * a BUG in case the source doesn't use this backend. + */ +static inline struct odb_source_loose *odb_source_loose_downcast(struct odb_source *source) +{ + if (source->type != ODB_SOURCE_LOOSE) + BUG("trying to downcast source of type '%d' to loose", source->type); + return container_of(source, struct odb_source_loose, base); +} + +void odb_source_loose_clear_cache(struct odb_source_loose *loose); + #endif diff --git a/odb/source.h b/odb/source.h index 0a440884e4f0ab..8bcb67787ebafd 100644 --- a/odb/source.h +++ b/odb/source.h @@ -14,6 +14,9 @@ enum odb_source_type { /* The "files" backend that uses loose objects and packfiles. */ ODB_SOURCE_FILES, + /* The "loose" backend that uses loose objects, only. */ + ODB_SOURCE_LOOSE, + /* The "in-memory" backend that stores objects in memory. */ ODB_SOURCE_INMEMORY, }; From a2b7db9bc8d52f133fe8fcb317788d9fe8696f07 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Mon, 1 Jun 2026 10:20:27 +0200 Subject: [PATCH 12/30] odb/source-loose: wire up `reprepare()` callback Move `odb_source_loose_reprepare()` from "object-file.c" into "odb/source-loose.c" and wire it up as the `reprepare()` callback of the loose source. 
While at it, make `odb_source_loose_clear_cache()` static, as it is no longer needed outside of its file. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- object-file.c | 6 ------ object-file.h | 3 --- odb/source-files.c | 2 +- odb/source-loose.c | 9 ++++++++- odb/source-loose.h | 2 -- 5 files changed, 9 insertions(+), 13 deletions(-) diff --git a/object-file.c b/object-file.c index 977d959d333166..0f4f1e7bdc0733 100644 --- a/object-file.c +++ b/object-file.c @@ -2041,12 +2041,6 @@ static struct oidtree *odb_source_loose_cache(struct odb_source *source, return files->loose->cache; } -void odb_source_loose_reprepare(struct odb_source *source) -{ - struct odb_source_files *files = odb_source_files_downcast(source); - odb_source_loose_clear_cache(files->loose); -} - static int check_stream_oid(git_zstream *stream, const char *hdr, unsigned long size, diff --git a/object-file.h b/object-file.h index 02c9680980ab0f..420a0fff2e7d7e 100644 --- a/object-file.h +++ b/object-file.h @@ -21,9 +21,6 @@ struct object_info; struct odb_read_stream; struct odb_source; -/* Reprepare the loose source by emptying the loose object cache. 
*/ -void odb_source_loose_reprepare(struct odb_source *source); - int odb_source_loose_read_object_info(struct odb_source *source, const struct object_id *oid, struct object_info *oi, diff --git a/odb/source-files.c b/odb/source-files.c index ccc637311b9c21..10832e81e4e206 100644 --- a/odb/source-files.c +++ b/odb/source-files.c @@ -42,7 +42,7 @@ static void odb_source_files_close(struct odb_source *source) static void odb_source_files_reprepare(struct odb_source *source) { struct odb_source_files *files = odb_source_files_downcast(source); - odb_source_loose_reprepare(&files->base); + odb_source_reprepare(&files->loose->base); packfile_store_reprepare(files->packed); } diff --git a/odb/source-loose.c b/odb/source-loose.c index 92e18f5adb2b89..e0fe0d513d2532 100644 --- a/odb/source-loose.c +++ b/odb/source-loose.c @@ -7,7 +7,7 @@ #include "odb/source-loose.h" #include "oidtree.h" -void odb_source_loose_clear_cache(struct odb_source_loose *loose) +static void odb_source_loose_clear_cache(struct odb_source_loose *loose) { oidtree_clear(loose->cache); FREE_AND_NULL(loose->cache); @@ -15,6 +15,12 @@ void odb_source_loose_clear_cache(struct odb_source_loose *loose) sizeof(loose->subdir_seen)); } +static void odb_source_loose_reprepare(struct odb_source *source) +{ + struct odb_source_loose *loose = odb_source_loose_downcast(source); + odb_source_loose_clear_cache(loose); +} + static void odb_source_loose_reparent(const char *name UNUSED, const char *old_cwd, const char *new_cwd, @@ -47,6 +53,7 @@ struct odb_source_loose *odb_source_loose_new(struct odb_source_files *files) loose->files = files; loose->base.free = odb_source_loose_free; + loose->base.reprepare = odb_source_loose_reprepare; if (!is_absolute_path(loose->base.path)) chdir_notify_register(NULL, odb_source_loose_reparent, loose); diff --git a/odb/source-loose.h b/odb/source-loose.h index bd989f0728e622..4dd4fd6ce30a7e 100644 --- a/odb/source-loose.h +++ b/odb/source-loose.h @@ -44,6 +44,4 @@ static inline 
struct odb_source_loose *odb_source_loose_downcast(struct odb_sour return container_of(source, struct odb_source_loose, base); } -void odb_source_loose_clear_cache(struct odb_source_loose *loose); - #endif From 337b7fccba1cca8b7d9232b5e6e9ff53271f0398 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Mon, 1 Jun 2026 10:20:28 +0200 Subject: [PATCH 13/30] odb/source-loose: wire up `close()` callback Wire up a new `close()` callback for the loose source and call it from the "files" source via the generic `odb_source_close()` interface. The callback itself is a no-op as the loose source has no resources that need to be released on close. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- odb/source-files.c | 1 + odb/source-loose.c | 6 ++++++ 2 files changed, 7 insertions(+) diff --git a/odb/source-files.c b/odb/source-files.c index 10832e81e4e206..59e3a70d80d355 100644 --- a/odb/source-files.c +++ b/odb/source-files.c @@ -36,6 +36,7 @@ static void odb_source_files_free(struct odb_source *source) static void odb_source_files_close(struct odb_source *source) { struct odb_source_files *files = odb_source_files_downcast(source); + odb_source_close(&files->loose->base); packfile_store_close(files->packed); } diff --git a/odb/source-loose.c b/odb/source-loose.c index e0fe0d513d2532..65c1076659b8fd 100644 --- a/odb/source-loose.c +++ b/odb/source-loose.c @@ -21,6 +21,11 @@ static void odb_source_loose_reprepare(struct odb_source *source) odb_source_loose_clear_cache(loose); } +static void odb_source_loose_close(struct odb_source *source UNUSED) +{ + /* Nothing to do. 
*/ +} + static void odb_source_loose_reparent(const char *name UNUSED, const char *old_cwd, const char *new_cwd, @@ -53,6 +58,7 @@ struct odb_source_loose *odb_source_loose_new(struct odb_source_files *files) loose->files = files; loose->base.free = odb_source_loose_free; + loose->base.close = odb_source_loose_close; loose->base.reprepare = odb_source_loose_reprepare; if (!is_absolute_path(loose->base.path)) From 584338ed92735f3be768c16b53266d5bad439a7a Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Mon, 1 Jun 2026 10:20:29 +0200 Subject: [PATCH 14/30] odb/source-loose: wire up `read_object_info()` callback Move `odb_source_loose_read_object_info()` from "object-file.c" into "odb/source-loose.c" and wire it up as the `read_object_info()` callback of the loose source. Callers that previously invoked it directly now go through the generic `odb_source_read_object_info()` interface instead. The function `read_object_info_from_path()` cannot be moved along with it because it is still called by `for_each_object_wrapper_cb()`. It is therefore kept in place, but adjusted to take a loose source to clarify that it's always operating on this structure. 
Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- object-file.c | 46 +++++++++++++--------------------------------- object-file.h | 11 ++++++----- odb/source-files.c | 2 +- odb/source-loose.c | 24 ++++++++++++++++++++++++ 4 files changed, 44 insertions(+), 39 deletions(-) diff --git a/object-file.c b/object-file.c index 0f4f1e7bdc0733..fa174512a43c75 100644 --- a/object-file.c +++ b/object-file.c @@ -396,13 +396,12 @@ static int parse_loose_header(const char *hdr, struct object_info *oi) return 0; } -static int read_object_info_from_path(struct odb_source *source, - const char *path, - const struct object_id *oid, - struct object_info *oi, - enum object_info_flags flags) +int read_object_info_from_path(struct odb_source_loose *loose, + const char *path, + const struct object_id *oid, + struct object_info *oi, + enum object_info_flags flags) { - struct odb_source_files *files = odb_source_files_downcast(source); int ret; int fd; unsigned long mapsize; @@ -425,7 +424,7 @@ static int read_object_info_from_path(struct odb_source *source, struct stat st; if ((!oi || (!oi->disk_sizep && !oi->mtimep)) && (flags & OBJECT_INFO_QUICK)) { - ret = quick_has_loose(files->loose, oid) ? 0 : -1; + ret = quick_has_loose(loose, oid) ? 
0 : -1; goto out; } @@ -532,7 +531,7 @@ static int read_object_info_from_path(struct odb_source *source, if (oi->typep == &type_scratch) oi->typep = NULL; if (oi->delta_base_oid) - oidclr(oi->delta_base_oid, source->odb->repo->hash_algo); + oidclr(oi->delta_base_oid, loose->base.odb->repo->hash_algo); if (!ret) oi->whence = OI_LOOSE; } @@ -540,26 +539,6 @@ static int read_object_info_from_path(struct odb_source *source, return ret; } -int odb_source_loose_read_object_info(struct odb_source *source, - const struct object_id *oid, - struct object_info *oi, - enum object_info_flags flags) -{ - static struct strbuf buf = STRBUF_INIT; - - /* - * The second read shouldn't cause new loose objects to show up, unless - * there was a race condition with a secondary process. We don't care - * about this case though, so we simply skip reading loose objects a - * second time. - */ - if (flags & OBJECT_INFO_SECOND_READ) - return -1; - - odb_loose_path(source, &buf, oid); - return read_object_info_from_path(source, buf.buf, oid, oi, flags); -} - static void hash_object_body(const struct git_hash_algo *algo, struct git_hash_ctx *c, const void *buf, unsigned long len, struct object_id *oid, @@ -1833,7 +1812,7 @@ int for_each_loose_file_in_source(struct odb_source *source, } struct for_each_object_wrapper_data { - struct odb_source *source; + struct odb_source_loose *loose; const struct object_info *request; odb_for_each_object_cb cb; void *cb_data; @@ -1848,7 +1827,7 @@ static int for_each_object_wrapper_cb(const struct object_id *oid, if (data->request) { struct object_info oi = *data->request; - if (read_object_info_from_path(data->source, path, oid, &oi, 0) < 0) + if (read_object_info_from_path(data->loose, path, oid, &oi, 0) < 0) return -1; return data->cb(oid, &oi, data->cb_data); @@ -1865,8 +1844,8 @@ static int for_each_prefixed_object_wrapper_cb(const struct object_id *oid, if (data->request) { struct object_info oi = *data->request; - if 
(odb_source_loose_read_object_info(data->source, - oid, &oi, 0) < 0) + if (odb_source_read_object_info(&data->loose->base, + oid, &oi, 0) < 0) return -1; return data->cb(oid, &oi, data->cb_data); @@ -1881,8 +1860,9 @@ int odb_source_loose_for_each_object(struct odb_source *source, void *cb_data, const struct odb_for_each_object_options *opts) { + struct odb_source_files *files = odb_source_files_downcast(source); struct for_each_object_wrapper_data data = { - .source = source, + .loose = files->loose, .request = request, .cb = cb, .cb_data = cb_data, diff --git a/object-file.h b/object-file.h index 420a0fff2e7d7e..8ac2832dac3439 100644 --- a/object-file.h +++ b/object-file.h @@ -21,11 +21,6 @@ struct object_info; struct odb_read_stream; struct odb_source; -int odb_source_loose_read_object_info(struct odb_source *source, - const struct object_id *oid, - struct object_info *oi, - enum object_info_flags flags); - int odb_source_loose_read_object_stream(struct odb_read_stream **out, struct odb_source *source, const struct object_id *oid); @@ -198,6 +193,12 @@ int read_loose_object(struct repository *repo, void **contents, struct object_info *oi); +int read_object_info_from_path(struct odb_source_loose *loose, + const char *path, + const struct object_id *oid, + struct object_info *oi, + enum object_info_flags flags); + struct odb_transaction; /* diff --git a/odb/source-files.c b/odb/source-files.c index 59e3a70d80d355..8d6924755ffb70 100644 --- a/odb/source-files.c +++ b/odb/source-files.c @@ -55,7 +55,7 @@ static int odb_source_files_read_object_info(struct odb_source *source, struct odb_source_files *files = odb_source_files_downcast(source); if (!packfile_store_read_object_info(files->packed, oid, oi, flags) || - !odb_source_loose_read_object_info(source, oid, oi, flags)) + !odb_source_read_object_info(&files->loose->base, oid, oi, flags)) return 0; return -1; diff --git a/odb/source-loose.c b/odb/source-loose.c index 65c1076659b8fd..50f387ecf31e38 100644 --- 
a/odb/source-loose.c +++ b/odb/source-loose.c @@ -2,10 +2,33 @@ #include "abspath.h" #include "chdir-notify.h" #include "loose.h" +#include "object-file.h" #include "odb.h" #include "odb/source-files.h" #include "odb/source-loose.h" #include "oidtree.h" +#include "strbuf.h" + +static int odb_source_loose_read_object_info(struct odb_source *source, + const struct object_id *oid, + struct object_info *oi, + enum object_info_flags flags) +{ + struct odb_source_loose *loose = odb_source_loose_downcast(source); + static struct strbuf buf = STRBUF_INIT; + + /* + * The second read shouldn't cause new loose objects to show up, unless + * there was a race condition with a secondary process. We don't care + * about this case though, so we simply skip reading loose objects a + * second time. + */ + if (flags & OBJECT_INFO_SECOND_READ) + return -1; + + odb_loose_path(source, &buf, oid); + return read_object_info_from_path(loose, buf.buf, oid, oi, flags); +} static void odb_source_loose_clear_cache(struct odb_source_loose *loose) { @@ -60,6 +83,7 @@ struct odb_source_loose *odb_source_loose_new(struct odb_source_files *files) loose->base.free = odb_source_loose_free; loose->base.close = odb_source_loose_close; loose->base.reprepare = odb_source_loose_reprepare; + loose->base.read_object_info = odb_source_loose_read_object_info; if (!is_absolute_path(loose->base.path)) chdir_notify_register(NULL, odb_source_loose_reparent, loose); From 727a935a71c29524c936520d8aba4de7098f7566 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Mon, 1 Jun 2026 10:20:30 +0200 Subject: [PATCH 15/30] odb/source-loose: wire up `read_object_stream()` callback Move `odb_source_loose_read_object_stream()` and its associated helpers from "object-file.c" into "odb/source-loose.c" and wire it up as the `read_object_stream()` callback of the loose source. 
As part of the move we are also forced to expose a couple of functions from "object-file.h" that parse object headers in a somewhat-generic way, as those functions are now used by both subsystems. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- object-file.c | 200 ++------------------------------------------- object-file.h | 31 +++++-- odb/source-files.c | 2 +- odb/source-loose.c | 189 ++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 222 insertions(+), 200 deletions(-) diff --git a/object-file.c b/object-file.c index fa174512a43c75..adfb6724936452 100644 --- a/object-file.c +++ b/object-file.c @@ -164,28 +164,6 @@ int stream_object_signature(struct repository *r, return !oideq(oid, &real_oid) ? -1 : 0; } -/* - * Find "oid" as a loose object in given source, open the object and return its - * file descriptor. Returns the file descriptor on success, negative on failure. - * - * The "path" out-parameter will give the path of the object we found (if any). - * Note that it may point to static storage and is only valid until another - * call to stat_loose_object(). 
- */ -static int open_loose_object(struct odb_source_loose *loose, - const struct object_id *oid, const char **path) -{ - static struct strbuf buf = STRBUF_INIT; - int fd; - - *path = odb_loose_path(&loose->files->base, &buf, oid); - fd = git_open(*path); - if (fd >= 0) - return fd; - - return -1; -} - static int quick_has_loose(struct odb_source_loose *loose, const struct object_id *oid) { @@ -215,42 +193,11 @@ static void *map_fd(int fd, const char *path, unsigned long *size) return map; } -static void *odb_source_loose_map_object(struct odb_source *source, - const struct object_id *oid, - unsigned long *size) -{ - struct odb_source_files *files = odb_source_files_downcast(source); - const char *p; - int fd = open_loose_object(files->loose, oid, &p); - - if (fd < 0) - return NULL; - return map_fd(fd, p, size); -} - -enum unpack_loose_header_result { - ULHR_OK, - ULHR_BAD, - ULHR_TOO_LONG, -}; - -/** - * unpack_loose_header() initializes the data stream needed to unpack - * a loose object header. - * - * Returns: - * - * - ULHR_OK on success - * - ULHR_BAD on error - * - ULHR_TOO_LONG if the header was too long - * - * It will only parse up to MAX_HEADER_LEN bytes. - */ -static enum unpack_loose_header_result unpack_loose_header(git_zstream *stream, - unsigned char *map, - unsigned long mapsize, - void *buffer, - unsigned long bufsiz) +enum unpack_loose_header_result unpack_loose_header(git_zstream *stream, + unsigned char *map, + unsigned long mapsize, + void *buffer, + unsigned long bufsiz) { int status; @@ -340,7 +287,7 @@ static void *unpack_loose_rest(git_zstream *stream, * too permissive for what we want to check. So do an anal * object header parse by hand. 
*/ -static int parse_loose_header(const char *hdr, struct object_info *oi) +int parse_loose_header(const char *hdr, struct object_info *oi) { const char *type_buf = hdr; size_t size; @@ -2170,138 +2117,3 @@ struct odb_transaction *odb_transaction_files_begin(struct odb_source *source) return &transaction->base; } - -struct odb_loose_read_stream { - struct odb_read_stream base; - git_zstream z; - enum { - ODB_LOOSE_READ_STREAM_INUSE, - ODB_LOOSE_READ_STREAM_DONE, - ODB_LOOSE_READ_STREAM_ERROR, - } z_state; - void *mapped; - unsigned long mapsize; - char hdr[32]; - int hdr_avail; - int hdr_used; -}; - -static ssize_t read_istream_loose(struct odb_read_stream *_st, char *buf, size_t sz) -{ - struct odb_loose_read_stream *st = - container_of(_st, struct odb_loose_read_stream, base); - size_t total_read = 0; - - switch (st->z_state) { - case ODB_LOOSE_READ_STREAM_DONE: - return 0; - case ODB_LOOSE_READ_STREAM_ERROR: - return -1; - default: - break; - } - - if (st->hdr_used < st->hdr_avail) { - size_t to_copy = st->hdr_avail - st->hdr_used; - if (sz < to_copy) - to_copy = sz; - memcpy(buf, st->hdr + st->hdr_used, to_copy); - st->hdr_used += to_copy; - total_read += to_copy; - } - - while (total_read < sz) { - int status; - - st->z.next_out = (unsigned char *)buf + total_read; - st->z.avail_out = sz - total_read; - status = git_inflate(&st->z, Z_FINISH); - - total_read = st->z.next_out - (unsigned char *)buf; - - if (status == Z_STREAM_END) { - git_inflate_end(&st->z); - st->z_state = ODB_LOOSE_READ_STREAM_DONE; - break; - } - if (status != Z_OK && (status != Z_BUF_ERROR || total_read < sz)) { - git_inflate_end(&st->z); - st->z_state = ODB_LOOSE_READ_STREAM_ERROR; - return -1; - } - } - return total_read; -} - -static int close_istream_loose(struct odb_read_stream *_st) -{ - struct odb_loose_read_stream *st = - container_of(_st, struct odb_loose_read_stream, base); - - if (st->z_state == ODB_LOOSE_READ_STREAM_INUSE) - git_inflate_end(&st->z); - munmap(st->mapped, 
st->mapsize); - return 0; -} - -int odb_source_loose_read_object_stream(struct odb_read_stream **out, - struct odb_source *source, - const struct object_id *oid) -{ - struct object_info oi = OBJECT_INFO_INIT; - struct odb_loose_read_stream *st; - unsigned long mapsize; - unsigned long size_ul; - void *mapped; - - mapped = odb_source_loose_map_object(source, oid, &mapsize); - if (!mapped) - return -1; - - /* - * Note: we must allocate this structure early even though we may still - * fail. This is because we need to initialize the zlib stream, and it - * is not possible to copy the stream around after the fact because it - * has self-referencing pointers. - */ - CALLOC_ARRAY(st, 1); - - switch (unpack_loose_header(&st->z, mapped, mapsize, st->hdr, - sizeof(st->hdr))) { - case ULHR_OK: - break; - case ULHR_BAD: - case ULHR_TOO_LONG: - goto error; - } - - /* - * object_info.sizep is unsigned long* (32-bit on Windows), but - * st->base.size is size_t (64-bit). Use temporary variable. - * Note: loose objects >4GB would still truncate here, but such - * large loose objects are uncommon (they'd normally be packed). 
- */ - oi.sizep = &size_ul; - oi.typep = &st->base.type; - - if (parse_loose_header(st->hdr, &oi) < 0 || st->base.type < 0) - goto error; - st->base.size = size_ul; - - st->mapped = mapped; - st->mapsize = mapsize; - st->hdr_used = strlen(st->hdr) + 1; - st->hdr_avail = st->z.total_out; - st->z_state = ODB_LOOSE_READ_STREAM_INUSE; - st->base.close = close_istream_loose; - st->base.read = read_istream_loose; - - *out = &st->base; - - return 0; -error: - git_inflate_end(&st->z); - munmap(mapped, mapsize); - free(st); - return -1; -} diff --git a/object-file.h b/object-file.h index 8ac2832dac3439..d93b7ffad704b0 100644 --- a/object-file.h +++ b/object-file.h @@ -18,13 +18,8 @@ int index_fd(struct index_state *istate, struct object_id *oid, int fd, struct s int index_path(struct index_state *istate, struct object_id *oid, const char *path, struct stat *st, unsigned flags); struct object_info; -struct odb_read_stream; struct odb_source; -int odb_source_loose_read_object_stream(struct odb_read_stream **out, - struct odb_source *source, - const struct object_id *oid); - /* * Return true iff an object database source has a loose object * with the specified name. This function does not respect replace @@ -199,6 +194,32 @@ int read_object_info_from_path(struct odb_source_loose *loose, struct object_info *oi, enum object_info_flags flags); +enum unpack_loose_header_result { + ULHR_OK, + ULHR_BAD, + ULHR_TOO_LONG, +}; + +/** + * unpack_loose_header() initializes the data stream needed to unpack + * a loose object header. + * + * Returns: + * + * - ULHR_OK on success + * - ULHR_BAD on error + * - ULHR_TOO_LONG if the header was too long + * + * It will only parse up to MAX_HEADER_LEN bytes. 
+ */ +enum unpack_loose_header_result unpack_loose_header(git_zstream *stream, + unsigned char *map, + unsigned long mapsize, + void *buffer, + unsigned long bufsiz); + +int parse_loose_header(const char *hdr, struct object_info *oi); + struct odb_transaction; /* diff --git a/odb/source-files.c b/odb/source-files.c index 8d6924755ffb70..90806ddf86b662 100644 --- a/odb/source-files.c +++ b/odb/source-files.c @@ -67,7 +67,7 @@ static int odb_source_files_read_object_stream(struct odb_read_stream **out, { struct odb_source_files *files = odb_source_files_downcast(source); if (!packfile_store_read_object_stream(out, files->packed, oid) || - !odb_source_loose_read_object_stream(out, source, oid)) + !odb_source_read_object_stream(out, &files->loose->base, oid)) return 0; return -1; } diff --git a/odb/source-loose.c b/odb/source-loose.c index 50f387ecf31e38..4b82c6f316512e 100644 --- a/odb/source-loose.c +++ b/odb/source-loose.c @@ -1,11 +1,13 @@ #include "git-compat-util.h" #include "abspath.h" #include "chdir-notify.h" +#include "gettext.h" #include "loose.h" #include "object-file.h" #include "odb.h" #include "odb/source-files.h" #include "odb/source-loose.h" +#include "odb/streaming.h" #include "oidtree.h" #include "strbuf.h" @@ -30,6 +32,192 @@ static int odb_source_loose_read_object_info(struct odb_source *source, return read_object_info_from_path(loose, buf.buf, oid, oi, flags); } +/* + * Find "oid" as a loose object in given source, open the object and return its + * file descriptor. Returns the file descriptor on success, negative on failure. + * + * The "path" out-parameter will give the path of the object we found (if any). + * Note that it may point to static storage and is only valid until another + * call to open_loose_object(). 
+ */ +static int open_loose_object(struct odb_source_loose *loose, + const struct object_id *oid, const char **path) +{ + static struct strbuf buf = STRBUF_INIT; + int fd; + + *path = odb_loose_path(&loose->base, &buf, oid); + fd = git_open(*path); + if (fd >= 0) + return fd; + + return -1; +} + +static void *odb_source_loose_map_object(struct odb_source_loose *loose, + const struct object_id *oid, + unsigned long *size) +{ + const char *p; + int fd = open_loose_object(loose, oid, &p); + void *map = NULL; + struct stat st; + + if (fd < 0) + return NULL; + + if (!fstat(fd, &st)) { + *size = xsize_t(st.st_size); + if (!*size) { + /* mmap() is forbidden on empty files */ + error(_("object file %s is empty"), p); + goto out; + } + + map = xmmap(NULL, *size, PROT_READ, MAP_PRIVATE, fd, 0); + } + +out: + close(fd); + return map; +} + +struct odb_loose_read_stream { + struct odb_read_stream base; + git_zstream z; + enum { + ODB_LOOSE_READ_STREAM_INUSE, + ODB_LOOSE_READ_STREAM_DONE, + ODB_LOOSE_READ_STREAM_ERROR, + } z_state; + void *mapped; + unsigned long mapsize; + char hdr[32]; + int hdr_avail; + int hdr_used; +}; + +static ssize_t read_istream_loose(struct odb_read_stream *_st, char *buf, size_t sz) +{ + struct odb_loose_read_stream *st = + container_of(_st, struct odb_loose_read_stream, base); + size_t total_read = 0; + + switch (st->z_state) { + case ODB_LOOSE_READ_STREAM_DONE: + return 0; + case ODB_LOOSE_READ_STREAM_ERROR: + return -1; + default: + break; + } + + if (st->hdr_used < st->hdr_avail) { + size_t to_copy = st->hdr_avail - st->hdr_used; + if (sz < to_copy) + to_copy = sz; + memcpy(buf, st->hdr + st->hdr_used, to_copy); + st->hdr_used += to_copy; + total_read += to_copy; + } + + while (total_read < sz) { + int status; + + st->z.next_out = (unsigned char *)buf + total_read; + st->z.avail_out = sz - total_read; + status = git_inflate(&st->z, Z_FINISH); + + total_read = st->z.next_out - (unsigned char *)buf; + + if (status == Z_STREAM_END) { + 
git_inflate_end(&st->z); + st->z_state = ODB_LOOSE_READ_STREAM_DONE; + break; + } + if (status != Z_OK && (status != Z_BUF_ERROR || total_read < sz)) { + git_inflate_end(&st->z); + st->z_state = ODB_LOOSE_READ_STREAM_ERROR; + return -1; + } + } + return total_read; +} + +static int close_istream_loose(struct odb_read_stream *_st) +{ + struct odb_loose_read_stream *st = + container_of(_st, struct odb_loose_read_stream, base); + + if (st->z_state == ODB_LOOSE_READ_STREAM_INUSE) + git_inflate_end(&st->z); + munmap(st->mapped, st->mapsize); + return 0; +} + +static int odb_source_loose_read_object_stream(struct odb_read_stream **out, + struct odb_source *source, + const struct object_id *oid) +{ + struct odb_source_loose *loose = odb_source_loose_downcast(source); + struct object_info oi = OBJECT_INFO_INIT; + struct odb_loose_read_stream *st; + unsigned long mapsize; + unsigned long size_ul; + void *mapped; + + mapped = odb_source_loose_map_object(loose, oid, &mapsize); + if (!mapped) + return -1; + + /* + * Note: we must allocate this structure early even though we may still + * fail. This is because we need to initialize the zlib stream, and it + * is not possible to copy the stream around after the fact because it + * has self-referencing pointers. + */ + CALLOC_ARRAY(st, 1); + + switch (unpack_loose_header(&st->z, mapped, mapsize, st->hdr, + sizeof(st->hdr))) { + case ULHR_OK: + break; + case ULHR_BAD: + case ULHR_TOO_LONG: + goto error; + } + + /* + * object_info.sizep is unsigned long* (32-bit on Windows), but + * st->base.size is size_t (64-bit). Use temporary variable. + * Note: loose objects >4GB would still truncate here, but such + * large loose objects are uncommon (they'd normally be packed). 
+ */ + oi.sizep = &size_ul; + oi.typep = &st->base.type; + + if (parse_loose_header(st->hdr, &oi) < 0 || st->base.type < 0) + goto error; + st->base.size = size_ul; + + st->mapped = mapped; + st->mapsize = mapsize; + st->hdr_used = strlen(st->hdr) + 1; + st->hdr_avail = st->z.total_out; + st->z_state = ODB_LOOSE_READ_STREAM_INUSE; + st->base.close = close_istream_loose; + st->base.read = read_istream_loose; + + *out = &st->base; + + return 0; +error: + git_inflate_end(&st->z); + munmap(mapped, mapsize); + free(st); + return -1; +} + static void odb_source_loose_clear_cache(struct odb_source_loose *loose) { oidtree_clear(loose->cache); @@ -84,6 +272,7 @@ struct odb_source_loose *odb_source_loose_new(struct odb_source_files *files) loose->base.close = odb_source_loose_close; loose->base.reprepare = odb_source_loose_reprepare; loose->base.read_object_info = odb_source_loose_read_object_info; + loose->base.read_object_stream = odb_source_loose_read_object_stream; if (!is_absolute_path(loose->base.path)) chdir_notify_register(NULL, odb_source_loose_reparent, loose); From e4f1d9ba5714957389bee87dd5f9fedb69d8a764 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Mon, 1 Jun 2026 10:20:31 +0200 Subject: [PATCH 16/30] odb/source-loose: wire up `for_each_object()` callback Move `odb_source_loose_for_each_object()` and its associated helpers from "object-file.c" into "odb/source-loose.c" and wire it up as the `for_each_object()` callback of the loose source. Again, as in the preceding commit, we are forced to expose a couple of functions from "object-file.c" that are now used by both subsystems. 
Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- builtin/cat-file.c | 5 +- object-file.c | 299 +++------------------------------------------ object-file.h | 32 ++--- odb/source-files.c | 2 +- odb/source-loose.c | 264 +++++++++++++++++++++++++++++++++++++++ 5 files changed, 297 insertions(+), 305 deletions(-) diff --git a/builtin/cat-file.c b/builtin/cat-file.c index d9fbad535868bb..2958fc53579336 100644 --- a/builtin/cat-file.c +++ b/builtin/cat-file.c @@ -862,8 +862,9 @@ static void batch_each_object(struct batch_options *opt, */ odb_prepare_alternates(the_repository->objects); for (source = the_repository->objects->sources; source; source = source->next) { - int ret = odb_source_loose_for_each_object(source, NULL, batch_one_object_oi, - &payload, &opts); + struct odb_source_files *files = odb_source_files_downcast(source); + int ret = odb_source_for_each_object(&files->loose->base, NULL, batch_one_object_oi, + &payload, &opts); if (ret) break; } diff --git a/object-file.c b/object-file.c index adfb6724936452..157ecad3ea204a 100644 --- a/object-file.c +++ b/object-file.c @@ -22,7 +22,6 @@ #include "odb.h" #include "odb/streaming.h" #include "odb/transaction.h" -#include "oidtree.h" #include "pack.h" #include "packfile.h" #include "path.h" @@ -31,12 +30,6 @@ #include "tempfile.h" #include "tmp-objdir.h" -/* The maximum size for an object header. */ -#define MAX_HEADER_LEN 32 - -static struct oidtree *odb_source_loose_cache(struct odb_source *source, - const struct object_id *oid); - static int get_conv_flags(unsigned flags) { if (flags & INDEX_RENORMALIZE) @@ -164,12 +157,6 @@ int stream_object_signature(struct repository *r, return !oideq(oid, &real_oid) ? -1 : 0; } -static int quick_has_loose(struct odb_source_loose *loose, - const struct object_id *oid) -{ - return !!oidtree_contains(odb_source_loose_cache(&loose->files->base, oid), oid); -} - /* * Map and close the given loose object fd. The path argument is used for * error reporting. 
@@ -227,9 +214,9 @@ enum unpack_loose_header_result unpack_loose_header(git_zstream *stream, return ULHR_TOO_LONG; } -static void *unpack_loose_rest(git_zstream *stream, - void *buffer, unsigned long size, - const struct object_id *oid) +void *unpack_loose_rest(git_zstream *stream, + void *buffer, unsigned long size, + const struct object_id *oid) { size_t bytes = strlen(buffer) + 1, n; unsigned char *buf = xmallocz(size); @@ -343,149 +330,6 @@ int parse_loose_header(const char *hdr, struct object_info *oi) return 0; } -int read_object_info_from_path(struct odb_source_loose *loose, - const char *path, - const struct object_id *oid, - struct object_info *oi, - enum object_info_flags flags) -{ - int ret; - int fd; - unsigned long mapsize; - void *map = NULL; - git_zstream stream, *stream_to_end = NULL; - char hdr[MAX_HEADER_LEN]; - unsigned long size_scratch; - enum object_type type_scratch; - struct stat st; - - /* - * If we don't care about type or size, then we don't - * need to look inside the object at all. Note that we - * do not optimize out the stat call, even if the - * caller doesn't care about the disk-size, since our - * return value implicitly indicates whether the - * object even exists. - */ - if (!oi || (!oi->typep && !oi->sizep && !oi->contentp)) { - struct stat st; - - if ((!oi || (!oi->disk_sizep && !oi->mtimep)) && (flags & OBJECT_INFO_QUICK)) { - ret = quick_has_loose(loose, oid) ? 
0 : -1; - goto out; - } - - if (lstat(path, &st) < 0) { - ret = -1; - goto out; - } - - if (oi) { - if (oi->disk_sizep) - *oi->disk_sizep = st.st_size; - if (oi->mtimep) - *oi->mtimep = st.st_mtime; - } - - ret = 0; - goto out; - } - - fd = git_open(path); - if (fd < 0) { - if (errno != ENOENT) - error_errno(_("unable to open loose object %s"), oid_to_hex(oid)); - ret = -1; - goto out; - } - - if (fstat(fd, &st)) { - close(fd); - ret = -1; - goto out; - } - - mapsize = xsize_t(st.st_size); - if (!mapsize) { - close(fd); - ret = error(_("object file %s is empty"), path); - goto out; - } - - map = xmmap(NULL, mapsize, PROT_READ, MAP_PRIVATE, fd, 0); - close(fd); - if (!map) { - ret = -1; - goto out; - } - - if (oi->disk_sizep) - *oi->disk_sizep = mapsize; - if (oi->mtimep) - *oi->mtimep = st.st_mtime; - - stream_to_end = &stream; - - switch (unpack_loose_header(&stream, map, mapsize, hdr, sizeof(hdr))) { - case ULHR_OK: - if (!oi->sizep) - oi->sizep = &size_scratch; - if (!oi->typep) - oi->typep = &type_scratch; - - if (parse_loose_header(hdr, oi) < 0) { - ret = error(_("unable to parse %s header"), oid_to_hex(oid)); - goto corrupt; - } - - if (*oi->typep < 0) - die(_("invalid object type")); - - if (oi->contentp) { - *oi->contentp = unpack_loose_rest(&stream, hdr, *oi->sizep, oid); - if (!*oi->contentp) { - ret = -1; - goto corrupt; - } - } - - break; - case ULHR_BAD: - ret = error(_("unable to unpack %s header"), - oid_to_hex(oid)); - goto corrupt; - case ULHR_TOO_LONG: - ret = error(_("header for %s too long, exceeds %d bytes"), - oid_to_hex(oid), MAX_HEADER_LEN); - goto corrupt; - } - - ret = 0; - -corrupt: - if (ret && (flags & OBJECT_INFO_DIE_IF_CORRUPT)) - die(_("loose object %s (stored in %s) is corrupt"), - oid_to_hex(oid), path); - -out: - if (stream_to_end) - git_inflate_end(stream_to_end); - if (map) - munmap(map, mapsize); - if (oi) { - if (oi->sizep == &size_scratch) - oi->sizep = NULL; - if (oi->typep == &type_scratch) - oi->typep = NULL; - if 
(oi->delta_base_oid) - oidclr(oi->delta_base_oid, loose->base.odb->repo->hash_algo); - if (!ret) - oi->whence = OI_LOOSE; - } - - return ret; -} - static void hash_object_body(const struct git_hash_algo *algo, struct git_hash_ctx *c, const void *buf, unsigned long len, struct object_id *oid, @@ -1667,13 +1511,13 @@ int read_pack_header(int fd, struct pack_header *header) return 0; } -static int for_each_file_in_obj_subdir(unsigned int subdir_nr, - struct strbuf *path, - const struct git_hash_algo *algop, - each_loose_object_fn obj_cb, - each_loose_cruft_fn cruft_cb, - each_loose_subdir_fn subdir_cb, - void *data) +int for_each_file_in_obj_subdir(unsigned int subdir_nr, + struct strbuf *path, + const struct git_hash_algo *algop, + each_loose_object_fn obj_cb, + each_loose_cruft_fn cruft_cb, + each_loose_subdir_fn subdir_cb, + void *data) { size_t origlen, baselen; DIR *dir; @@ -1758,78 +1602,6 @@ int for_each_loose_file_in_source(struct odb_source *source, return r; } -struct for_each_object_wrapper_data { - struct odb_source_loose *loose; - const struct object_info *request; - odb_for_each_object_cb cb; - void *cb_data; -}; - -static int for_each_object_wrapper_cb(const struct object_id *oid, - const char *path, - void *cb_data) -{ - struct for_each_object_wrapper_data *data = cb_data; - - if (data->request) { - struct object_info oi = *data->request; - - if (read_object_info_from_path(data->loose, path, oid, &oi, 0) < 0) - return -1; - - return data->cb(oid, &oi, data->cb_data); - } else { - return data->cb(oid, NULL, data->cb_data); - } -} - -static int for_each_prefixed_object_wrapper_cb(const struct object_id *oid, - void *node_data UNUSED, - void *cb_data) -{ - struct for_each_object_wrapper_data *data = cb_data; - if (data->request) { - struct object_info oi = *data->request; - - if (odb_source_read_object_info(&data->loose->base, - oid, &oi, 0) < 0) - return -1; - - return data->cb(oid, &oi, data->cb_data); - } else { - return data->cb(oid, NULL, 
data->cb_data); - } -} - -int odb_source_loose_for_each_object(struct odb_source *source, - const struct object_info *request, - odb_for_each_object_cb cb, - void *cb_data, - const struct odb_for_each_object_options *opts) -{ - struct odb_source_files *files = odb_source_files_downcast(source); - struct for_each_object_wrapper_data data = { - .loose = files->loose, - .request = request, - .cb = cb, - .cb_data = cb_data, - }; - - /* There are no loose promisor objects, so we can return immediately. */ - if ((opts->flags & ODB_FOR_EACH_OBJECT_PROMISOR_ONLY)) - return 0; - if ((opts->flags & ODB_FOR_EACH_OBJECT_LOCAL_ONLY) && !source->local) - return 0; - - if (opts->prefix) - return oidtree_each(odb_source_loose_cache(source, opts->prefix), - opts->prefix, opts->prefix_hex_len, - for_each_prefixed_object_wrapper_cb, &data); - - return for_each_loose_file_in_source(source, for_each_object_wrapper_cb, - NULL, NULL, &data); -} - static int count_loose_object(const struct object_id *oid UNUSED, struct object_info *oi UNUSED, void *payload) @@ -1843,6 +1615,7 @@ int odb_source_loose_count_objects(struct odb_source *source, enum odb_count_objects_flags flags, unsigned long *out) { + struct odb_source_files *files = odb_source_files_downcast(source); const unsigned hexsz = source->odb->repo->hash_algo->hexsz - 2; char *path = NULL; DIR *dir = NULL; @@ -1878,8 +1651,8 @@ int odb_source_loose_count_objects(struct odb_source *source, } else { struct odb_for_each_object_options opts = { 0 }; *out = 0; - ret = odb_source_loose_for_each_object(source, NULL, count_loose_object, - out, &opts); + ret = odb_source_for_each_object(&files->loose->base, NULL, count_loose_object, + out, &opts); } out: @@ -1910,6 +1683,7 @@ int odb_source_loose_find_abbrev_len(struct odb_source *source, unsigned min_len, unsigned *out) { + struct odb_source_files *files = odb_source_files_downcast(source); struct odb_for_each_object_options opts = { .prefix = oid, .prefix_hex_len = min_len, @@ -1920,54 
+1694,13 @@ int odb_source_loose_find_abbrev_len(struct odb_source *source, }; int ret; - ret = odb_source_loose_for_each_object(source, NULL, find_abbrev_len_cb, - &data, &opts); + ret = odb_source_for_each_object(&files->loose->base, NULL, find_abbrev_len_cb, + &data, &opts); *out = data.len; return ret; } -static int append_loose_object(const struct object_id *oid, - const char *path UNUSED, - void *data) -{ - oidtree_insert(data, oid, NULL); - return 0; -} - -static struct oidtree *odb_source_loose_cache(struct odb_source *source, - const struct object_id *oid) -{ - struct odb_source_files *files = odb_source_files_downcast(source); - int subdir_nr = oid->hash[0]; - struct strbuf buf = STRBUF_INIT; - size_t word_bits = bitsizeof(files->loose->subdir_seen[0]); - size_t word_index = subdir_nr / word_bits; - size_t mask = (size_t)1u << (subdir_nr % word_bits); - uint32_t *bitmap; - - if (subdir_nr < 0 || - (size_t) subdir_nr >= bitsizeof(files->loose->subdir_seen)) - BUG("subdir_nr out of range"); - - bitmap = &files->loose->subdir_seen[word_index]; - if (*bitmap & mask) - return files->loose->cache; - if (!files->loose->cache) { - ALLOC_ARRAY(files->loose->cache, 1); - oidtree_init(files->loose->cache); - } - strbuf_addstr(&buf, source->path); - for_each_file_in_obj_subdir(subdir_nr, &buf, - source->odb->repo->hash_algo, - append_loose_object, - NULL, NULL, - files->loose->cache); - *bitmap |= mask; - strbuf_release(&buf); - return files->loose->cache; -} - static int check_stream_oid(git_zstream *stream, const char *hdr, unsigned long size, diff --git a/object-file.h b/object-file.h index d93b7ffad704b0..9ee5649220931b 100644 --- a/object-file.h +++ b/object-file.h @@ -6,6 +6,9 @@ #include "odb.h" #include "odb/source-loose.h" +/* The maximum size for an object header. 
*/ +#define MAX_HEADER_LEN 32 + struct index_state; enum { @@ -85,19 +88,13 @@ int for_each_loose_file_in_source(struct odb_source *source, each_loose_cruft_fn cruft_cb, each_loose_subdir_fn subdir_cb, void *data); - -/* - * Iterate through all loose objects in the given object database source and - * invoke the callback function for each of them. If an object info request is - * given, then the object info will be read for every individual object and - * passed to the callback as if `odb_source_loose_read_object_info()` was - * called for the object. - */ -int odb_source_loose_for_each_object(struct odb_source *source, - const struct object_info *request, - odb_for_each_object_cb cb, - void *cb_data, - const struct odb_for_each_object_options *opts); +int for_each_file_in_obj_subdir(unsigned int subdir_nr, + struct strbuf *path, + const struct git_hash_algo *algop, + each_loose_object_fn obj_cb, + each_loose_cruft_fn cruft_cb, + each_loose_subdir_fn subdir_cb, + void *data); /* * Count the number of loose objects in this source. 
@@ -188,12 +185,6 @@ int read_loose_object(struct repository *repo, void **contents, struct object_info *oi); -int read_object_info_from_path(struct odb_source_loose *loose, - const char *path, - const struct object_id *oid, - struct object_info *oi, - enum object_info_flags flags); - enum unpack_loose_header_result { ULHR_OK, ULHR_BAD, @@ -217,6 +208,9 @@ enum unpack_loose_header_result unpack_loose_header(git_zstream *stream, unsigned long mapsize, void *buffer, unsigned long bufsiz); +void *unpack_loose_rest(git_zstream *stream, + void *buffer, unsigned long size, + const struct object_id *oid); int parse_loose_header(const char *hdr, struct object_info *oi); diff --git a/odb/source-files.c b/odb/source-files.c index 90806ddf86b662..676a641739bcbf 100644 --- a/odb/source-files.c +++ b/odb/source-files.c @@ -82,7 +82,7 @@ static int odb_source_files_for_each_object(struct odb_source *source, int ret; if (!(opts->flags & ODB_FOR_EACH_OBJECT_PROMISOR_ONLY)) { - ret = odb_source_loose_for_each_object(source, request, cb, cb_data, opts); + ret = odb_source_for_each_object(&files->loose->base, request, cb, cb_data, opts); if (ret) return ret; } diff --git a/odb/source-loose.c b/odb/source-loose.c index 4b82c6f316512e..4e8b923498b5b2 100644 --- a/odb/source-loose.c +++ b/odb/source-loose.c @@ -2,6 +2,7 @@ #include "abspath.h" #include "chdir-notify.h" #include "gettext.h" +#include "hex.h" #include "loose.h" #include "object-file.h" #include "odb.h" @@ -9,8 +10,198 @@ #include "odb/source-loose.h" #include "odb/streaming.h" #include "oidtree.h" +#include "repository.h" #include "strbuf.h" +static int append_loose_object(const struct object_id *oid, + const char *path UNUSED, + void *data) +{ + oidtree_insert(data, oid, NULL); + return 0; +} + +static struct oidtree *odb_source_loose_cache(struct odb_source_loose *loose, + const struct object_id *oid) +{ + int subdir_nr = oid->hash[0]; + struct strbuf buf = STRBUF_INIT; + size_t word_bits = 
bitsizeof(loose->subdir_seen[0]); + size_t word_index = subdir_nr / word_bits; + size_t mask = (size_t)1u << (subdir_nr % word_bits); + uint32_t *bitmap; + + if (subdir_nr < 0 || + (size_t) subdir_nr >= bitsizeof(loose->subdir_seen)) + BUG("subdir_nr out of range"); + + bitmap = &loose->subdir_seen[word_index]; + if (*bitmap & mask) + return loose->cache; + if (!loose->cache) { + ALLOC_ARRAY(loose->cache, 1); + oidtree_init(loose->cache); + } + strbuf_addstr(&buf, loose->base.path); + for_each_file_in_obj_subdir(subdir_nr, &buf, + loose->base.odb->repo->hash_algo, + append_loose_object, + NULL, NULL, + loose->cache); + *bitmap |= mask; + strbuf_release(&buf); + return loose->cache; +} + +static int quick_has_loose(struct odb_source_loose *loose, + const struct object_id *oid) +{ + return !!oidtree_contains(odb_source_loose_cache(loose, oid), oid); +} + +static int read_object_info_from_path(struct odb_source_loose *loose, + const char *path, + const struct object_id *oid, + struct object_info *oi, + enum object_info_flags flags) +{ + int ret; + int fd; + unsigned long mapsize; + void *map = NULL; + git_zstream stream, *stream_to_end = NULL; + char hdr[MAX_HEADER_LEN]; + unsigned long size_scratch; + enum object_type type_scratch; + struct stat st; + + /* + * If we don't care about type or size, then we don't + * need to look inside the object at all. Note that we + * do not optimize out the stat call, even if the + * caller doesn't care about the disk-size, since our + * return value implicitly indicates whether the + * object even exists. + */ + if (!oi || (!oi->typep && !oi->sizep && !oi->contentp)) { + struct stat st; + + if ((!oi || (!oi->disk_sizep && !oi->mtimep)) && (flags & OBJECT_INFO_QUICK)) { + ret = quick_has_loose(loose, oid) ? 
0 : -1; + goto out; + } + + if (lstat(path, &st) < 0) { + ret = -1; + goto out; + } + + if (oi) { + if (oi->disk_sizep) + *oi->disk_sizep = st.st_size; + if (oi->mtimep) + *oi->mtimep = st.st_mtime; + } + + ret = 0; + goto out; + } + + fd = git_open(path); + if (fd < 0) { + if (errno != ENOENT) + error_errno(_("unable to open loose object %s"), oid_to_hex(oid)); + ret = -1; + goto out; + } + + if (fstat(fd, &st)) { + close(fd); + ret = -1; + goto out; + } + + mapsize = xsize_t(st.st_size); + if (!mapsize) { + close(fd); + ret = error(_("object file %s is empty"), path); + goto out; + } + + map = xmmap(NULL, mapsize, PROT_READ, MAP_PRIVATE, fd, 0); + close(fd); + if (!map) { + ret = -1; + goto out; + } + + if (oi->disk_sizep) + *oi->disk_sizep = mapsize; + if (oi->mtimep) + *oi->mtimep = st.st_mtime; + + stream_to_end = &stream; + + switch (unpack_loose_header(&stream, map, mapsize, hdr, sizeof(hdr))) { + case ULHR_OK: + if (!oi->sizep) + oi->sizep = &size_scratch; + if (!oi->typep) + oi->typep = &type_scratch; + + if (parse_loose_header(hdr, oi) < 0) { + ret = error(_("unable to parse %s header"), oid_to_hex(oid)); + goto corrupt; + } + + if (*oi->typep < 0) + die(_("invalid object type")); + + if (oi->contentp) { + *oi->contentp = unpack_loose_rest(&stream, hdr, *oi->sizep, oid); + if (!*oi->contentp) { + ret = -1; + goto corrupt; + } + } + + break; + case ULHR_BAD: + ret = error(_("unable to unpack %s header"), + oid_to_hex(oid)); + goto corrupt; + case ULHR_TOO_LONG: + ret = error(_("header for %s too long, exceeds %d bytes"), + oid_to_hex(oid), MAX_HEADER_LEN); + goto corrupt; + } + + ret = 0; + +corrupt: + if (ret && (flags & OBJECT_INFO_DIE_IF_CORRUPT)) + die(_("loose object %s (stored in %s) is corrupt"), + oid_to_hex(oid), path); + +out: + if (stream_to_end) + git_inflate_end(stream_to_end); + if (map) + munmap(map, mapsize); + if (oi) { + if (oi->sizep == &size_scratch) + oi->sizep = NULL; + if (oi->typep == &type_scratch) + oi->typep = NULL; + if 
(oi->delta_base_oid) + oidclr(oi->delta_base_oid, loose->base.odb->repo->hash_algo); + if (!ret) + oi->whence = OI_LOOSE; + } + + return ret; +} + static int odb_source_loose_read_object_info(struct odb_source *source, const struct object_id *oid, struct object_info *oi, @@ -218,6 +409,78 @@ static int odb_source_loose_read_object_stream(struct odb_read_stream **out, return -1; } +struct for_each_object_wrapper_data { + struct odb_source_loose *loose; + const struct object_info *request; + odb_for_each_object_cb cb; + void *cb_data; +}; + +static int for_each_object_wrapper_cb(const struct object_id *oid, + const char *path, + void *cb_data) +{ + struct for_each_object_wrapper_data *data = cb_data; + + if (data->request) { + struct object_info oi = *data->request; + + if (read_object_info_from_path(data->loose, path, oid, &oi, 0) < 0) + return -1; + + return data->cb(oid, &oi, data->cb_data); + } else { + return data->cb(oid, NULL, data->cb_data); + } +} + +static int for_each_prefixed_object_wrapper_cb(const struct object_id *oid, + void *node_data UNUSED, + void *cb_data) +{ + struct for_each_object_wrapper_data *data = cb_data; + if (data->request) { + struct object_info oi = *data->request; + + if (odb_source_read_object_info(&data->loose->base, + oid, &oi, 0) < 0) + return -1; + + return data->cb(oid, &oi, data->cb_data); + } else { + return data->cb(oid, NULL, data->cb_data); + } +} + +static int odb_source_loose_for_each_object(struct odb_source *source, + const struct object_info *request, + odb_for_each_object_cb cb, + void *cb_data, + const struct odb_for_each_object_options *opts) +{ + struct odb_source_loose *loose = odb_source_loose_downcast(source); + struct for_each_object_wrapper_data data = { + .loose = loose, + .request = request, + .cb = cb, + .cb_data = cb_data, + }; + + /* There are no loose promisor objects, so we can return immediately. 
*/ + if ((opts->flags & ODB_FOR_EACH_OBJECT_PROMISOR_ONLY)) + return 0; + if ((opts->flags & ODB_FOR_EACH_OBJECT_LOCAL_ONLY) && !source->local) + return 0; + + if (opts->prefix) + return oidtree_each(odb_source_loose_cache(loose, opts->prefix), + opts->prefix, opts->prefix_hex_len, + for_each_prefixed_object_wrapper_cb, &data); + + return for_each_loose_file_in_source(source, for_each_object_wrapper_cb, + NULL, NULL, &data); +} + static void odb_source_loose_clear_cache(struct odb_source_loose *loose) { oidtree_clear(loose->cache); @@ -273,6 +536,7 @@ struct odb_source_loose *odb_source_loose_new(struct odb_source_files *files) loose->base.reprepare = odb_source_loose_reprepare; loose->base.read_object_info = odb_source_loose_read_object_info; loose->base.read_object_stream = odb_source_loose_read_object_stream; + loose->base.for_each_object = odb_source_loose_for_each_object; if (!is_absolute_path(loose->base.path)) chdir_notify_register(NULL, odb_source_loose_reparent, loose); From 8a6da81cc113607bdc1ac08395f6e7121cd652e9 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Mon, 1 Jun 2026 10:20:32 +0200 Subject: [PATCH 17/30] odb/source-loose: wire up `find_abbrev_len()` callback Move `odb_source_loose_find_abbrev_len()` and its associated helpers from "object-file.c" into "odb/source-loose.c" and wire it up as the `find_abbrev_len` callback of the loose source. 
Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- object-file.c | 39 --------------------------------------- object-file.h | 12 ------------ odb/source-files.c | 2 +- odb/source-loose.c | 40 ++++++++++++++++++++++++++++++++++++++++ 4 files changed, 41 insertions(+), 52 deletions(-) diff --git a/object-file.c b/object-file.c index 157ecad3ea204a..11957aa44f44fd 100644 --- a/object-file.c +++ b/object-file.c @@ -1662,45 +1662,6 @@ int odb_source_loose_count_objects(struct odb_source *source, return ret; } -struct find_abbrev_len_data { - const struct object_id *oid; - unsigned len; -}; - -static int find_abbrev_len_cb(const struct object_id *oid, - struct object_info *oi UNUSED, - void *cb_data) -{ - struct find_abbrev_len_data *data = cb_data; - unsigned len = oid_common_prefix_hexlen(oid, data->oid); - if (len != hash_algos[oid->algo].hexsz && len >= data->len) - data->len = len + 1; - return 0; -} - -int odb_source_loose_find_abbrev_len(struct odb_source *source, - const struct object_id *oid, - unsigned min_len, - unsigned *out) -{ - struct odb_source_files *files = odb_source_files_downcast(source); - struct odb_for_each_object_options opts = { - .prefix = oid, - .prefix_hex_len = min_len, - }; - struct find_abbrev_len_data data = { - .oid = oid, - .len = min_len, - }; - int ret; - - ret = odb_source_for_each_object(&files->loose->base, NULL, find_abbrev_len_cb, - &data, &opts); - *out = data.len; - - return ret; -} - static int check_stream_oid(git_zstream *stream, const char *hdr, unsigned long size, diff --git a/object-file.h b/object-file.h index 9ee5649220931b..96760db0e1cb2b 100644 --- a/object-file.h +++ b/object-file.h @@ -110,18 +110,6 @@ int odb_source_loose_count_objects(struct odb_source *source, enum odb_count_objects_flags flags, unsigned long *out); -/* - * Find the shortest unique prefix for the given object ID, where `min_len` is - * the minimum length that the prefix should have. 
- * - * Returns 0 on success, in which case the computed length will be written to - * `out`. Otherwise, a negative error code is returned. - */ -int odb_source_loose_find_abbrev_len(struct odb_source *source, - const struct object_id *oid, - unsigned min_len, - unsigned *out); - /** * format_object_header() is a thin wrapper around s xsnprintf() that * writes the initial " " part of the loose object diff --git a/odb/source-files.c b/odb/source-files.c index 676a641739bcbf..4a54b10e4af11d 100644 --- a/odb/source-files.c +++ b/odb/source-files.c @@ -136,7 +136,7 @@ static int odb_source_files_find_abbrev_len(struct odb_source *source, if (ret < 0) goto out; - ret = odb_source_loose_find_abbrev_len(source, oid, len, &len); + ret = odb_source_find_abbrev_len(&files->loose->base, oid, len, &len); if (ret < 0) goto out; diff --git a/odb/source-loose.c b/odb/source-loose.c index 4e8b923498b5b2..4b8d10bc870374 100644 --- a/odb/source-loose.c +++ b/odb/source-loose.c @@ -481,6 +481,45 @@ static int odb_source_loose_for_each_object(struct odb_source *source, NULL, NULL, &data); } +struct find_abbrev_len_data { + const struct object_id *oid; + unsigned len; +}; + +static int find_abbrev_len_cb(const struct object_id *oid, + struct object_info *oi UNUSED, + void *cb_data) +{ + struct find_abbrev_len_data *data = cb_data; + unsigned len = oid_common_prefix_hexlen(oid, data->oid); + if (len != hash_algos[oid->algo].hexsz && len >= data->len) + data->len = len + 1; + return 0; +} + +static int odb_source_loose_find_abbrev_len(struct odb_source *source, + const struct object_id *oid, + unsigned min_len, + unsigned *out) +{ + struct odb_source_loose *loose = odb_source_loose_downcast(source); + struct odb_for_each_object_options opts = { + .prefix = oid, + .prefix_hex_len = min_len, + }; + struct find_abbrev_len_data data = { + .oid = oid, + .len = min_len, + }; + int ret; + + ret = odb_source_for_each_object(&loose->base, NULL, find_abbrev_len_cb, + &data, &opts); + *out = 
data.len; + + return ret; +} + static void odb_source_loose_clear_cache(struct odb_source_loose *loose) { oidtree_clear(loose->cache); @@ -537,6 +576,7 @@ struct odb_source_loose *odb_source_loose_new(struct odb_source_files *files) loose->base.read_object_info = odb_source_loose_read_object_info; loose->base.read_object_stream = odb_source_loose_read_object_stream; loose->base.for_each_object = odb_source_loose_for_each_object; + loose->base.find_abbrev_len = odb_source_loose_find_abbrev_len; if (!is_absolute_path(loose->base.path)) chdir_notify_register(NULL, odb_source_loose_reparent, loose); From 2ade08ac2978dc1c908602c2a4d653836ecb5acb Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Mon, 1 Jun 2026 10:20:33 +0200 Subject: [PATCH 18/30] odb/source-loose: wire up `count_objects()` callback Move `odb_source_loose_count_objects()` and its associated helpers from "object-file.c" into "odb/source-loose.c" and wire it up as the `count_objects()` callback of the loose source. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- builtin/gc.c | 6 ++--- object-file.c | 60 --------------------------------------------- object-file.h | 14 ----------- odb/source-files.c | 2 +- odb/source-loose.c | 61 ++++++++++++++++++++++++++++++++++++++++++++++ 5 files changed, 65 insertions(+), 78 deletions(-) diff --git a/builtin/gc.c b/builtin/gc.c index 84a66d32404e4d..c26c93ee0fe4a3 100644 --- a/builtin/gc.c +++ b/builtin/gc.c @@ -466,6 +466,7 @@ static int rerere_gc_condition(struct gc_config *cfg UNUSED) static int too_many_loose_objects(int limit) { + struct odb_source_files *files = odb_source_files_downcast(the_repository->objects->sources); /* * This is weird, but stems from legacy behaviour: the GC auto * threshold was always essentially interpreted as if it was rounded up @@ -474,9 +475,8 @@ static int too_many_loose_objects(int limit) int auto_threshold = DIV_ROUND_UP(limit, 256) * 256; unsigned long loose_count; - if 
(odb_source_loose_count_objects(the_repository->objects->sources, - ODB_COUNT_OBJECTS_APPROXIMATE, - &loose_count) < 0) + if (odb_source_count_objects(&files->loose->base, ODB_COUNT_OBJECTS_APPROXIMATE, + &loose_count) < 0) return 0; return loose_count > auto_threshold; diff --git a/object-file.c b/object-file.c index 11957aa44f44fd..9b2044de3784e6 100644 --- a/object-file.c +++ b/object-file.c @@ -1602,66 +1602,6 @@ int for_each_loose_file_in_source(struct odb_source *source, return r; } -static int count_loose_object(const struct object_id *oid UNUSED, - struct object_info *oi UNUSED, - void *payload) -{ - unsigned long *count = payload; - (*count)++; - return 0; -} - -int odb_source_loose_count_objects(struct odb_source *source, - enum odb_count_objects_flags flags, - unsigned long *out) -{ - struct odb_source_files *files = odb_source_files_downcast(source); - const unsigned hexsz = source->odb->repo->hash_algo->hexsz - 2; - char *path = NULL; - DIR *dir = NULL; - int ret; - - if (flags & ODB_COUNT_OBJECTS_APPROXIMATE) { - unsigned long count = 0; - struct dirent *ent; - - path = xstrfmt("%s/17", source->path); - - dir = opendir(path); - if (!dir) { - if (errno == ENOENT) { - *out = 0; - ret = 0; - goto out; - } - - ret = error_errno("cannot open object shard '%s'", path); - goto out; - } - - while ((ent = readdir(dir)) != NULL) { - if (strspn(ent->d_name, "0123456789abcdef") != hexsz || - ent->d_name[hexsz] != '\0') - continue; - count++; - } - - *out = count * 256; - ret = 0; - } else { - struct odb_for_each_object_options opts = { 0 }; - *out = 0; - ret = odb_source_for_each_object(&files->loose->base, NULL, count_loose_object, - out, &opts); - } - -out: - if (dir) - closedir(dir); - free(path); - return ret; -} - static int check_stream_oid(git_zstream *stream, const char *hdr, unsigned long size, diff --git a/object-file.h b/object-file.h index 96760db0e1cb2b..bc72d89f548915 100644 --- a/object-file.h +++ b/object-file.h @@ -96,20 +96,6 @@ int 
for_each_file_in_obj_subdir(unsigned int subdir_nr, each_loose_subdir_fn subdir_cb, void *data); -/* - * Count the number of loose objects in this source. - * - * The object count is approximated by opening a single sharding directory for - * loose objects and scanning its contents. The result is then extrapolated by - * 256. This should generally work as a reasonable estimate given that the - * object hash is supposed to be indistinguishable from random. - * - * Returns 0 on success, a negative error code otherwise. - */ -int odb_source_loose_count_objects(struct odb_source *source, - enum odb_count_objects_flags flags, - unsigned long *out); - /** * format_object_header() is a thin wrapper around s xsnprintf() that * writes the initial " " part of the loose object diff --git a/odb/source-files.c b/odb/source-files.c index 4a54b10e4af11d..d5454e170dee66 100644 --- a/odb/source-files.c +++ b/odb/source-files.c @@ -109,7 +109,7 @@ static int odb_source_files_count_objects(struct odb_source *source, if (!(flags & ODB_COUNT_OBJECTS_APPROXIMATE)) { unsigned long loose_count; - ret = odb_source_loose_count_objects(source, flags, &loose_count); + ret = odb_source_count_objects(&files->loose->base, flags, &loose_count); if (ret < 0) goto out; diff --git a/odb/source-loose.c b/odb/source-loose.c index 4b8d10bc870374..27be066327a313 100644 --- a/odb/source-loose.c +++ b/odb/source-loose.c @@ -520,6 +520,66 @@ static int odb_source_loose_find_abbrev_len(struct odb_source *source, return ret; } +static int count_loose_object(const struct object_id *oid UNUSED, + struct object_info *oi UNUSED, + void *payload) +{ + unsigned long *count = payload; + (*count)++; + return 0; +} + +static int odb_source_loose_count_objects(struct odb_source *source, + enum odb_count_objects_flags flags, + unsigned long *out) +{ + struct odb_source_loose *loose = odb_source_loose_downcast(source); + const unsigned hexsz = source->odb->repo->hash_algo->hexsz - 2; + char *path = NULL; + DIR *dir = 
NULL; + int ret; + + if (flags & ODB_COUNT_OBJECTS_APPROXIMATE) { + unsigned long count = 0; + struct dirent *ent; + + path = xstrfmt("%s/17", source->path); + + dir = opendir(path); + if (!dir) { + if (errno == ENOENT) { + *out = 0; + ret = 0; + goto out; + } + + ret = error_errno("cannot open object shard '%s'", path); + goto out; + } + + while ((ent = readdir(dir)) != NULL) { + if (strspn(ent->d_name, "0123456789abcdef") != hexsz || + ent->d_name[hexsz] != '\0') + continue; + count++; + } + + *out = count * 256; + ret = 0; + } else { + struct odb_for_each_object_options opts = { 0 }; + *out = 0; + ret = odb_source_for_each_object(&loose->base, NULL, count_loose_object, + out, &opts); + } + +out: + if (dir) + closedir(dir); + free(path); + return ret; +} + static void odb_source_loose_clear_cache(struct odb_source_loose *loose) { oidtree_clear(loose->cache); @@ -577,6 +637,7 @@ struct odb_source_loose *odb_source_loose_new(struct odb_source_files *files) loose->base.read_object_stream = odb_source_loose_read_object_stream; loose->base.for_each_object = odb_source_loose_for_each_object; loose->base.find_abbrev_len = odb_source_loose_find_abbrev_len; + loose->base.count_objects = odb_source_loose_count_objects; if (!is_absolute_path(loose->base.path)) chdir_notify_register(NULL, odb_source_loose_reparent, loose); From 86f7ab5a1f12ecfdf51b6df0b9b014e2329944be Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Mon, 1 Jun 2026 10:20:34 +0200 Subject: [PATCH 19/30] odb/source-loose: drop `odb_source_loose_has_object()` The function `odb_source_loose_has_object()` checks whether a specific object exists as a loose object on disk by using lstat(3p). This interface is somewhat redundant, as we typically check for object existence in a generic way via `odb_source_read_object_info()`. 
In fact, these two calls are redundant in case the latter is called in a specific way: when called without an object info request and without the `OBJECT_INFO_QUICK` flag, then we will end up doing the same call to lstat(3p) in `read_object_info_from_path()`. Drop the function and adapt callers to instead use the generic interface so that its calling conventions align with that of other sources. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- builtin/pack-objects.c | 12 ++++++++---- object-file.c | 12 ++++-------- object-file.h | 8 -------- 3 files changed, 12 insertions(+), 20 deletions(-) diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c index 480cc0bd8c8d22..a6be3d659f8e36 100644 --- a/builtin/pack-objects.c +++ b/builtin/pack-objects.c @@ -1750,9 +1750,11 @@ static int want_object_in_pack_mtime(const struct object_id *oid, * skip the local object source. */ struct odb_source *source = the_repository->objects->sources->next; - for (; source; source = source->next) - if (odb_source_loose_has_object(source, oid)) + for (; source; source = source->next) { + struct odb_source_files *files = odb_source_files_downcast(source); + if (!odb_source_read_object_info(&files->loose->base, oid, NULL, 0)) return 0; + } } /* @@ -4135,9 +4137,11 @@ static void add_cruft_object_entry(const struct object_id *oid, enum object_type struct odb_source *source = the_repository->objects->sources; int found = 0; - for (; !found && source; source = source->next) - if (odb_source_loose_has_object(source, oid)) + for (; !found && source; source = source->next) { + struct odb_source_files *files = odb_source_files_downcast(source); + if (!odb_source_read_object_info(&files->loose->base, oid, NULL, 0)) found = 1; + } /* * If a traversed tree has a missing blob then we want diff --git a/object-file.c b/object-file.c index 9b2044de3784e6..c83136cf70024c 100644 --- a/object-file.c +++ b/object-file.c @@ -96,12 +96,6 @@ static int check_and_freshen_source(struct 
odb_source *source, return check_and_freshen_file(path.buf, freshen); } -int odb_source_loose_has_object(struct odb_source *source, - const struct object_id *oid) -{ - return check_and_freshen_source(source, oid, 0); -} - int format_object_header(char *str, size_t size, enum object_type type, size_t objsize) { @@ -1000,9 +994,11 @@ int force_object_loose(struct odb_source *source, int hdrlen; int ret; - for (struct odb_source *s = source->odb->sources; s; s = s->next) - if (odb_source_loose_has_object(s, oid)) + for (struct odb_source *s = source->odb->sources; s; s = s->next) { + struct odb_source_files *files = odb_source_files_downcast(s); + if (!odb_source_read_object_info(&files->loose->base, oid, NULL, 0)) return 0; + } oi.typep = &type; oi.sizep = &len; diff --git a/object-file.h b/object-file.h index bc72d89f548915..506ca6be40b749 100644 --- a/object-file.h +++ b/object-file.h @@ -23,14 +23,6 @@ int index_path(struct index_state *istate, struct object_id *oid, const char *pa struct object_info; struct odb_source; -/* - * Return true iff an object database source has a loose object - * with the specified name. This function does not respect replace - * references. - */ -int odb_source_loose_has_object(struct odb_source *source, - const struct object_id *oid); - int odb_source_loose_freshen_object(struct odb_source *source, const struct object_id *oid); From d8b9e8bb23ece128179ad54ed5ecbcd4bd809b1e Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Mon, 1 Jun 2026 10:20:35 +0200 Subject: [PATCH 20/30] odb/source-loose: wire up `freshen_object()` callback Move `odb_source_loose_freshen_object()` from "object-file.c" into "odb/source-loose.c" and wire it up as the `freshen_object()` callback of the loose source. As part of the move, `check_and_freshen_source()` is inlined into the callback function, as it has no other callers anymore. 
Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- object-file.c | 15 --------------- object-file.h | 3 --- odb/source-files.c | 2 +- odb/source-loose.c | 9 +++++++++ 4 files changed, 10 insertions(+), 19 deletions(-) diff --git a/object-file.c b/object-file.c index c83136cf70024c..0689a4e67b156a 100644 --- a/object-file.c +++ b/object-file.c @@ -87,15 +87,6 @@ int check_and_freshen_file(const char *fn, int freshen) return 1; } -static int check_and_freshen_source(struct odb_source *source, - const struct object_id *oid, - int freshen) -{ - static struct strbuf path = STRBUF_INIT; - odb_loose_path(source, &path, oid); - return check_and_freshen_file(path.buf, freshen); -} - int format_object_header(char *str, size_t size, enum object_type type, size_t objsize) { @@ -815,12 +806,6 @@ static int write_loose_object(struct odb_source *source, FOF_SKIP_COLLISION_CHECK); } -int odb_source_loose_freshen_object(struct odb_source *source, - const struct object_id *oid) -{ - return !!check_and_freshen_source(source, oid, 1); -} - int odb_source_loose_write_stream(struct odb_source *source, struct odb_write_stream *in_stream, size_t len, struct object_id *oid) diff --git a/object-file.h b/object-file.h index 506ca6be40b749..1d90df9d98b78e 100644 --- a/object-file.h +++ b/object-file.h @@ -23,9 +23,6 @@ int index_path(struct index_state *istate, struct object_id *oid, const char *pa struct object_info; struct odb_source; -int odb_source_loose_freshen_object(struct odb_source *source, - const struct object_id *oid); - int odb_source_loose_write_object(struct odb_source *source, const void *buf, unsigned long len, enum object_type type, struct object_id *oid, diff --git a/odb/source-files.c b/odb/source-files.c index d5454e170dee66..ef548e6fe69cd0 100644 --- a/odb/source-files.c +++ b/odb/source-files.c @@ -152,7 +152,7 @@ static int odb_source_files_freshen_object(struct odb_source *source, { struct odb_source_files *files = odb_source_files_downcast(source); 
if (packfile_store_freshen_object(files->packed, oid) || - odb_source_loose_freshen_object(source, oid)) + odb_source_freshen_object(&files->loose->base, oid)) return 1; return 0; } diff --git a/odb/source-loose.c b/odb/source-loose.c index 27be066327a313..e519365d23f680 100644 --- a/odb/source-loose.c +++ b/odb/source-loose.c @@ -580,6 +580,14 @@ static int odb_source_loose_count_objects(struct odb_source *source, return ret; } +static int odb_source_loose_freshen_object(struct odb_source *source, + const struct object_id *oid) +{ + static struct strbuf path = STRBUF_INIT; + odb_loose_path(source, &path, oid); + return !!check_and_freshen_file(path.buf, 1); +} + static void odb_source_loose_clear_cache(struct odb_source_loose *loose) { oidtree_clear(loose->cache); @@ -638,6 +646,7 @@ struct odb_source_loose *odb_source_loose_new(struct odb_source_files *files) loose->base.for_each_object = odb_source_loose_for_each_object; loose->base.find_abbrev_len = odb_source_loose_find_abbrev_len; loose->base.count_objects = odb_source_loose_count_objects; + loose->base.freshen_object = odb_source_loose_freshen_object; if (!is_absolute_path(loose->base.path)) chdir_notify_register(NULL, odb_source_loose_reparent, loose); From 87588db131a5c1c33471606860951c9959bbe6ae Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Mon, 1 Jun 2026 10:20:36 +0200 Subject: [PATCH 21/30] loose: refactor object map to operate on `struct odb_source_loose` While the loose object map functions in "loose.c" accept a generic `struct odb_source *`, they always expect this to be the "files" backend. Furthermore, the subsystem doesn't even care about the "files" backend, but only uses it as a stepping stone to get to the "loose" backend. This assumption is implicit and thus not immediately obvious. Refactor the interfaces to operate on a `struct odb_source_loose` instead, which eliminates the implicit dependency and unnecessary detour via the "files" source.
Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- loose.c | 45 ++++++++++++++++++++++----------------------- loose.h | 4 ++-- object-file.c | 9 ++++++--- 3 files changed, 30 insertions(+), 28 deletions(-) diff --git a/loose.c b/loose.c index f7a3dd1a72f0fc..0b626c1b854642 100644 --- a/loose.c +++ b/loose.c @@ -46,38 +46,36 @@ static int insert_oid_pair(kh_oid_map_t *map, const struct object_id *key, const return 1; } -static int insert_loose_map(struct odb_source *source, +static int insert_loose_map(struct odb_source_loose *loose, const struct object_id *oid, const struct object_id *compat_oid) { - struct odb_source_files *files = odb_source_files_downcast(source); - struct loose_object_map *map = files->loose->map; + struct loose_object_map *map = loose->map; int inserted = 0; inserted |= insert_oid_pair(map->to_compat, oid, compat_oid); inserted |= insert_oid_pair(map->to_storage, compat_oid, oid); if (inserted) - oidtree_insert(files->loose->cache, compat_oid, NULL); + oidtree_insert(loose->cache, compat_oid, NULL); return inserted; } -static int load_one_loose_object_map(struct repository *repo, struct odb_source *source) +static int load_one_loose_object_map(struct repository *repo, struct odb_source_loose *loose) { - struct odb_source_files *files = odb_source_files_downcast(source); struct strbuf buf = STRBUF_INIT, path = STRBUF_INIT; FILE *fp; - if (!files->loose->map) - loose_object_map_init(&files->loose->map); - if (!files->loose->cache) { - ALLOC_ARRAY(files->loose->cache, 1); - oidtree_init(files->loose->cache); + if (!loose->map) + loose_object_map_init(&loose->map); + if (!loose->cache) { + ALLOC_ARRAY(loose->cache, 1); + oidtree_init(loose->cache); } - insert_loose_map(source, repo->hash_algo->empty_tree, repo->compat_hash_algo->empty_tree); - insert_loose_map(source, repo->hash_algo->empty_blob, repo->compat_hash_algo->empty_blob); - insert_loose_map(source, repo->hash_algo->null_oid, repo->compat_hash_algo->null_oid); + 
insert_loose_map(loose, repo->hash_algo->empty_tree, repo->compat_hash_algo->empty_tree); + insert_loose_map(loose, repo->hash_algo->empty_blob, repo->compat_hash_algo->empty_blob); + insert_loose_map(loose, repo->hash_algo->null_oid, repo->compat_hash_algo->null_oid); repo_common_path_replace(repo, &path, "objects/loose-object-idx"); fp = fopen(path.buf, "rb"); @@ -97,7 +95,7 @@ static int load_one_loose_object_map(struct repository *repo, struct odb_source parse_oid_hex_algop(p, &compat_oid, &p, repo->compat_hash_algo) || p != buf.buf + buf.len) goto err; - insert_loose_map(source, &oid, &compat_oid); + insert_loose_map(loose, &oid, &compat_oid); } strbuf_release(&buf); @@ -119,7 +117,8 @@ int repo_read_loose_object_map(struct repository *repo) odb_prepare_alternates(repo->objects); for (source = repo->objects->sources; source; source = source->next) { - if (load_one_loose_object_map(repo, source) < 0) { + struct odb_source_files *files = odb_source_files_downcast(source); + if (load_one_loose_object_map(repo, files->loose) < 0) { return -1; } } @@ -171,7 +170,7 @@ int repo_write_loose_object_map(struct repository *repo) return -1; } -static int write_one_object(struct odb_source *source, +static int write_one_object(struct odb_source_loose *loose, const struct object_id *oid, const struct object_id *compat_oid) { @@ -180,7 +179,7 @@ static int write_one_object(struct odb_source *source, struct stat st; struct strbuf buf = STRBUF_INIT, path = STRBUF_INIT; - strbuf_addf(&path, "%s/loose-object-idx", source->path); + strbuf_addf(&path, "%s/loose-object-idx", loose->base.path); hold_lock_file_for_update_timeout(&lock, path.buf, LOCK_DIE_ON_ERROR, -1); fd = open(path.buf, O_WRONLY | O_CREAT | O_APPEND, 0666); @@ -196,7 +195,7 @@ static int write_one_object(struct odb_source *source, goto errout; if (close(fd)) goto errout; - adjust_shared_perm(source->odb->repo, path.buf); + adjust_shared_perm(loose->base.odb->repo, path.buf); rollback_lock_file(&lock); 
strbuf_release(&buf); strbuf_release(&path); @@ -210,18 +209,18 @@ static int write_one_object(struct odb_source *source, return -1; } -int repo_add_loose_object_map(struct odb_source *source, +int repo_add_loose_object_map(struct odb_source_loose *loose, const struct object_id *oid, const struct object_id *compat_oid) { int inserted = 0; - if (!should_use_loose_object_map(source->odb->repo)) + if (!should_use_loose_object_map(loose->base.odb->repo)) return 0; - inserted = insert_loose_map(source, oid, compat_oid); + inserted = insert_loose_map(loose, oid, compat_oid); if (inserted) - return write_one_object(source, oid, compat_oid); + return write_one_object(loose, oid, compat_oid); return 0; } diff --git a/loose.h b/loose.h index 6af1702973c058..6c9b3f4571602f 100644 --- a/loose.h +++ b/loose.h @@ -4,7 +4,7 @@ #include "khash.h" struct repository; -struct odb_source; +struct odb_source_loose; struct loose_object_map { kh_oid_map_t *to_compat; @@ -17,7 +17,7 @@ int repo_loose_object_map_oid(struct repository *repo, const struct object_id *src, const struct git_hash_algo *dest_algo, struct object_id *dest); -int repo_add_loose_object_map(struct odb_source *source, +int repo_add_loose_object_map(struct odb_source_loose *loose, const struct object_id *oid, const struct object_id *compat_oid); int repo_read_loose_object_map(struct repository *repo); diff --git a/object-file.c b/object-file.c index 0689a4e67b156a..fe24f00d1b79bf 100644 --- a/object-file.c +++ b/object-file.c @@ -810,6 +810,7 @@ int odb_source_loose_write_stream(struct odb_source *source, struct odb_write_stream *in_stream, size_t len, struct object_id *oid) { + struct odb_source_files *files = odb_source_files_downcast(source); const struct git_hash_algo *compat = source->odb->repo->compat_hash_algo; struct object_id compat_oid; int fd, ret, err = 0, flush = 0; @@ -918,7 +919,7 @@ int odb_source_loose_write_stream(struct odb_source *source, err = finalize_object_file_flags(source->odb->repo, 
tmp_file.buf, filename.buf, FOF_SKIP_COLLISION_CHECK); if (!err && compat) - err = repo_add_loose_object_map(source, oid, &compat_oid); + err = repo_add_loose_object_map(files->loose, oid, &compat_oid); cleanup: strbuf_release(&tmp_file); strbuf_release(&filename); @@ -931,6 +932,7 @@ int odb_source_loose_write_object(struct odb_source *source, struct object_id *compat_oid_in, enum odb_write_object_flags flags) { + struct odb_source_files *files = odb_source_files_downcast(source); const struct git_hash_algo *algo = source->odb->repo->hash_algo; const struct git_hash_algo *compat = source->odb->repo->compat_hash_algo; struct object_id compat_oid; @@ -962,13 +964,14 @@ int odb_source_loose_write_object(struct odb_source *source, if (write_loose_object(source, oid, hdr, hdrlen, buf, len, 0, flags)) return -1; if (compat) - return repo_add_loose_object_map(source, oid, &compat_oid); + return repo_add_loose_object_map(files->loose, oid, &compat_oid); return 0; } int force_object_loose(struct odb_source *source, const struct object_id *oid, time_t mtime) { + struct odb_source_files *files = odb_source_files_downcast(source); const struct git_hash_algo *compat = source->odb->repo->compat_hash_algo; void *buf; unsigned long len; @@ -998,7 +1001,7 @@ int force_object_loose(struct odb_source *source, hdrlen = format_object_header(hdr, sizeof(hdr), type, len); ret = write_loose_object(source, oid, hdr, hdrlen, buf, len, mtime, 0); if (!ret && compat) - ret = repo_add_loose_object_map(source, oid, &compat_oid); + ret = repo_add_loose_object_map(files->loose, oid, &compat_oid); free(buf); return ret; From 04a6e84cbdbebadd01d939168f1c69680c174fce Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Mon, 1 Jun 2026 10:20:37 +0200 Subject: [PATCH 22/30] odb/source-loose: wire up `write_object()` callback Move `odb_source_loose_write_object()` from "object-file.c" into "odb/source-loose.c" and wire it up as the `write_object()` callback of the loose source. 
As in preceding commits, this requires us to expose a couple of generic functions from "object-file.c" as they are used in both subsystems now. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- object-file.c | 58 +++++++--------------------------------------- object-file.h | 14 ++++++----- odb/source-files.c | 5 ++-- odb/source-loose.c | 44 +++++++++++++++++++++++++++++++++++ 4 files changed, 63 insertions(+), 58 deletions(-) diff --git a/object-file.c b/object-file.c index fe24f00d1b79bf..7bb5b31bcad88b 100644 --- a/object-file.c +++ b/object-file.c @@ -326,10 +326,10 @@ static void hash_object_body(const struct git_hash_algo *algo, struct git_hash_c git_hash_final_oid(oid, c); } -static void write_object_file_prepare(const struct git_hash_algo *algo, - const void *buf, unsigned long len, - enum object_type type, struct object_id *oid, - char *hdr, int *hdrlen) +void write_object_file_prepare(const struct git_hash_algo *algo, + const void *buf, unsigned long len, + enum object_type type, struct object_id *oid, + char *hdr, int *hdrlen) { struct git_hash_ctx c; @@ -746,10 +746,10 @@ static int end_loose_object_common(struct odb_source *source, return Z_OK; } -static int write_loose_object(struct odb_source *source, - const struct object_id *oid, char *hdr, - int hdrlen, const void *buf, unsigned long len, - time_t mtime, unsigned flags) +int write_loose_object(struct odb_source *source, + const struct object_id *oid, char *hdr, + int hdrlen, const void *buf, unsigned long len, + time_t mtime, unsigned flags) { int fd, ret; unsigned char compressed[4096]; @@ -926,48 +926,6 @@ int odb_source_loose_write_stream(struct odb_source *source, return err; } -int odb_source_loose_write_object(struct odb_source *source, - const void *buf, unsigned long len, - enum object_type type, struct object_id *oid, - struct object_id *compat_oid_in, - enum odb_write_object_flags flags) -{ - struct odb_source_files *files = odb_source_files_downcast(source); - const 
struct git_hash_algo *algo = source->odb->repo->hash_algo; - const struct git_hash_algo *compat = source->odb->repo->compat_hash_algo; - struct object_id compat_oid; - char hdr[MAX_HEADER_LEN]; - int hdrlen = sizeof(hdr); - - /* Generate compat_oid */ - if (compat) { - if (compat_oid_in) - oidcpy(&compat_oid, compat_oid_in); - else if (type == OBJ_BLOB) - hash_object_file(compat, buf, len, type, &compat_oid); - else { - struct strbuf converted = STRBUF_INIT; - convert_object_file(source->odb->repo, &converted, algo, compat, - buf, len, type, 0); - hash_object_file(compat, converted.buf, converted.len, - type, &compat_oid); - strbuf_release(&converted); - } - } - - /* Normally if we have it in the pack then we do not bother writing - * it out into .git/objects/??/?{38} file. - */ - write_object_file_prepare(algo, buf, len, type, oid, hdr, &hdrlen); - if (odb_freshen_object(source->odb, oid)) - return 0; - if (write_loose_object(source, oid, hdr, hdrlen, buf, len, 0, flags)) - return -1; - if (compat) - return repo_add_loose_object_map(files->loose, oid, &compat_oid); - return 0; -} - int force_object_loose(struct odb_source *source, const struct object_id *oid, time_t mtime) { diff --git a/object-file.h b/object-file.h index 1d90df9d98b78e..2b32592de1135b 100644 --- a/object-file.h +++ b/object-file.h @@ -23,12 +23,6 @@ int index_path(struct index_state *istate, struct object_id *oid, const char *pa struct object_info; struct odb_source; -int odb_source_loose_write_object(struct odb_source *source, - const void *buf, unsigned long len, - enum object_type type, struct object_id *oid, - struct object_id *compat_oid_in, - enum odb_write_object_flags flags); - int odb_source_loose_write_stream(struct odb_source *source, struct odb_write_stream *stream, size_t len, struct object_id *oid); @@ -129,6 +123,14 @@ int finalize_object_file_flags(struct repository *repo, void hash_object_file(const struct git_hash_algo *algo, const void *buf, unsigned long len, enum object_type 
type, struct object_id *oid); +void write_object_file_prepare(const struct git_hash_algo *algo, + const void *buf, unsigned long len, + enum object_type type, struct object_id *oid, + char *hdr, int *hdrlen); +int write_loose_object(struct odb_source *source, + const struct object_id *oid, char *hdr, + int hdrlen, const void *buf, unsigned long len, + time_t mtime, unsigned flags); /* Helper to check and "touch" a file */ int check_and_freshen_file(const char *fn, int freshen); diff --git a/odb/source-files.c b/odb/source-files.c index ef548e6fe69cd0..52ba04237acfd7 100644 --- a/odb/source-files.c +++ b/odb/source-files.c @@ -164,8 +164,9 @@ static int odb_source_files_write_object(struct odb_source *source, struct object_id *compat_oid, enum odb_write_object_flags flags) { - return odb_source_loose_write_object(source, buf, len, type, - oid, compat_oid, flags); + struct odb_source_files *files = odb_source_files_downcast(source); + return odb_source_write_object(&files->loose->base, buf, len, type, + oid, compat_oid, flags); } static int odb_source_files_write_object_stream(struct odb_source *source, diff --git a/odb/source-loose.c b/odb/source-loose.c index e519365d23f680..c91018109e5b68 100644 --- a/odb/source-loose.c +++ b/odb/source-loose.c @@ -5,6 +5,7 @@ #include "hex.h" #include "loose.h" #include "object-file.h" +#include "object-file-convert.h" #include "odb.h" #include "odb/source-files.h" #include "odb/source-loose.h" @@ -588,6 +589,48 @@ static int odb_source_loose_freshen_object(struct odb_source *source, return !!check_and_freshen_file(path.buf, 1); } +static int odb_source_loose_write_object(struct odb_source *source, + const void *buf, unsigned long len, + enum object_type type, struct object_id *oid, + struct object_id *compat_oid_in, + enum odb_write_object_flags flags) +{ + struct odb_source_loose *loose = odb_source_loose_downcast(source); + const struct git_hash_algo *algo = source->odb->repo->hash_algo; + const struct git_hash_algo *compat = 
source->odb->repo->compat_hash_algo; + struct object_id compat_oid; + char hdr[MAX_HEADER_LEN]; + int hdrlen = sizeof(hdr); + + /* Generate compat_oid */ + if (compat) { + if (compat_oid_in) + oidcpy(&compat_oid, compat_oid_in); + else if (type == OBJ_BLOB) + hash_object_file(compat, buf, len, type, &compat_oid); + else { + struct strbuf converted = STRBUF_INIT; + convert_object_file(source->odb->repo, &converted, algo, compat, + buf, len, type, 0); + hash_object_file(compat, converted.buf, converted.len, + type, &compat_oid); + strbuf_release(&converted); + } + } + + /* Normally if we have it in the pack then we do not bother writing + * it out into .git/objects/??/?{38} file. + */ + write_object_file_prepare(algo, buf, len, type, oid, hdr, &hdrlen); + if (odb_freshen_object(source->odb, oid)) + return 0; + if (write_loose_object(source, oid, hdr, hdrlen, buf, len, 0, flags)) + return -1; + if (compat) + return repo_add_loose_object_map(loose, oid, &compat_oid); + return 0; +} + static void odb_source_loose_clear_cache(struct odb_source_loose *loose) { oidtree_clear(loose->cache); @@ -647,6 +690,7 @@ struct odb_source_loose *odb_source_loose_new(struct odb_source_files *files) loose->base.find_abbrev_len = odb_source_loose_find_abbrev_len; loose->base.count_objects = odb_source_loose_count_objects; loose->base.freshen_object = odb_source_loose_freshen_object; + loose->base.write_object = odb_source_loose_write_object; if (!is_absolute_path(loose->base.path)) chdir_notify_register(NULL, odb_source_loose_reparent, loose); From b9906a645c38ef77643d661ac9a5a6aa31fbeaf4 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Mon, 1 Jun 2026 10:20:38 +0200 Subject: [PATCH 23/30] object-file: refactor writing objects to use loose source The "object-file" subsystem still hosts the majority of logic used to write loose objects. 
Eventually, we'll want to move this logic into "odb/source-loose.c", but this isn't yet easily possible because a lot of the writing logic is still being shared with `force_object_loose()`. We will eventually detangle this logic so that we can indeed move all of it into the "loose" source. Meanwhile though, refactor the code so that it operates on a `struct odb_source_loose` directly to already make the dependency explicit. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- http-walker.c | 3 +- http.c | 6 ++-- object-file.c | 75 +++++++++++++++++++++++----------------------- object-file.h | 6 ++-- odb/source-files.c | 3 +- odb/source-loose.c | 9 +++--- 6 files changed, 53 insertions(+), 49 deletions(-) diff --git a/http-walker.c b/http-walker.c index 1b6d496548373e..435a7265408fa4 100644 --- a/http-walker.c +++ b/http-walker.c @@ -539,8 +539,9 @@ static int fetch_object(struct walker *walker, const struct object_id *oid) } else if (!oideq(&obj_req->oid, &req->real_oid)) { ret = error("File %s has bad hash", hex); } else if (req->rename < 0) { + struct odb_source_files *files = odb_source_files_downcast(the_repository->objects->sources); struct strbuf buf = STRBUF_INIT; - odb_loose_path(the_repository->objects->sources, &buf, &req->oid); + odb_loose_path(files->loose, &buf, &req->oid); ret = error("unable to write sha1 filename %s", buf.buf); strbuf_release(&buf); } diff --git a/http.c b/http.c index ea9b16861bc3d4..3fcc0122337ba4 100644 --- a/http.c +++ b/http.c @@ -2826,6 +2826,7 @@ static size_t fwrite_sha1_file(char *ptr, size_t eltsize, size_t nmemb, struct http_object_request *new_http_object_request(const char *base_url, const struct object_id *oid) { + struct odb_source_files *files = odb_source_files_downcast(the_repository->objects->sources); char *hex = oid_to_hex(oid); struct strbuf filename = STRBUF_INIT; struct strbuf prevfile = STRBUF_INIT; @@ -2840,7 +2841,7 @@ struct http_object_request *new_http_object_request(const char *base_url, 
oidcpy(&freq->oid, oid); freq->localfile = -1; - odb_loose_path(the_repository->objects->sources, &filename, oid); + odb_loose_path(files->loose, &filename, oid); strbuf_addf(&freq->tmpfile, "%s.temp", filename.buf); strbuf_addf(&prevfile, "%s.prev", filename.buf); @@ -2966,6 +2967,7 @@ void process_http_object_request(struct http_object_request *freq) int finish_http_object_request(struct http_object_request *freq) { + struct odb_source_files *files = odb_source_files_downcast(the_repository->objects->sources); struct stat st; struct strbuf filename = STRBUF_INIT; @@ -2992,7 +2994,7 @@ int finish_http_object_request(struct http_object_request *freq) unlink_or_warn(freq->tmpfile.buf); return -1; } - odb_loose_path(the_repository->objects->sources, &filename, &freq->oid); + odb_loose_path(files->loose, &filename, &freq->oid); freq->rename = finalize_object_file(the_repository, freq->tmpfile.buf, filename.buf); strbuf_release(&filename); diff --git a/object-file.c b/object-file.c index 7bb5b31bcad88b..bce941874eb994 100644 --- a/object-file.c +++ b/object-file.c @@ -54,14 +54,14 @@ static void fill_loose_path(struct strbuf *buf, } } -const char *odb_loose_path(struct odb_source *source, +const char *odb_loose_path(struct odb_source_loose *loose, struct strbuf *buf, const struct object_id *oid) { strbuf_reset(buf); - strbuf_addstr(buf, source->path); + strbuf_addstr(buf, loose->base.path); strbuf_addch(buf, '/'); - fill_loose_path(buf, oid, source->odb->repo->hash_algo); + fill_loose_path(buf, oid, loose->base.odb->repo->hash_algo); return buf->buf; } @@ -575,14 +575,14 @@ static void flush_loose_object_transaction(struct odb_transaction_files *transac } /* Finalize a file on disk, and close it. 
*/ -static void close_loose_object(struct odb_source *source, +static void close_loose_object(struct odb_source_loose *loose, int fd, const char *filename) { - if (source->will_destroy) + if (loose->base.will_destroy) goto out; if (batch_fsync_enabled(FSYNC_COMPONENT_LOOSE_OBJECT)) - fsync_loose_object_transaction(source->odb->transaction, fd, filename); + fsync_loose_object_transaction(loose->base.odb->transaction, fd, filename); else if (fsync_object_files > 0) fsync_or_die(fd, filename); else @@ -651,7 +651,7 @@ static int create_tmpfile(struct repository *repo, * Returns a "fd", which should later be provided to * end_loose_object_common(). */ -static int start_loose_object_common(struct odb_source *source, +static int start_loose_object_common(struct odb_source_loose *loose, struct strbuf *tmp_file, const char *filename, unsigned flags, git_zstream *stream, @@ -659,18 +659,18 @@ static int start_loose_object_common(struct odb_source *source, struct git_hash_ctx *c, struct git_hash_ctx *compat_c, char *hdr, int hdrlen) { - const struct git_hash_algo *algo = source->odb->repo->hash_algo; - const struct git_hash_algo *compat = source->odb->repo->compat_hash_algo; + const struct git_hash_algo *algo = loose->base.odb->repo->hash_algo; + const struct git_hash_algo *compat = loose->base.odb->repo->compat_hash_algo; int fd; - fd = create_tmpfile(source->odb->repo, tmp_file, filename); + fd = create_tmpfile(loose->base.odb->repo, tmp_file, filename); if (fd < 0) { if (flags & ODB_WRITE_OBJECT_SILENT) return -1; else if (errno == EACCES) return error(_("insufficient permission for adding " "an object to repository database %s"), - source->path); + loose->base.path); else return error_errno( _("unable to create temporary file")); @@ -700,14 +700,14 @@ static int start_loose_object_common(struct odb_source *source, * Common steps for the inner git_deflate() loop for writing loose * objects. Returns what git_deflate() returns. 
*/ -static int write_loose_object_common(struct odb_source *source, +static int write_loose_object_common(struct odb_source_loose *loose, struct git_hash_ctx *c, struct git_hash_ctx *compat_c, git_zstream *stream, const int flush, unsigned char *in0, const int fd, unsigned char *compressed, const size_t compressed_len) { - const struct git_hash_algo *compat = source->odb->repo->compat_hash_algo; + const struct git_hash_algo *compat = loose->base.odb->repo->compat_hash_algo; int ret; ret = git_deflate(stream, flush ? Z_FINISH : 0); @@ -728,12 +728,12 @@ static int write_loose_object_common(struct odb_source *source, * - End the compression of zlib stream. * - Get the calculated oid to "oid". */ -static int end_loose_object_common(struct odb_source *source, +static int end_loose_object_common(struct odb_source_loose *loose, struct git_hash_ctx *c, struct git_hash_ctx *compat_c, git_zstream *stream, struct object_id *oid, struct object_id *compat_oid) { - const struct git_hash_algo *compat = source->odb->repo->compat_hash_algo; + const struct git_hash_algo *compat = loose->base.odb->repo->compat_hash_algo; int ret; ret = git_deflate_end_gently(stream); @@ -746,7 +746,7 @@ static int end_loose_object_common(struct odb_source *source, return Z_OK; } -int write_loose_object(struct odb_source *source, +int write_loose_object(struct odb_source_loose *loose, const struct object_id *oid, char *hdr, int hdrlen, const void *buf, unsigned long len, time_t mtime, unsigned flags) @@ -760,11 +760,11 @@ int write_loose_object(struct odb_source *source, static struct strbuf filename = STRBUF_INIT; if (batch_fsync_enabled(FSYNC_COMPONENT_LOOSE_OBJECT)) - prepare_loose_object_transaction(source->odb->transaction); + prepare_loose_object_transaction(loose->base.odb->transaction); - odb_loose_path(source, &filename, oid); + odb_loose_path(loose, &filename, oid); - fd = start_loose_object_common(source, &tmp_file, filename.buf, flags, + fd = start_loose_object_common(loose, &tmp_file, 
filename.buf, flags, &stream, compressed, sizeof(compressed), &c, NULL, hdr, hdrlen); if (fd < 0) @@ -776,14 +776,14 @@ int write_loose_object(struct odb_source *source, do { unsigned char *in0 = stream.next_in; - ret = write_loose_object_common(source, &c, NULL, &stream, 1, in0, fd, + ret = write_loose_object_common(loose, &c, NULL, &stream, 1, in0, fd, compressed, sizeof(compressed)); } while (ret == Z_OK); if (ret != Z_STREAM_END) die(_("unable to deflate new object %s (%d)"), oid_to_hex(oid), ret); - ret = end_loose_object_common(source, &c, NULL, &stream, &parano_oid, NULL); + ret = end_loose_object_common(loose, &c, NULL, &stream, &parano_oid, NULL); if (ret != Z_OK) die(_("deflateEnd on object %s failed (%d)"), oid_to_hex(oid), ret); @@ -791,7 +791,7 @@ int write_loose_object(struct odb_source *source, die(_("confused by unstable object source data for %s"), oid_to_hex(oid)); - close_loose_object(source, fd, tmp_file.buf); + close_loose_object(loose, fd, tmp_file.buf); if (mtime) { struct utimbuf utb; @@ -802,16 +802,15 @@ int write_loose_object(struct odb_source *source, warning_errno(_("failed utime() on %s"), tmp_file.buf); } - return finalize_object_file_flags(source->odb->repo, tmp_file.buf, filename.buf, + return finalize_object_file_flags(loose->base.odb->repo, tmp_file.buf, filename.buf, FOF_SKIP_COLLISION_CHECK); } -int odb_source_loose_write_stream(struct odb_source *source, +int odb_source_loose_write_stream(struct odb_source_loose *loose, struct odb_write_stream *in_stream, size_t len, struct object_id *oid) { - struct odb_source_files *files = odb_source_files_downcast(source); - const struct git_hash_algo *compat = source->odb->repo->compat_hash_algo; + const struct git_hash_algo *compat = loose->base.odb->repo->compat_hash_algo; struct object_id compat_oid; int fd, ret, err = 0, flush = 0; unsigned char compressed[4096]; @@ -825,10 +824,10 @@ int odb_source_loose_write_stream(struct odb_source *source, int hdrlen; if
(batch_fsync_enabled(FSYNC_COMPONENT_LOOSE_OBJECT)) - prepare_loose_object_transaction(source->odb->transaction); + prepare_loose_object_transaction(loose->base.odb->transaction); /* Since oid is not determined, save tmp file to odb path. */ - strbuf_addf(&filename, "%s/", source->path); + strbuf_addf(&filename, "%s/", loose->base.path); hdrlen = format_object_header(hdr, sizeof(hdr), OBJ_BLOB, len); /* @@ -839,7 +838,7 @@ int odb_source_loose_write_stream(struct odb_source *source, * - Setup zlib stream for compression. * - Start to feed header to zlib stream. */ - fd = start_loose_object_common(source, &tmp_file, filename.buf, 0, + fd = start_loose_object_common(loose, &tmp_file, filename.buf, 0, &stream, compressed, sizeof(compressed), &c, &compat_c, hdr, hdrlen); if (fd < 0) { @@ -867,7 +866,7 @@ int odb_source_loose_write_stream(struct odb_source *source, if (in_stream->is_finished) flush = 1; } - ret = write_loose_object_common(source, &c, &compat_c, &stream, flush, in0, fd, + ret = write_loose_object_common(loose, &c, &compat_c, &stream, flush, in0, fd, compressed, sizeof(compressed)); /* * Unlike write_loose_object(), we do not have the entire @@ -890,16 +889,16 @@ int odb_source_loose_write_stream(struct odb_source *source, */ if (ret != Z_STREAM_END) die(_("unable to stream deflate new object (%d)"), ret); - ret = end_loose_object_common(source, &c, &compat_c, &stream, oid, &compat_oid); + ret = end_loose_object_common(loose, &c, &compat_c, &stream, oid, &compat_oid); if (ret != Z_OK) die(_("deflateEnd on stream object failed (%d)"), ret); - close_loose_object(source, fd, tmp_file.buf); + close_loose_object(loose, fd, tmp_file.buf); - if (odb_freshen_object(source->odb, oid)) { + if (odb_freshen_object(loose->base.odb, oid)) { unlink_or_warn(tmp_file.buf); goto cleanup; } - odb_loose_path(source, &filename, oid); + odb_loose_path(loose, &filename, oid); /* We finally know the object path, and create the missing dir. 
*/ dirlen = directory_size(filename.buf); @@ -907,7 +906,7 @@ int odb_source_loose_write_stream(struct odb_source *source, struct strbuf dir = STRBUF_INIT; strbuf_add(&dir, filename.buf, dirlen); - if (safe_create_dir_in_gitdir(source->odb->repo, dir.buf) && + if (safe_create_dir_in_gitdir(loose->base.odb->repo, dir.buf) && errno != EEXIST) { err = error_errno(_("unable to create directory %s"), dir.buf); strbuf_release(&dir); @@ -916,10 +915,10 @@ int odb_source_loose_write_stream(struct odb_source *source, strbuf_release(&dir); } - err = finalize_object_file_flags(source->odb->repo, tmp_file.buf, filename.buf, + err = finalize_object_file_flags(loose->base.odb->repo, tmp_file.buf, filename.buf, FOF_SKIP_COLLISION_CHECK); if (!err && compat) - err = repo_add_loose_object_map(files->loose, oid, &compat_oid); + err = repo_add_loose_object_map(loose, oid, &compat_oid); cleanup: strbuf_release(&tmp_file); strbuf_release(&filename); @@ -957,7 +956,7 @@ int force_object_loose(struct odb_source *source, oid_to_hex(oid), compat->name); } hdrlen = format_object_header(hdr, sizeof(hdr), type, len); - ret = write_loose_object(source, oid, hdr, hdrlen, buf, len, mtime, 0); + ret = write_loose_object(files->loose, oid, hdr, hdrlen, buf, len, mtime, 0); if (!ret && compat) ret = repo_add_loose_object_map(files->loose, oid, &compat_oid); free(buf); diff --git a/object-file.h b/object-file.h index 2b32592de1135b..d30f1b10b2eb36 100644 --- a/object-file.h +++ b/object-file.h @@ -23,7 +23,7 @@ int index_path(struct index_state *istate, struct object_id *oid, const char *pa struct object_info; struct odb_source; -int odb_source_loose_write_stream(struct odb_source *source, +int odb_source_loose_write_stream(struct odb_source_loose *loose, struct odb_write_stream *stream, size_t len, struct object_id *oid); @@ -31,7 +31,7 @@ int odb_source_loose_write_stream(struct odb_source *source, * Put in `buf` the name of the file in the local object database that * would be used to store a 
loose object with the specified oid. */ -const char *odb_loose_path(struct odb_source *source, +const char *odb_loose_path(struct odb_source_loose *source, struct strbuf *buf, const struct object_id *oid); @@ -127,7 +127,7 @@ void write_object_file_prepare(const struct git_hash_algo *algo, const void *buf, unsigned long len, enum object_type type, struct object_id *oid, char *hdr, int *hdrlen); -int write_loose_object(struct odb_source *source, +int write_loose_object(struct odb_source_loose *loose, const struct object_id *oid, char *hdr, int hdrlen, const void *buf, unsigned long len, time_t mtime, unsigned flags); diff --git a/odb/source-files.c b/odb/source-files.c index 52ba04237acfd7..2ba1def776e006 100644 --- a/odb/source-files.c +++ b/odb/source-files.c @@ -174,7 +174,8 @@ static int odb_source_files_write_object_stream(struct odb_source *source, size_t len, struct object_id *oid) { - return odb_source_loose_write_stream(source, stream, len, oid); + struct odb_source_files *files = odb_source_files_downcast(source); + return odb_source_loose_write_stream(files->loose, stream, len, oid); } static int odb_source_files_begin_transaction(struct odb_source *source, diff --git a/odb/source-loose.c b/odb/source-loose.c index c91018109e5b68..da8a60dba1c04c 100644 --- a/odb/source-loose.c +++ b/odb/source-loose.c @@ -220,7 +220,7 @@ static int odb_source_loose_read_object_info(struct odb_source *source, if (flags & OBJECT_INFO_SECOND_READ) return -1; - odb_loose_path(source, &buf, oid); + odb_loose_path(loose, &buf, oid); return read_object_info_from_path(loose, buf.buf, oid, oi, flags); } @@ -238,7 +238,7 @@ static int open_loose_object(struct odb_source_loose *loose, static struct strbuf buf = STRBUF_INIT; int fd; - *path = odb_loose_path(&loose->base, &buf, oid); + *path = odb_loose_path(loose, &buf, oid); fd = git_open(*path); if (fd >= 0) return fd; @@ -584,8 +584,9 @@ static int odb_source_loose_count_objects(struct odb_source *source, static int 
odb_source_loose_freshen_object(struct odb_source *source, const struct object_id *oid) { + struct odb_source_loose *loose = odb_source_loose_downcast(source); static struct strbuf path = STRBUF_INIT; - odb_loose_path(source, &path, oid); + odb_loose_path(loose, &path, oid); return !!check_and_freshen_file(path.buf, 1); } @@ -624,7 +625,7 @@ static int odb_source_loose_write_object(struct odb_source *source, write_object_file_prepare(algo, buf, len, type, oid, hdr, &hdrlen); if (odb_freshen_object(source->odb, oid)) return 0; - if (write_loose_object(source, oid, hdr, hdrlen, buf, len, 0, flags)) + if (write_loose_object(loose, oid, hdr, hdrlen, buf, len, 0, flags)) return -1; if (compat) return repo_add_loose_object_map(loose, oid, &compat_oid); From e6a39bbe7a6bde5fb7de8d487e8f4ef928e6b751 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Mon, 1 Jun 2026 10:20:39 +0200 Subject: [PATCH 24/30] odb/source-loose: wire up `write_object_stream()` callback Wire up the `write_object_stream()` callback. Note that we don't move the implementation into "odb/source-loose.c". This is because most of the logic to write loose objects is still contained in "object-file.c", and detangling that requires us to do some refactorings as explained in the preceding commit. So for now, the implementation of writing an object stream is still located in "object-file.c". Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- object-file.h | 12 +++++++++++- odb/source-files.c | 3 ++- odb/source-loose.c | 14 ++++++++++++++ 3 files changed, 27 insertions(+), 2 deletions(-) diff --git a/object-file.h b/object-file.h index d30f1b10b2eb36..528c4e6e697f87 100644 --- a/object-file.h +++ b/object-file.h @@ -23,7 +23,17 @@ int index_path(struct index_state *istate, struct object_id *oid, const char *pa struct object_info; struct odb_source; -int odb_source_loose_write_stream(struct odb_source_loose *loose, +/* + * Write the given stream into the loose object source. 
The only difference + * from the generic implementation of this function is that we don't perform an + * object existence check here. + * + * TODO: We should stop exposing this function altogether and move it into + * "odb/source-loose.c". This requires a couple of refactorings though to make + * `force_object_loose()` generic and is thus postponed to a later point in + * time. + */ +int odb_source_loose_write_stream(struct odb_source_loose *source, struct odb_write_stream *stream, size_t len, struct object_id *oid); diff --git a/odb/source-files.c b/odb/source-files.c index 2ba1def776e006..83f8066c67dd3c 100644 --- a/odb/source-files.c +++ b/odb/source-files.c @@ -7,6 +7,7 @@ #include "odb.h" #include "odb/source.h" #include "odb/source-files.h" +#include "odb/source-loose.h" #include "packfile.h" #include "strbuf.h" #include "write-or-die.h" @@ -175,7 +176,7 @@ static int odb_source_files_write_object_stream(struct odb_source *source, struct object_id *oid) { struct odb_source_files *files = odb_source_files_downcast(source); - return odb_source_loose_write_stream(files->loose, stream, len, oid); + return odb_source_write_object_stream(&files->loose->base, stream, len, oid); } static int odb_source_files_begin_transaction(struct odb_source *source, diff --git a/odb/source-loose.c b/odb/source-loose.c index da8a60dba1c04c..e52fc289a24102 100644 --- a/odb/source-loose.c +++ b/odb/source-loose.c @@ -632,6 +632,19 @@ static int odb_source_loose_write_object(struct odb_source *source, return 0; } +static int odb_source_loose_write_object_stream(struct odb_source *source, + struct odb_write_stream *in_stream, + size_t len, + struct object_id *oid) +{ + /* + * TODO: the implementation should be moved here, see the comment on + * the called function in "object-file.h". 
+ */ + struct odb_source_loose *loose = odb_source_loose_downcast(source); + return odb_source_loose_write_stream(loose, in_stream, len, oid); +} + static void odb_source_loose_clear_cache(struct odb_source_loose *loose) { oidtree_clear(loose->cache); @@ -692,6 +705,7 @@ struct odb_source_loose *odb_source_loose_new(struct odb_source_files *files) loose->base.count_objects = odb_source_loose_count_objects; loose->base.freshen_object = odb_source_loose_freshen_object; loose->base.write_object = odb_source_loose_write_object; + loose->base.write_object_stream = odb_source_loose_write_object_stream; if (!is_absolute_path(loose->base.path)) chdir_notify_register(NULL, odb_source_loose_reparent, loose); From 87af3bb434b86805f69fae40c966d92db1bd2eae Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Mon, 1 Jun 2026 10:20:40 +0200 Subject: [PATCH 25/30] odb/source-loose: stub out remaining callbacks Stub out remaining callback functions for the "loose" backend. Note that we also stub out transactions for loose objects. In fact, we already have the infrastructure in place for those, and we could in theory implement those, as well. But there are separate efforts ongoing to polish up transactional interfaces, and doing so now would likely result in some messiness. This omission will thus be worked on in a subsequent patch series, once the dust has settled. 
Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- odb/source-loose.c | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/odb/source-loose.c b/odb/source-loose.c index e52fc289a24102..e1749413184160 100644 --- a/odb/source-loose.c +++ b/odb/source-loose.c @@ -645,6 +645,25 @@ static int odb_source_loose_write_object_stream(struct odb_source *source, return odb_source_loose_write_stream(loose, in_stream, len, oid); } +static int odb_source_loose_begin_transaction(struct odb_source *source UNUSED, + struct odb_transaction **out UNUSED) +{ + /* TODO: this is a known omission that we'll want to address eventually. */ + return error("loose source does not support transactions"); +} + +static int odb_source_loose_read_alternates(struct odb_source *source UNUSED, + struct strvec *out UNUSED) +{ + return 0; +} + +static int odb_source_loose_write_alternate(struct odb_source *source UNUSED, + const char *alternate UNUSED) +{ + return error("loose source does not support alternates"); +} + static void odb_source_loose_clear_cache(struct odb_source_loose *loose) { oidtree_clear(loose->cache); @@ -706,6 +725,9 @@ struct odb_source_loose *odb_source_loose_new(struct odb_source_files *files) loose->base.freshen_object = odb_source_loose_freshen_object; loose->base.write_object = odb_source_loose_write_object; loose->base.write_object_stream = odb_source_loose_write_object_stream; + loose->base.begin_transaction = odb_source_loose_begin_transaction; + loose->base.read_alternates = odb_source_loose_read_alternates; + loose->base.write_alternate = odb_source_loose_write_alternate; if (!is_absolute_path(loose->base.path)) chdir_notify_register(NULL, odb_source_loose_reparent, loose); From ef4778bcba323ab38d442811f851af092760b6b5 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Mon, 1 Jun 2026 10:20:41 +0200 Subject: [PATCH 26/30] odb/source-loose: drop pointer to the "files" source Now that all callbacks of the loose source operate on 
`struct odb_source_loose` directly we no longer have to reach into the "files" source at all. Drop this field and update `odb_source_loose_new()` to instead accept all parameters required to initialize itself. This ensures that the "loose" backend is a fully standalone source. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- odb/source-files.c | 2 +- odb/source-loose.c | 8 ++++---- odb/source-loose.h | 7 ++++--- 3 files changed, 9 insertions(+), 8 deletions(-) diff --git a/odb/source-files.c b/odb/source-files.c index 83f8066c67dd3c..5bdd0429225397 100644 --- a/odb/source-files.c +++ b/odb/source-files.c @@ -268,7 +268,7 @@ struct odb_source_files *odb_source_files_new(struct object_database *odb, CALLOC_ARRAY(files, 1); odb_source_init(&files->base, odb, ODB_SOURCE_FILES, path, local); - files->loose = odb_source_loose_new(files); + files->loose = odb_source_loose_new(odb, path, local); files->packed = packfile_store_new(&files->base); files->base.free = odb_source_files_free; diff --git a/odb/source-loose.c b/odb/source-loose.c index e1749413184160..7d7ea2fb842537 100644 --- a/odb/source-loose.c +++ b/odb/source-loose.c @@ -705,14 +705,14 @@ static void odb_source_loose_free(struct odb_source *source) free(loose); } -struct odb_source_loose *odb_source_loose_new(struct odb_source_files *files) +struct odb_source_loose *odb_source_loose_new(struct object_database *odb, + const char *path, + bool local) { struct odb_source_loose *loose; CALLOC_ARRAY(loose, 1); - odb_source_init(&loose->base, files->base.odb, ODB_SOURCE_LOOSE, - files->base.path, files->base.local); - loose->files = files; + odb_source_init(&loose->base, odb, ODB_SOURCE_LOOSE, path, local); loose->base.free = odb_source_loose_free; loose->base.close = odb_source_loose_close; diff --git a/odb/source-loose.h b/odb/source-loose.h index 4dd4fd6ce30a7e..6070aaf3ce6ab2 100644 --- a/odb/source-loose.h +++ b/odb/source-loose.h @@ -9,11 +9,10 @@ struct oidtree; /* * An object database 
source that stores its objects in loose format, one - * file per object. This source is part of the files source. + * file per object. */ struct odb_source_loose { struct odb_source base; - struct odb_source_files *files; /* * Used to store the results of readdir(3) calls when we are OK @@ -31,7 +30,9 @@ struct odb_source_loose { struct loose_object_map *map; }; -struct odb_source_loose *odb_source_loose_new(struct odb_source_files *files); +struct odb_source_loose *odb_source_loose_new(struct object_database *odb, + const char *path, + bool local); /* * Cast the given object database source to the loose backend. This will cause From 96ee7f1650e6096561599f069d18c052412d7506 Mon Sep 17 00:00:00 2001 From: LorenzoPegorari Date: Mon, 1 Jun 2026 15:52:01 +0200 Subject: [PATCH 27/30] http: cleanup function fetch_and_setup_pack_index() Cleanup the function `fetch_and_setup_pack_index()` by removing the useless call to the function `unlink()`. This is not necessary anymore since 63aca3f7f1 (dumb-http: store downloaded pack idx as tempfile, 2024-10-25), when `fetch_pack_index()` started registering its return value (in this case `tmp_idx`) as a tempfile to be deleted at process exit. 
Signed-off-by: LorenzoPegorari Signed-off-by: Junio C Hamano --- http.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/http.c b/http.c index ea9b16861bc3d4..55dd856a279a23 100644 --- a/http.c +++ b/http.c @@ -2609,9 +2609,7 @@ static int fetch_and_setup_pack_index(struct packfile_list *packs, new_pack = parse_pack_index(the_repository, sha1, tmp_idx); if (!new_pack) { - unlink(tmp_idx); free(tmp_idx); - return -1; /* parse_pack_index() already issued error message */ } From 18decad922884a69ea39c0332f7a94ce82cf99cc Mon Sep 17 00:00:00 2001 From: LorenzoPegorari Date: Mon, 1 Jun 2026 15:52:12 +0200 Subject: [PATCH 28/30] http: fix memory leak in fetch_and_setup_pack_index() Inside the function `fetch_and_setup_pack_index()`, when the pack obtained using `parse_pack_index()` fails to be verified by `verify_pack_index()`, the function returns without closing and freeing said pack. Fix this by calling `close_pack_index()` to munmap the index file for the leaking pack (which might have been mmapped by `fetch_pack_index()` or `verify_pack_index()`), and then free it, when the verification fails. 
Signed-off-by: LorenzoPegorari Signed-off-by: Junio C Hamano --- http.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/http.c b/http.c index 55dd856a279a23..d50a34e4460a90 100644 --- a/http.c +++ b/http.c @@ -2614,11 +2614,13 @@ static int fetch_and_setup_pack_index(struct packfile_list *packs, } ret = verify_pack_index(new_pack); - if (!ret) - close_pack_index(new_pack); + + close_pack_index(new_pack); free(tmp_idx); - if (ret) + if (ret) { + free(new_pack); return -1; + } packfile_list_prepend(packs, new_pack); return 0; From 5cd4d0d8500c6ef1b102f5cb35187a91c299f013 Mon Sep 17 00:00:00 2001 From: Harald Nordgren Date: Tue, 2 Jun 2026 07:37:58 +0000 Subject: [PATCH 29/30] config.mak.uname: avoid macOS linker warning on Xcode 16.3+ Building on macOS with Xcode 16.3 or newer emits: ld: warning: reducing alignment of section __DATA,__common from 0x8000 to 0x4000 because it exceeds segment maximum alignment Pass -fno-common when "ld -v" reports ld-1167 or newer, so tentative definitions of large arrays go into BSS instead of __DATA,__common. Signed-off-by: Harald Nordgren Signed-off-by: Junio C Hamano --- config.mak.uname | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/config.mak.uname b/config.mak.uname index 3c35ae33a3c0c0..32b58e7a95091e 100644 --- a/config.mak.uname +++ b/config.mak.uname @@ -160,6 +160,12 @@ ifeq ($(uname_S),Darwin) NEEDS_GOOD_LIBICONV = UnfortunatelyYes endif + # Silence Xcode 16.3+ linker warning about __DATA,__common alignment. + LD_MAJOR_VERSION = $(shell ld -v 2>&1 | sed -n 's/.*PROJECT:ld-\([0-9]*\).*/\1/p') + ifeq ($(shell test -n "$(LD_MAJOR_VERSION)" && test "$(LD_MAJOR_VERSION)" -ge 1167 && echo 1),1) + BASIC_CFLAGS += -fno-common + endif + # The builtin FSMonitor on MacOS builds upon Simple-IPC. Both require # Unix domain sockets and PThreads. 
ifndef NO_PTHREADS From 3e65291872de10c3f0bf05ea8c24187e7a71ebf0 Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Thu, 11 Jun 2026 04:29:59 -0700 Subject: [PATCH 30/30] Git 2.55-rc0 Signed-off-by: Junio C Hamano --- Documentation/RelNotes/2.55.0.adoc | 28 ++++++++++++++++++++++++++++ GIT-VERSION-GEN | 2 +- 2 files changed, 29 insertions(+), 1 deletion(-) diff --git a/Documentation/RelNotes/2.55.0.adoc b/Documentation/RelNotes/2.55.0.adoc index f037568499a95a..5809fd6cb2494f 100644 --- a/Documentation/RelNotes/2.55.0.adoc +++ b/Documentation/RelNotes/2.55.0.adoc @@ -62,6 +62,9 @@ UI, Workflows & Features current branch to a same-named branch on the remote, and detailing the upstream requirements for centralized workflows. + * The documentation for "--word-diff" has been extended with a bit of + implementation detail of where these different words come from. + Performance, Internal Implementation, Development Support etc. -------------------------------------------------------------- @@ -160,6 +163,14 @@ Performance, Internal Implementation, Development Support etc. * Encourage original authors to monitor the CI status. + * The `git log -L` implementation has been refactored to use the + standard diff output pipeline, enabling pickaxe and diff-filter to + work as expected. Additionally, metadata-only diff formats like + --raw and --name-only are now supported with -L. + + * The loose object source has been refactored into a proper `struct + odb_source`. + Fixes since v2.54 ----------------- @@ -294,6 +305,23 @@ Fixes since v2.54 triggered a lazy fetch, which has been corrected. (merge fa1468a1f7 th/promisor-quiet-per-repo later to maint). + * Correct use of sockaddr API in "git daemon". + (merge 422a5bf575 st/daemon-sockaddr-fixes later to maint). + + * A memory leak in `fetch_and_setup_pack_index()` when verification of + the downloaded pack index fails has been plugged. Also an obsolete + `unlink()` call on parse failure has been cleaned up. 
+ + * In t3070-wildmatch, "via ls-files" test variants with patterns + containing backslash escapes are now skipped on Windows, avoiding 36 + test failures caused by pathspec separator conversion. + (merge 8c84e6802c kk/wildmatch-windows-ls-files-prereq later to maint). + + * A linker warning on macOS when building with Xcode 16.3 or newer has + been avoided by passing -fno-common to the compiler when a + sufficiently new linker is detected. + (merge 5cd4d0d850 hn/macos-linker-warning later to maint). + * Other code cleanup, docfix, build fix, etc. (merge 80f4b802e9 ja/doc-difftool-synopsis-style later to maint). (merge b96490241e jc/doc-timestamps-in-stat later to maint). diff --git a/GIT-VERSION-GEN b/GIT-VERSION-GEN index fd6979b70e6ac3..9448079974b61c 100755 --- a/GIT-VERSION-GEN +++ b/GIT-VERSION-GEN @@ -1,6 +1,6 @@ #!/bin/sh -DEF_VER=v2.54.0 +DEF_VER=v2.55.0-rc0 LF=' '