diff options
| author | Junio C Hamano <gitster@pobox.com> | 2025-11-30 18:33:22 -0800 |
|---|---|---|
| committer | Junio C Hamano <gitster@pobox.com> | 2025-11-30 18:33:22 -0800 |
| commit | e5b5722db84a93b669cb0d065b7988f1421e0675 (patch) | |
| tree | 60151b60f902c5e229d6c979f94a9dca25ca8133 | |
| parent | ca1c1363f581938d1f85c277338a8e71df7f1cdf (diff) | |
| parent | 68aace560b3ed2dba8ca36c34773e26c11b3adca (diff) | |
| download | git-seen.tar.gz | |
Merge branch 'bc/sha1-256-interop-02' into seenseen
The code to maintain mapping between object names in multiple hash
functions is being added, written in Rust.
* bc/sha1-256-interop-02:
object-file-convert: always make sure object ID algo is valid
rust: add a small wrapper around the hashfile code
rust: add a new binary object map format
rust: add functionality to hash an object
rust: add a build.rs script for tests
hash: expose hash context functions to Rust
write-or-die: add an fsync component for the object map
csum-file: define hashwrite's count as a uint32_t
rust: add additional helpers for ObjectID
hash: add a function to look up hash algo structs
rust: add a hash algorithm abstraction
rust: add a ObjectID struct
hash: use uint32_t for object_id algorithm
conversion: don't crash when no destination algo
repository: require Rust support for interoperability
| -rw-r--r-- | Documentation/gitformat-loose.adoc | 78 | ||||
| -rw-r--r-- | Makefile | 5 | ||||
| -rw-r--r-- | build.rs | 17 | ||||
| -rw-r--r-- | csum-file.c | 2 | ||||
| -rw-r--r-- | csum-file.h | 2 | ||||
| -rw-r--r-- | hash.c | 48 | ||||
| -rw-r--r-- | hash.h | 38 | ||||
| -rw-r--r-- | object-file-convert.c | 14 | ||||
| -rw-r--r-- | oidtree.c | 2 | ||||
| -rw-r--r-- | repository.c | 12 | ||||
| -rw-r--r-- | repository.h | 4 | ||||
| -rw-r--r-- | serve.c | 2 | ||||
| -rw-r--r-- | src/csum_file.rs | 81 | ||||
| -rw-r--r-- | src/hash.rs | 466 | ||||
| -rw-r--r-- | src/lib.rs | 3 | ||||
| -rw-r--r-- | src/loose.rs | 913 | ||||
| -rw-r--r-- | src/meson.build | 3 | ||||
| -rwxr-xr-x | t/t1006-cat-file.sh | 82 | ||||
| -rwxr-xr-x | t/t1016-compatObjectFormat.sh | 6 | ||||
| -rwxr-xr-x | t/t1500-rev-parse.sh | 2 | ||||
| -rwxr-xr-x | t/t9305-fast-import-signatures.sh | 4 | ||||
| -rwxr-xr-x | t/t9350-fast-export.sh | 4 | ||||
| -rw-r--r-- | t/test-lib.sh | 4 | ||||
| -rw-r--r-- | write-or-die.h | 4 |
24 files changed, 1722 insertions, 74 deletions
diff --git a/Documentation/gitformat-loose.adoc b/Documentation/gitformat-loose.adoc index 947993663e..b0b569761b 100644 --- a/Documentation/gitformat-loose.adoc +++ b/Documentation/gitformat-loose.adoc @@ -10,6 +10,7 @@ SYNOPSIS -------- [verse] $GIT_DIR/objects/[0-9a-f][0-9a-f]/* +$GIT_DIR/objects/object-map/map-*.map DESCRIPTION ----------- @@ -48,6 +49,83 @@ stored under Similarly, a blob containing the contents `abc` would have the uncompressed data of `blob 3\0abc`. +== Loose object mapping + +When the `compatObjectFormat` option is used, Git needs to store a mapping +between the repository's main algorithm and the compatibility algorithm for +loose objects as well as some auxiliary information. + +The mapping consists of a set of files under `$GIT_DIR/objects/object-map` +ending in `.map`. The portion of the filename before the extension is that of +the main hash checksum (that is, the one specified in +`extensions.objectformat`) in hex format. + +`git gc` will repack existing entries into one file, removing any unnecessary +objects, such as obsolete shallow entries or loose objects that have been +packed. + +The file format is as follows. All values are in network byte order and all +4-byte and 8-byte values must be 4-byte aligned in the file, so the NUL padding +may be required in some cases. Git always uses the smallest number of NUL +bytes (including zero) that is required for the padding in order to make +writing files deterministic. + +- A header appears at the beginning and consists of the following: + * A 4-byte mapping signature: `LMAP` + * 4-byte version number: 1 + * 4-byte length of the header section (including reserved entries but + excluding any NUL padding). + * 4-byte number of objects declared in this map file. + * 4-byte number of object formats declared in this map file. + * For each object format: + ** 4-byte format identifier (e.g., `sha1` for SHA-1) + ** 4-byte length in bytes of shortened object names (that is, prefixes of + the full object names). This is the shortest possible length needed to + make names in the shortened object name table unambiguous. + ** 8-byte integer, recording where tables relating to this format + are stored in this index file, as an offset from the beginning. + * 8-byte offset to the trailer from the beginning of this file. + * The remainder of the header section is reserved for future use. + Readers must ignore unrecognized data here. +- Zero or more NUL bytes. These are used to improve the alignment of the + 4-byte quantities below. +- Tables for the first object format: + * A sorted table of shortened object names. These are prefixes of the names + of all objects in this file, packed together to reduce the cache footprint + of the binary search for a specific object name. + * A sorted table of full object names. + * A table of 4-byte metadata values. +- Zero or more NUL bytes. +- Tables for subsequent object formats: + * A sorted table of shortened object names. These are prefixes of the names + of all objects in this file, packed together without offset values to + reduce the cache footprint of the binary search for a specific object name. + * A table of full object names in the order specified by the first object format. + * A table of 4-byte values mapping object name order to the order of the + first object format. For an object in the table of sorted shortened object + names, the value at the corresponding index in this table is the index in + the previous table for that same object. + * Zero or more NUL bytes. +- The trailer consists of the following: + * Hash checksum of all of the above using the main hash. + +The lower six bits of each metadata table contain a type field indicating the +reason that this object is stored: + +0:: + Reserved. +1:: + This object is stored as a loose object in the repository. +2:: + This object is a shallow entry. The mapping refers to a shallow value + returned by a remote server. +3:: + This object is a submodule entry. The mapping refers to the commit stored + representing a submodule. + +Other data may be stored in this field in the future. Bits that are not used +must be zero. + GIT --- Part of the linkgit:git[1] suite @@ -1538,7 +1538,10 @@ CLAR_TEST_OBJS += $(UNIT_TEST_DIR)/unit-test.o UNIT_TEST_OBJS += $(UNIT_TEST_DIR)/test-lib.o +RUST_SOURCES += src/csum_file.rs +RUST_SOURCES += src/hash.rs RUST_SOURCES += src/lib.rs +RUST_SOURCES += src/loose.rs RUST_SOURCES += src/varint.rs GIT-VERSION-FILE: FORCE @@ -2973,7 +2976,7 @@ scalar$X: scalar.o GIT-LDFLAGS $(GITLIBS) $(LIB_FILE): $(LIB_OBJS) $(QUIET_AR)$(RM) $@ && $(AR) $(ARFLAGS) $@ $^ -$(RUST_LIB): Cargo.toml $(RUST_SOURCES) +$(RUST_LIB): Cargo.toml $(RUST_SOURCES) $(LIB_FILE) $(QUIET_CARGO)cargo build $(CARGO_ARGS) .PHONY: rust diff --git a/build.rs b/build.rs new file mode 100644 index 0000000000..3724b3a930 --- /dev/null +++ b/build.rs @@ -0,0 +1,17 @@ +// This program is free software; you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation: version 2 of the License, dated June 1991. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License along +// with this program; if not, see <https://www.gnu.org/licenses/>. + +fn main() { + println!("cargo:rustc-link-search=."); + println!("cargo:rustc-link-lib=git"); + println!("cargo:rustc-link-lib=z"); +} diff --git a/csum-file.c b/csum-file.c index 6e21e3cac8..3d3047c776 100644 --- a/csum-file.c +++ b/csum-file.c @@ -110,7 +110,7 @@ void discard_hashfile(struct hashfile *f) free_hashfile(f); } -void hashwrite(struct hashfile *f, const void *buf, unsigned int count) +void hashwrite(struct hashfile *f, const void *buf, uint32_t count) { while (count) { unsigned left = f->buffer_len - f->offset; diff --git a/csum-file.h b/csum-file.h index 07ae11024a..ecce9d27b0 100644 --- a/csum-file.h +++ b/csum-file.h @@ -63,7 +63,7 @@ void free_hashfile(struct hashfile *f); */ int finalize_hashfile(struct hashfile *, unsigned char *, enum fsync_component, unsigned int); void discard_hashfile(struct hashfile *); -void hashwrite(struct hashfile *, const void *, unsigned int); +void hashwrite(struct hashfile *, const void *, uint32_t); void hashflush(struct hashfile *f); void crc32_begin(struct hashfile *); uint32_t crc32_end(struct hashfile *); @@ -241,7 +241,49 @@ const char *empty_tree_oid_hex(const struct git_hash_algo *algop) return oid_to_hex_r(buf, algop->empty_tree); } -int hash_algo_by_name(const char *name) +const struct git_hash_algo *hash_algo_ptr_by_number(uint32_t algo) +{ + if (algo >= GIT_HASH_NALGOS) + return NULL; + return &hash_algos[algo]; +} + +struct git_hash_ctx *git_hash_alloc(void) +{ + return xmalloc(sizeof(struct git_hash_ctx)); +} + +void git_hash_free(struct git_hash_ctx *ctx) +{ + free(ctx); +} + +void git_hash_init(struct git_hash_ctx *ctx, const struct git_hash_algo *algop) +{ + algop->init_fn(ctx); +} + +void git_hash_clone(struct git_hash_ctx *dst, const struct git_hash_ctx *src) +{ + src->algop->clone_fn(dst, src); +} + +void git_hash_update(struct git_hash_ctx *ctx, const void *in, size_t len) +{ + ctx->algop->update_fn(ctx, in, len); +} + +void git_hash_final(unsigned char *hash, struct git_hash_ctx *ctx) +{ + ctx->algop->final_fn(hash, ctx); +} + +void git_hash_final_oid(struct object_id *oid, struct git_hash_ctx *ctx) +{ + ctx->algop->final_oid_fn(oid, ctx); +} + +uint32_t hash_algo_by_name(const char *name) { if (!name) return GIT_HASH_UNKNOWN; @@ -251,7 +293,7 @@ int hash_algo_by_name(const char *name) return GIT_HASH_UNKNOWN; } -int hash_algo_by_id(uint32_t format_id) +uint32_t hash_algo_by_id(uint32_t format_id) { for (size_t i = 1; i < GIT_HASH_NALGOS; i++) if (format_id == hash_algos[i].format_id) @@ -259,7 +301,7 @@ int hash_algo_by_id(uint32_t format_id) return GIT_HASH_UNKNOWN; } -int hash_algo_by_length(size_t len) +uint32_t hash_algo_by_length(size_t len) { for (size_t i = 1; i < GIT_HASH_NALGOS; i++) if (len == hash_algos[i].rawsz) @@ -211,7 +211,7 @@ static inline void git_SHA256_Clone(git_SHA256_CTX *dst, const git_SHA256_CTX *s struct object_id { unsigned char hash[GIT_MAX_RAWSZ]; - int algo; /* XXX requires 4-byte alignment */ + uint32_t algo; /* XXX requires 4-byte alignment */ }; #define GET_OID_QUIETLY 01 @@ -320,37 +320,25 @@ struct git_hash_algo { }; extern const struct git_hash_algo hash_algos[GIT_HASH_NALGOS]; -static inline void git_hash_clone(struct git_hash_ctx *dst, const struct git_hash_ctx *src) -{ - src->algop->clone_fn(dst, src); -} - -static inline void git_hash_update(struct git_hash_ctx *ctx, const void *in, size_t len) -{ - ctx->algop->update_fn(ctx, in, len); -} - -static inline void git_hash_final(unsigned char *hash, struct git_hash_ctx *ctx) -{ - ctx->algop->final_fn(hash, ctx); -} - -static inline void git_hash_final_oid(struct object_id *oid, struct git_hash_ctx *ctx) -{ - ctx->algop->final_oid_fn(oid, ctx); -} - +void git_hash_init(struct git_hash_ctx *ctx, const struct git_hash_algo *algop); +void git_hash_clone(struct git_hash_ctx *dst, const struct git_hash_ctx *src); +void git_hash_update(struct git_hash_ctx *ctx, const void *in, size_t len); +void git_hash_final(unsigned char *hash, struct git_hash_ctx *ctx); +void git_hash_final_oid(struct object_id *oid, struct git_hash_ctx *ctx); +const struct git_hash_algo *hash_algo_ptr_by_number(uint32_t algo); +struct git_hash_ctx *git_hash_alloc(void); +void git_hash_free(struct git_hash_ctx *ctx); /* * Return a GIT_HASH_* constant based on the name. Returns GIT_HASH_UNKNOWN if * the name doesn't match a known algorithm. */ -int hash_algo_by_name(const char *name); +uint32_t hash_algo_by_name(const char *name); /* Identical, except based on the format ID. */ -int hash_algo_by_id(uint32_t format_id); +uint32_t hash_algo_by_id(uint32_t format_id); /* Identical, except based on the length. */ -int hash_algo_by_length(size_t len); +uint32_t hash_algo_by_length(size_t len); /* Identical, except for a pointer to struct git_hash_algo. */ -static inline int hash_algo_by_ptr(const struct git_hash_algo *p) +static inline uint32_t hash_algo_by_ptr(const struct git_hash_algo *p) { size_t i; for (i = 0; i < GIT_HASH_NALGOS; i++) { diff --git a/object-file-convert.c b/object-file-convert.c index 7ab875afe6..f8dce94811 100644 --- a/object-file-convert.c +++ b/object-file-convert.c @@ -13,7 +13,7 @@ #include "gpg-interface.h" #include "object-file-convert.h" -int repo_oid_to_algop(struct repository *repo, const struct object_id *src, +int repo_oid_to_algop(struct repository *repo, const struct object_id *srcoid, const struct git_hash_algo *to, struct object_id *dest) { /* @@ -21,9 +21,17 @@ int repo_oid_to_algop(struct repository *repo, const struct object_id *src, * default hash algorithm for that object. */ const struct git_hash_algo *from = - src->algo ? &hash_algos[src->algo] : repo->hash_algo; + srcoid->algo ? &hash_algos[srcoid->algo] : repo->hash_algo; + struct object_id temp; + const struct object_id *src = srcoid; - if (from == to) { + if (!srcoid->algo) { + oidcpy(&temp, srcoid); + temp.algo = hash_algo_by_ptr(repo->hash_algo); + src = &temp; + } + + if (from == to || !to) { if (src != dest) oidcpy(dest, src); return 0; @@ -10,7 +10,7 @@ struct oidtree_iter_data { oidtree_iter fn; void *arg; size_t *last_nibble_at; - int algo; + uint32_t algo; uint8_t last_byte; }; diff --git a/repository.c b/repository.c index fec742dd8f..fb7b8bfe59 100644 --- a/repository.c +++ b/repository.c @@ -3,6 +3,7 @@ #include "repository.h" #include "odb.h" #include "config.h" +#include "gettext.h" #include "object.h" #include "lockfile.h" #include "path.h" @@ -38,7 +39,7 @@ struct repository *the_repository = &the_repo; static void set_default_hash_algo(struct repository *repo) { const char *hash_name; - int algo; + uint32_t algo; hash_name = getenv("GIT_TEST_DEFAULT_HASH_ALGO"); if (!hash_name) @@ -178,18 +179,23 @@ void repo_set_gitdir(struct repository *repo, repo->gitdir, "index"); } -void repo_set_hash_algo(struct repository *repo, int hash_algo) +void repo_set_hash_algo(struct repository *repo, uint32_t hash_algo) { repo->hash_algo = &hash_algos[hash_algo]; } -void repo_set_compat_hash_algo(struct repository *repo, int algo) +void repo_set_compat_hash_algo(struct repository *repo MAYBE_UNUSED, uint32_t algo) { +#ifdef WITH_RUST if (hash_algo_by_ptr(repo->hash_algo) == algo) BUG("hash_algo and compat_hash_algo match"); repo->compat_hash_algo = algo ? &hash_algos[algo] : NULL; if (repo->compat_hash_algo) repo_read_loose_object_map(repo); +#else + if (algo) + die(_("compatibility hash algorithm support requires Rust")); +#endif } void repo_set_ref_storage_format(struct repository *repo, diff --git a/repository.h b/repository.h index 0b045fd988..fd107a5e83 100644 --- a/repository.h +++ b/repository.h @@ -202,8 +202,8 @@ struct set_gitdir_args { void repo_set_gitdir(struct repository *repo, const char *root, const struct set_gitdir_args *extra_args); void repo_set_worktree(struct repository *repo, const char *path); -void repo_set_hash_algo(struct repository *repo, int algo); -void repo_set_compat_hash_algo(struct repository *repo, int compat_algo); +void repo_set_hash_algo(struct repository *repo, uint32_t algo); +void repo_set_compat_hash_algo(struct repository *repo, uint32_t compat_algo); void repo_set_ref_storage_format(struct repository *repo, enum ref_storage_format format); void initialize_repository(struct repository *repo); @@ -14,7 +14,7 @@ static int advertise_sid = -1; static int advertise_object_info = -1; -static int client_hash_algo = GIT_HASH_SHA1_LEGACY; +static uint32_t client_hash_algo = GIT_HASH_SHA1_LEGACY; static int always_advertise(struct repository *r UNUSED, struct strbuf *value UNUSED) diff --git a/src/csum_file.rs b/src/csum_file.rs new file mode 100644 index 0000000000..7f2c6c4fcb --- /dev/null +++ b/src/csum_file.rs @@ -0,0 +1,81 @@ +// This program is free software; you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation: version 2 of the License, dated June 1991. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License along +// with this program; if not, see <https://www.gnu.org/licenses/>. + +use crate::hash::{HashAlgorithm, GIT_MAX_RAWSZ}; +use std::ffi::CStr; +use std::io::{self, Write}; +use std::os::raw::c_void; + +/// A writer that can write files identified by their hash or containing a trailing hash. +pub struct HashFile { + ptr: *mut c_void, + algo: HashAlgorithm, +} + +impl HashFile { + /// Create a new HashFile. + /// + /// The hash used will be `algo`, its name should be in `name`, and an open file descriptor + /// pointing to that file should be in `fd`. + pub fn new(algo: HashAlgorithm, fd: i32, name: &CStr) -> HashFile { + HashFile { + ptr: unsafe { c::hashfd(algo.hash_algo_ptr(), fd, name.as_ptr()) }, + algo, + } + } + + /// Finalize this HashFile instance. + /// + /// Returns the hash computed over the data. + pub fn finalize(self, component: u32, flags: u32) -> Vec<u8> { + let mut result = vec![0u8; GIT_MAX_RAWSZ]; + unsafe { c::finalize_hashfile(self.ptr, result.as_mut_ptr(), component, flags) }; + result.truncate(self.algo.raw_len()); + result + } +} + +impl Write for HashFile { + fn write(&mut self, data: &[u8]) -> io::Result<usize> { + for chunk in data.chunks(u32::MAX as usize) { + unsafe { + c::hashwrite( + self.ptr, + chunk.as_ptr() as *const c_void, + chunk.len() as u32, + ) + }; + } + Ok(data.len()) + } + + fn flush(&mut self) -> io::Result<()> { + unsafe { c::hashflush(self.ptr) }; + Ok(()) + } +} + +pub mod c { + use std::os::raw::{c_char, c_int, c_void}; + + extern "C" { + pub fn hashfd(algop: *const c_void, fd: i32, name: *const c_char) -> *mut c_void; + pub fn hashwrite(f: *mut c_void, data: *const c_void, len: u32); + pub fn hashflush(f: *mut c_void); + pub fn finalize_hashfile( + f: *mut c_void, + data: *mut u8, + component: u32, + flags: u32, + ) -> c_int; + } +} diff --git a/src/hash.rs b/src/hash.rs new file mode 100644 index 0000000000..dea2998de4 --- /dev/null +++ b/src/hash.rs @@ -0,0 +1,466 @@ +// This program is free software; you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation: version 2 of the License, dated June 1991. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License along +// with this program; if not, see <https://www.gnu.org/licenses/>. + +use std::error::Error; +use std::fmt::{self, Debug, Display}; +use std::io::{self, Write}; +use std::os::raw::c_void; + +pub const GIT_MAX_RAWSZ: usize = 32; + +/// An error indicating an invalid hash algorithm. +/// +/// The contained `u32` is the same as the `algo` field in `ObjectID`. +#[derive(Debug, Copy, Clone)] +pub struct InvalidHashAlgorithm(pub u32); + +impl Display for InvalidHashAlgorithm { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "invalid hash algorithm {}", self.0) + } +} + +impl Error for InvalidHashAlgorithm {} + +/// A binary object ID. +#[repr(C)] +#[derive(Clone, Ord, PartialOrd, Eq, PartialEq)] +pub struct ObjectID { + pub hash: [u8; GIT_MAX_RAWSZ], + pub algo: u32, +} + +#[allow(dead_code)] +impl ObjectID { + /// Return a new object ID with the given algorithm and hash. + /// + /// `hash` must be exactly the proper length for `algo` and this function panics if it is not. + /// The extra internal storage of `hash`, if any, is zero filled. + pub fn new(algo: HashAlgorithm, hash: &[u8]) -> Self { + let mut data = [0u8; GIT_MAX_RAWSZ]; + // This verifies that the length of `hash` is correct. + data[0..algo.raw_len()].copy_from_slice(hash); + Self { + hash: data, + algo: algo as u32, + } + } + + /// Return the algorithm for this object ID. + /// + /// If the algorithm set internally is not valid, this function panics. + pub fn algo(&self) -> Result<HashAlgorithm, InvalidHashAlgorithm> { + HashAlgorithm::from_u32(self.algo).ok_or(InvalidHashAlgorithm(self.algo)) + } + + pub fn as_slice(&self) -> Result<&[u8], InvalidHashAlgorithm> { + match HashAlgorithm::from_u32(self.algo) { + Some(algo) => Ok(&self.hash[0..algo.raw_len()]), + None => Err(InvalidHashAlgorithm(self.algo)), + } + } + + pub fn as_mut_slice(&mut self) -> Result<&mut [u8], InvalidHashAlgorithm> { + match HashAlgorithm::from_u32(self.algo) { + Some(algo) => Ok(&mut self.hash[0..algo.raw_len()]), + None => Err(InvalidHashAlgorithm(self.algo)), + } + } +} + +impl Display for ObjectID { + /// Format this object ID as a hex object ID. + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + let hash = self.as_slice().unwrap(); + for x in hash { + write!(f, "{:02x}", x)?; + } + Ok(()) + } +} + +impl Debug for ObjectID { + /// Format this object ID as a hex object ID with a colon and name appended to it. + /// + /// ``` + /// assert_eq!( + /// format!("{:?}", HashAlgorithm::SHA256.null_oid()), + /// "0000000000000000000000000000000000000000000000000000000000000000:sha256" + /// ); + /// ``` + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + let hash = match self.as_slice() { + Ok(hash) => hash, + Err(_) => &self.hash, + }; + for x in hash { + write!(f, "{:02x}", x)?; + } + match self.algo() { + Ok(algo) => write!(f, ":{}", algo.name()), + Err(e) => write!(f, ":invalid-hash-algo-{}", e.0), + } + } +} + +/// A trait to implement hashing with a cryptographic algorithm. +pub trait CryptoDigest { + /// Return true if this digest is safe for use with untrusted data, false otherwise. + fn is_safe(&self) -> bool; + + /// Update the digest with the specified data. + fn update(&mut self, data: &[u8]); + + /// Return an object ID, consuming the hasher. + fn into_oid(self) -> ObjectID; + + /// Return a hash as a `Vec`, consuming the hasher. + fn into_vec(self) -> Vec<u8>; +} + +/// A structure to hash data with a cryptographic hash algorithm. +/// +/// Instances of this class are safe for use with untrusted data, provided Git has been compiled +/// with a collision-detecting implementation of SHA-1. +pub struct CryptoHasher { + algo: HashAlgorithm, + ctx: *mut c_void, +} + +impl CryptoHasher { + /// Create a new hasher with the algorithm specified with `algo`. + /// + /// This hasher is safe to use on untrusted data. If SHA-1 is selected and Git was compiled + /// with a collision-detecting implementation of SHA-1, then this function will use that + /// implementation and detect any attempts at a collision. + pub fn new(algo: HashAlgorithm) -> Self { + let ctx = unsafe { c::git_hash_alloc() }; + unsafe { c::git_hash_init(ctx, algo.hash_algo_ptr()) }; + Self { algo, ctx } + } +} + +impl CryptoDigest for CryptoHasher { + /// Return true if this digest is safe for use with untrusted data, false otherwise. + fn is_safe(&self) -> bool { + true + } + + /// Update the hasher with the specified data. + fn update(&mut self, data: &[u8]) { + unsafe { c::git_hash_update(self.ctx, data.as_ptr() as *const c_void, data.len()) }; + } + + /// Return an object ID, consuming the hasher. + fn into_oid(self) -> ObjectID { + let mut oid = ObjectID { + hash: [0u8; 32], + algo: self.algo as u32, + }; + unsafe { c::git_hash_final_oid(&mut oid as *mut ObjectID as *mut c_void, self.ctx) }; + oid + } + + /// Return a hash as a `Vec`, consuming the hasher. + fn into_vec(self) -> Vec<u8> { + let mut v = vec![0u8; self.algo.raw_len()]; + unsafe { c::git_hash_final(v.as_mut_ptr(), self.ctx) }; + v + } +} + +impl Clone for CryptoHasher { + fn clone(&self) -> Self { + let ctx = unsafe { c::git_hash_alloc() }; + unsafe { c::git_hash_clone(ctx, self.ctx) }; + Self { + algo: self.algo, + ctx, + } + } +} + +impl Drop for CryptoHasher { + fn drop(&mut self) { + unsafe { c::git_hash_free(self.ctx) }; + } +} + +impl Write for CryptoHasher { + fn write(&mut self, data: &[u8]) -> io::Result<usize> { + self.update(data); + Ok(data.len()) + } + + fn flush(&mut self) -> io::Result<()> { + Ok(()) + } +} + +/// A hash algorithm, +#[repr(C)] +#[derive(Debug, Copy, Clone, Ord, PartialOrd, Eq, PartialEq)] +pub enum HashAlgorithm { + SHA1 = 1, + SHA256 = 2, +} + +#[allow(dead_code)] +impl HashAlgorithm { + const SHA1_NULL_OID: ObjectID = ObjectID { + hash: [0u8; 32], + algo: Self::SHA1 as u32, + }; + const SHA256_NULL_OID: ObjectID = ObjectID { + hash: [0u8; 32], + algo: Self::SHA256 as u32, + }; + + const SHA1_EMPTY_TREE: ObjectID = ObjectID { + hash: *b"\x4b\x82\x5d\xc6\x42\xcb\x6e\xb9\xa0\x60\xe5\x4b\xf8\xd6\x92\x88\xfb\xee\x49\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00", + algo: Self::SHA1 as u32, + }; + const SHA256_EMPTY_TREE: ObjectID = ObjectID { + hash: *b"\x6e\xf1\x9b\x41\x22\x5c\x53\x69\xf1\xc1\x04\xd4\x5d\x8d\x85\xef\xa9\xb0\x57\xb5\x3b\x14\xb4\xb9\xb9\x39\xdd\x74\xde\xcc\x53\x21", + algo: Self::SHA256 as u32, + }; + + const SHA1_EMPTY_BLOB: ObjectID = ObjectID { + hash: *b"\xe6\x9d\xe2\x9b\xb2\xd1\xd6\x43\x4b\x8b\x29\xae\x77\x5a\xd8\xc2\xe4\x8c\x53\x91\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00", + algo: Self::SHA1 as u32, + }; + const SHA256_EMPTY_BLOB: ObjectID = ObjectID { + hash: *b"\x47\x3a\x0f\x4c\x3b\xe8\xa9\x36\x81\xa2\x67\xe3\xb1\xe9\xa7\xdc\xda\x11\x85\x43\x6f\xe1\x41\xf7\x74\x91\x20\xa3\x03\x72\x18\x13", + algo: Self::SHA256 as u32, + }; + + /// Return a hash algorithm based on the internal integer ID used by Git. + /// + /// Returns `None` if the algorithm doesn't indicate a valid algorithm. + pub const fn from_u32(algo: u32) -> Option<HashAlgorithm> { + match algo { + 1 => Some(HashAlgorithm::SHA1), + 2 => Some(HashAlgorithm::SHA256), + _ => None, + } + } + + /// Return a hash algorithm based on the internal integer ID used by Git. + /// + /// Returns `None` if the algorithm doesn't indicate a valid algorithm. + pub const fn from_format_id(algo: u32) -> Option<HashAlgorithm> { + match algo { + 0x73686131 => Some(HashAlgorithm::SHA1), + 0x73323536 => Some(HashAlgorithm::SHA256), + _ => None, + } + } + + /// The name of this hash algorithm as a string suitable for the configuration file. + pub const fn name(self) -> &'static str { + match self { + HashAlgorithm::SHA1 => "sha1", + HashAlgorithm::SHA256 => "sha256", + } + } + + /// The format ID of this algorithm for binary formats. + /// + /// Note that when writing this to a data format, it should be written in big-endian format + /// explicitly. + pub const fn format_id(self) -> u32 { + match self { + HashAlgorithm::SHA1 => 0x73686131, + HashAlgorithm::SHA256 => 0x73323536, + } + } + + /// The length of binary object IDs in this algorithm in bytes. + pub const fn raw_len(self) -> usize { + match self { + HashAlgorithm::SHA1 => 20, + HashAlgorithm::SHA256 => 32, + } + } + + /// The length of object IDs in this algorithm in hexadecimal characters. + pub const fn hex_len(self) -> usize { + self.raw_len() * 2 + } + + /// The number of bytes which is processed by one iteration of this algorithm's compression + /// function. + pub const fn block_size(self) -> usize { + match self { + HashAlgorithm::SHA1 => 64, + HashAlgorithm::SHA256 => 64, + } + } + + /// The object ID representing the empty blob. + pub const fn empty_blob(self) -> &'static ObjectID { + match self { + HashAlgorithm::SHA1 => &Self::SHA1_EMPTY_BLOB, + HashAlgorithm::SHA256 => &Self::SHA256_EMPTY_BLOB, + } + } + + /// The object ID representing the empty tree. + pub const fn empty_tree(self) -> &'static ObjectID { + match self { + HashAlgorithm::SHA1 => &Self::SHA1_EMPTY_TREE, + HashAlgorithm::SHA256 => &Self::SHA256_EMPTY_TREE, + } + } + + /// The object ID which is all zeros. + pub const fn null_oid(self) -> &'static ObjectID { + match self { + HashAlgorithm::SHA1 => &Self::SHA1_NULL_OID, + HashAlgorithm::SHA256 => &Self::SHA256_NULL_OID, + } + } + + /// A pointer to the C `struct git_hash_algo` for interoperability with C. + pub fn hash_algo_ptr(self) -> *const c_void { + unsafe { c::hash_algo_ptr_by_number(self as u32) } + } + + /// Create a hasher for this algorithm. + pub fn hasher(self) -> CryptoHasher { + CryptoHasher::new(self) + } +} + +pub mod c { + use std::os::raw::c_void; + + extern "C" { + pub fn hash_algo_ptr_by_number(n: u32) -> *const c_void; + pub fn unsafe_hash_algo(algop: *const c_void) -> *const c_void; + pub fn git_hash_alloc() -> *mut c_void; + pub fn git_hash_free(ctx: *mut c_void); + pub fn git_hash_init(dst: *mut c_void, algop: *const c_void); + pub fn git_hash_clone(dst: *mut c_void, src: *const c_void); + pub fn git_hash_update(ctx: *mut c_void, inp: *const c_void, len: usize); + pub fn git_hash_final(hash: *mut u8, ctx: *mut c_void); + pub fn git_hash_final_oid(hash: *mut c_void, ctx: *mut c_void); + } +} + +#[cfg(test)] +mod tests { + use super::{CryptoDigest, HashAlgorithm, ObjectID}; + use std::io::Write; + + fn all_algos() -> &'static [HashAlgorithm] { + &[HashAlgorithm::SHA1, HashAlgorithm::SHA256] + } + + #[test] + fn format_id_round_trips() { + for algo in all_algos() { + assert_eq!( + *algo, + HashAlgorithm::from_format_id(algo.format_id()).unwrap() + ); + } + } + + #[test] + fn offset_round_trips() { + for algo in all_algos() { + assert_eq!(*algo, HashAlgorithm::from_u32(*algo as u32).unwrap()); + } + } + + #[test] + fn slices_have_correct_length() { + for algo in all_algos() { + for oid in [algo.null_oid(), algo.empty_blob(), algo.empty_tree()] { + assert_eq!(oid.as_slice().unwrap().len(), algo.raw_len()); + } + } + } + + #[test] + fn object_ids_format_correctly() { + let entries = &[ + ( + HashAlgorithm::SHA1.null_oid(), + "0000000000000000000000000000000000000000", + "0000000000000000000000000000000000000000:sha1", + ), + ( + HashAlgorithm::SHA1.empty_blob(), + "e69de29bb2d1d6434b8b29ae775ad8c2e48c5391", + "e69de29bb2d1d6434b8b29ae775ad8c2e48c5391:sha1", + ), + ( + HashAlgorithm::SHA1.empty_tree(), + "4b825dc642cb6eb9a060e54bf8d69288fbee4904", + "4b825dc642cb6eb9a060e54bf8d69288fbee4904:sha1", + ), + ( + HashAlgorithm::SHA256.null_oid(), + "0000000000000000000000000000000000000000000000000000000000000000", + "0000000000000000000000000000000000000000000000000000000000000000:sha256", + ), + ( + HashAlgorithm::SHA256.empty_blob(), + "473a0f4c3be8a93681a267e3b1e9a7dcda1185436fe141f7749120a303721813", + "473a0f4c3be8a93681a267e3b1e9a7dcda1185436fe141f7749120a303721813:sha256", + ), + ( + HashAlgorithm::SHA256.empty_tree(), + "6ef19b41225c5369f1c104d45d8d85efa9b057b53b14b4b9b939dd74decc5321", + "6ef19b41225c5369f1c104d45d8d85efa9b057b53b14b4b9b939dd74decc5321:sha256", + ), + ]; + for (oid, display, debug) in entries { + assert_eq!(format!("{}", oid), *display); + assert_eq!(format!("{:?}", oid), *debug); + } + } + + #[test] + fn hasher_works_correctly() { + for algo in all_algos() { + let tests: &[(&[u8], &ObjectID)] = &[ + (b"blob 0\0", algo.empty_blob()), + (b"tree 0\0", algo.empty_tree()), + ]; + for (data, oid) in tests { + let mut h = algo.hasher(); + assert!(h.is_safe()); + // Test that this works incrementally. + h.update(&data[0..2]); + h.update(&data[2..]); + + let h2 = h.clone(); + + let actual_oid = h.into_oid(); + assert_eq!(**oid, actual_oid); + + let v = h2.into_vec(); + assert_eq!((*oid).as_slice().unwrap(), &v); + + let mut h = algo.hasher(); + h.write_all(&data[0..2]).unwrap(); + h.write_all(&data[2..]).unwrap(); + + let actual_oid = h.into_oid(); + assert_eq!(**oid, actual_oid); + } + } + } +} diff --git a/src/lib.rs b/src/lib.rs index 9da70d8b57..0c598298b1 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1 +1,4 @@ +pub mod csum_file; +pub mod hash; +pub mod loose; pub mod varint; diff --git a/src/loose.rs b/src/loose.rs new file mode 100644 index 0000000000..24accf9c33 --- /dev/null +++ b/src/loose.rs @@ -0,0 +1,913 @@ +// This program is free software; you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation: version 2 of the License, dated June 1991. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License along +// with this program; if not, see <https://www.gnu.org/licenses/>. + +use crate::hash::{HashAlgorithm, ObjectID, GIT_MAX_RAWSZ}; +use std::collections::BTreeMap; +use std::convert::TryInto; +use std::io::{self, Write}; + +/// The type of object stored in the map. +/// +/// If this value is `Reserved`, then it is never written to disk and is used primarily to store +/// certain hard-coded objects, like the empty tree, empty blob, or null object ID. +/// +/// If this value is `LooseObject`, then this represents a loose object. `Shallow` represents a +/// shallow commit, its parent, or its tree. `Submodule` represents a submodule commit. +#[repr(C)] +#[derive(Debug, Clone, Copy, Ord, PartialOrd, Eq, PartialEq)] +pub enum MapType { + Reserved = 0, + LooseObject = 1, + Shallow = 2, + Submodule = 3, +} + +impl MapType { + pub fn from_u32(n: u32) -> Option<MapType> { + match n { + 0 => Some(Self::Reserved), + 1 => Some(Self::LooseObject), + 2 => Some(Self::Shallow), + 3 => Some(Self::Submodule), + _ => None, + } + } +} + +/// The value of an object stored in a `ObjectMemoryMap`. +/// +/// This keeps the object ID to which the key is mapped and its kind together. +struct MappedObject { + oid: ObjectID, + kind: MapType, +} + +/// Memory storage for a loose object. +struct ObjectMemoryMap { + to_compat: BTreeMap<ObjectID, MappedObject>, + to_storage: BTreeMap<ObjectID, MappedObject>, + compat: HashAlgorithm, + storage: HashAlgorithm, +} + +impl ObjectMemoryMap { + /// Create a new `ObjectMemoryMap`. + /// + /// The storage and compatibility `HashAlgorithm` instances are used to store the object IDs in + /// the correct map. + fn new(storage: HashAlgorithm, compat: HashAlgorithm) -> Self { + Self { + to_compat: BTreeMap::new(), + to_storage: BTreeMap::new(), + compat, + storage, + } + } + + fn len(&self) -> usize { + self.to_compat.len() + } + + /// Write this map to an interface implementing `std::io::Write`. + fn write<W: Write>(&self, wrtr: W) -> io::Result<()> { + const VERSION_NUMBER: u32 = 1; + const NUM_OBJECT_FORMATS: u32 = 2; + const PADDING: [u8; 4] = [0u8; 4]; + + let mut wrtr = wrtr; + let header_size: u32 = (4 * 5) + (4 + 4 + 8) * NUM_OBJECT_FORMATS + 8; + + wrtr.write_all(b"LMAP")?; + wrtr.write_all(&VERSION_NUMBER.to_be_bytes())?; + wrtr.write_all(&header_size.to_be_bytes())?; + wrtr.write_all(&(self.to_compat.len() as u32).to_be_bytes())?; + wrtr.write_all(&NUM_OBJECT_FORMATS.to_be_bytes())?; + + let storage_short_len = self.find_short_name_len(&self.to_compat, self.storage); + let compat_short_len = self.find_short_name_len(&self.to_storage, self.compat); + + let storage_npadding = Self::required_nul_padding(self.to_compat.len(), storage_short_len); + let compat_npadding = Self::required_nul_padding(self.to_compat.len(), compat_short_len); + + let mut offset: u64 = header_size as u64; + + for (algo, len, npadding) in &[ + (self.storage, storage_short_len, storage_npadding), + (self.compat, compat_short_len, compat_npadding), + ] { + wrtr.write_all(&algo.format_id().to_be_bytes())?; + wrtr.write_all(&(*len as u32).to_be_bytes())?; + + offset += *npadding; + wrtr.write_all(&offset.to_be_bytes())?; + + offset += self.to_compat.len() as u64 * (*len as u64 + algo.raw_len() as u64 + 4); + } + + wrtr.write_all(&offset.to_be_bytes())?; + + let order_map: BTreeMap<&ObjectID, usize> = self + .to_compat + .keys() + .enumerate() + .map(|(i, oid)| (oid, i)) + .collect(); + + wrtr.write_all(&PADDING[0..storage_npadding as usize])?; + for oid in self.to_compat.keys() { + wrtr.write_all(&oid.as_slice().unwrap()[0..storage_short_len])?; + } + for oid in self.to_compat.keys() { + wrtr.write_all(oid.as_slice().unwrap())?; + } + for meta in self.to_compat.values() { + wrtr.write_all(&(meta.kind as u32).to_be_bytes())?; + } + + wrtr.write_all(&PADDING[0..compat_npadding as usize])?; + for oid in self.to_storage.keys() { + wrtr.write_all(&oid.as_slice().unwrap()[0..compat_short_len])?; + } + for meta in self.to_compat.values() { + wrtr.write_all(meta.oid.as_slice().unwrap())?; + } + for meta in self.to_storage.values() { + wrtr.write_all(&(order_map[&meta.oid] as u32).to_be_bytes())?; + } + + Ok(()) + } + + fn required_nul_padding(nitems: usize, short_len: usize) -> u64 { + let shortened_table_len = nitems as u64 * short_len as u64; + let misalignment = shortened_table_len & 3; + // If the value is 0, return 0; otherwise, return the difference from 4. + (4 - misalignment) & 3 + } + + fn last_matching_offset(a: &ObjectID, b: &ObjectID, algop: HashAlgorithm) -> usize { + for i in 0..=algop.raw_len() { + if a.hash[i] != b.hash[i] { + return i; + } + } + algop.raw_len() + } + + fn find_short_name_len( + &self, + map: &BTreeMap<ObjectID, MappedObject>, + algop: HashAlgorithm, + ) -> usize { + if map.len() <= 1 { + return 1; + } + let mut len = 1; + let mut iter = map.keys(); + let mut cur = match iter.next() { + Some(cur) => cur, + None => return len, + }; + for item in iter { + let offset = Self::last_matching_offset(cur, item, algop); + if offset >= len { + len = offset + 1; + } + cur = item; + } + if len > algop.raw_len() { + algop.raw_len() + } else { + len + } + } +} + +struct ObjectFormatData { + data_off: usize, + shortened_len: usize, + full_off: usize, + mapping_off: Option<usize>, +} + +pub struct MmapedObjectMapIter<'a> { + offset: usize, + algos: Vec<HashAlgorithm>, + source: &'a MmapedObjectMap<'a>, +} + +impl<'a> Iterator for MmapedObjectMapIter<'a> { + type Item = Vec<ObjectID>; + + fn next(&mut self) -> Option<Self::Item> { + if self.offset >= self.source.nitems { + return None; + } + let offset = self.offset; + self.offset += 1; + let v: Vec<ObjectID> = self + .algos + .iter() + .cloned() + .filter_map(|algo| self.source.oid_from_offset(offset, algo)) + .collect(); + if v.len() != self.algos.len() { + return None; + } + Some(v) + } +} + +#[allow(dead_code)] +pub struct MmapedObjectMap<'a> { + memory: &'a [u8], + nitems: usize, + meta_off: usize, + obj_formats: BTreeMap<HashAlgorithm, ObjectFormatData>, + main_algo: HashAlgorithm, +} + +#[derive(Debug)] +#[allow(dead_code)] +enum MmapedParseError { + HeaderTooSmall, + InvalidSignature, + InvalidVersion, + UnknownAlgorithm, + OffsetTooLarge, + TooFewObjectFormats, + UnalignedData, + InvalidTrailerOffset, +} + +#[allow(dead_code)] +impl<'a> MmapedObjectMap<'a> { + fn new( + slice: &'a [u8], + hash_algo: HashAlgorithm, + ) -> Result<MmapedObjectMap<'a>, MmapedParseError> { + let object_format_header_size = 4 + 4 + 8; + let trailer_offset_size = 8; + let header_size: usize = + 4 + 4 + 4 + 4 + 4 + object_format_header_size * 2 + trailer_offset_size; + if slice.len() < header_size { + return Err(MmapedParseError::HeaderTooSmall); + } + if slice[0..4] != *b"LMAP" { + return Err(MmapedParseError::InvalidSignature); + } + if Self::u32_at_offset(slice, 4) != 1 { + return Err(MmapedParseError::InvalidVersion); + } + let _ = Self::u32_at_offset(slice, 8) as usize; + let nitems = Self::u32_at_offset(slice, 12) as usize; + let nobj_formats = Self::u32_at_offset(slice, 16) as usize; + if nobj_formats < 2 { + return Err(MmapedParseError::TooFewObjectFormats); + } + let mut offset = 20; + let mut meta_off = None; + let mut data = BTreeMap::new(); + for i in 0..nobj_formats { + if offset + object_format_header_size + trailer_offset_size > slice.len() { + return Err(MmapedParseError::HeaderTooSmall); + } + let format_id = Self::u32_at_offset(slice, offset); + let shortened_len = Self::u32_at_offset(slice, offset + 4) as usize; + let data_off = Self::u64_at_offset(slice, offset + 8); + + let algo = HashAlgorithm::from_format_id(format_id) + .ok_or(MmapedParseError::UnknownAlgorithm)?; + let data_off: usize = data_off + .try_into() + .map_err(|_| MmapedParseError::OffsetTooLarge)?; + + // Every object format must have these entries. + let shortened_table_len = shortened_len + .checked_mul(nitems) + .ok_or(MmapedParseError::OffsetTooLarge)?; + let full_off = data_off + .checked_add(shortened_table_len) + .ok_or(MmapedParseError::OffsetTooLarge)?; + Self::verify_aligned(full_off)?; + Self::verify_valid(slice, full_off as u64)?; + + let full_length = algo + .raw_len() + .checked_mul(nitems) + .ok_or(MmapedParseError::OffsetTooLarge)?; + let off = full_length + .checked_add(full_off) + .ok_or(MmapedParseError::OffsetTooLarge)?; + Self::verify_aligned(off)?; + Self::verify_valid(slice, off as u64)?; + + // This is for the metadata for the first object format and for the order mapping for + // other object formats. + let meta_size = nitems + .checked_mul(4) + .ok_or(MmapedParseError::OffsetTooLarge)?; + let meta_end = off + .checked_add(meta_size) + .ok_or(MmapedParseError::OffsetTooLarge)?; + Self::verify_valid(slice, meta_end as u64)?; + + let mut mapping_off = None; + if i == 0 { + meta_off = Some(off); + } else { + mapping_off = Some(off); + } + + data.insert( + algo, + ObjectFormatData { + data_off, + shortened_len, + full_off, + mapping_off, + }, + ); + offset += object_format_header_size; + } + let trailer = Self::u64_at_offset(slice, offset); + Self::verify_aligned(trailer as usize)?; + Self::verify_valid(slice, trailer)?; + let end = trailer + .checked_add(hash_algo.raw_len() as u64) + .ok_or(MmapedParseError::OffsetTooLarge)?; + if end != slice.len() as u64 { + return Err(MmapedParseError::InvalidTrailerOffset); + } + match meta_off { + Some(meta_off) => Ok(MmapedObjectMap { + memory: slice, + nitems, + meta_off, + obj_formats: data, + main_algo: hash_algo, + }), + None => Err(MmapedParseError::TooFewObjectFormats), + } + } + + fn iter(&self) -> MmapedObjectMapIter<'_> { + let mut algos = Vec::with_capacity(self.obj_formats.len()); + algos.push(self.main_algo); + for algo in self.obj_formats.keys().cloned() { + if algo != self.main_algo { + algos.push(algo); + } + } + MmapedObjectMapIter { + offset: 0, + algos, + source: self, + } + } + + /// Treats `sl` as if it were a set of slices of `wanted.len()` bytes, and searches for + /// `wanted` within it. + /// + /// If found, returns the offset of the subslice in `sl`. + /// + /// ``` + /// let sl = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]; + /// + /// assert_eq!(MmapedObjectMap::binary_search_slice(sl, &[2, 3]), Some(1)); + /// assert_eq!(MmapedObjectMap::binary_search_slice(sl, &[6, 7]), Some(4)); + /// assert_eq!(MmapedObjectMap::binary_search_slice(sl, &[1, 2]), None); + /// assert_eq!(MmapedObjectMap::binary_search_slice(sl, &[10, 20]), None); + /// ``` + fn binary_search_slice(sl: &[u8], wanted: &[u8]) -> Option<usize> { + let len = wanted.len(); + let res = sl.binary_search_by(|item| { + // We would like element_offset, but that is currently nightly only. Instead, do a + // pointer subtraction to find the index. + let index = unsafe { (item as *const u8).offset_from(sl.as_ptr()) } as usize; + // Now we have the index of this object. Round it down to the nearest full-sized + // chunk to find the actual offset where this starts. + let index = index - (index % len); + // Compute the comparison of that value instead, which will provide the expected + // result. + sl[index..index + wanted.len()].cmp(wanted) + }); + res.ok().map(|offset| offset / len) + } + + /// Look up `oid` in the map in order to convert it to `algo`. + /// + /// If this object is in the map, return the offset in the table for the main algorithm. + fn look_up_object(&self, oid: &ObjectID) -> Option<usize> { + let oid_algo = HashAlgorithm::from_u32(oid.algo)?; + let params = self.obj_formats.get(&oid_algo)?; + let short_table = + &self.memory[params.data_off..params.data_off + (params.shortened_len * self.nitems)]; + let index = Self::binary_search_slice( + short_table, + &oid.as_slice().unwrap()[0..params.shortened_len], + )?; + match params.mapping_off { + Some(from_off) => { + // oid is in a compatibility algorithm. Find the mapping index. + let mapped = Self::u32_at_offset(self.memory, from_off + index * 4) as usize; + if mapped >= self.nitems { + return None; + } + let oid_offset = params.full_off + mapped * oid_algo.raw_len(); + if self.memory[oid_offset..oid_offset + oid_algo.raw_len()] + != *oid.as_slice().unwrap() + { + return None; + } + Some(mapped) + } + None => { + // oid is in the main algorithm. Find the object ID in the main map to confirm + // it's correct. + let oid_offset = params.full_off + index * oid_algo.raw_len(); + if self.memory[oid_offset..oid_offset + oid_algo.raw_len()] + != *oid.as_slice().unwrap() + { + return None; + } + Some(index) + } + } + } + + #[allow(dead_code)] + fn map_object(&self, oid: &ObjectID, algo: HashAlgorithm) -> Option<MappedObject> { + let main = self.look_up_object(oid)?; + let meta = MapType::from_u32(Self::u32_at_offset(self.memory, self.meta_off + (main * 4)))?; + Some(MappedObject { + oid: self.oid_from_offset(main, algo)?, + kind: meta, + }) + } + + fn map_oid(&self, oid: &ObjectID, algo: HashAlgorithm) -> Option<ObjectID> { + if algo as u32 == oid.algo { + return Some(oid.clone()); + } + + let main = self.look_up_object(oid)?; + self.oid_from_offset(main, algo) + } + + fn oid_from_offset(&self, offset: usize, algo: HashAlgorithm) -> Option<ObjectID> { + let aparams = self.obj_formats.get(&algo)?; + + let mut hash = [0u8; GIT_MAX_RAWSZ]; + let len = algo.raw_len(); + let oid_off = aparams.full_off + (offset * len); + hash[0..len].copy_from_slice(&self.memory[oid_off..oid_off + len]); + Some(ObjectID { + hash, + algo: algo as u32, + }) + } + + fn u32_at_offset(slice: &[u8], offset: usize) -> u32 { + u32::from_be_bytes(slice[offset..offset + 4].try_into().unwrap()) + } + + fn u64_at_offset(slice: &[u8], offset: usize) -> u64 { + u64::from_be_bytes(slice[offset..offset + 8].try_into().unwrap()) + } + + fn verify_aligned(offset: usize) -> Result<(), MmapedParseError> { + if (offset & 3) != 0 { + return Err(MmapedParseError::UnalignedData); + } + Ok(()) + } + + fn verify_valid(slice: &[u8], offset: u64) -> Result<(), MmapedParseError> { + if offset >= slice.len() as u64 { + return Err(MmapedParseError::OffsetTooLarge); + } + Ok(()) + } +} + +/// A map for loose and other non-packed object IDs that maps between a storage and compatibility +/// mapping. +/// +/// In addition to the in-memory option, there is an optional batched storage, which can be used to +/// write objects to disk in an efficient way. +pub struct ObjectMap { + mem: ObjectMemoryMap, + batch: Option<ObjectMemoryMap>, +} + +impl ObjectMap { + /// Create a new `ObjectMap` with the given hash algorithms. + /// + /// This initializes the memory map to automatically map the empty tree, empty blob, and null + /// object ID. + pub fn new(storage: HashAlgorithm, compat: HashAlgorithm) -> Self { + let mut map = ObjectMemoryMap::new(storage, compat); + for (main, compat) in &[ + (storage.empty_tree(), compat.empty_tree()), + (storage.empty_blob(), compat.empty_blob()), + (storage.null_oid(), compat.null_oid()), + ] { + map.to_storage.insert( + (*compat).clone(), + MappedObject { + oid: (*main).clone(), + kind: MapType::Reserved, + }, + ); + map.to_compat.insert( + (*main).clone(), + MappedObject { + oid: (*compat).clone(), + kind: MapType::Reserved, + }, + ); + } + Self { + mem: map, + batch: None, + } + } + + pub fn hash_algo(&self) -> HashAlgorithm { + self.mem.storage + } + + /// Start a batch for efficient writing. + /// + /// If there is already a batch started, this does nothing and the existing batch is retained. + pub fn start_batch(&mut self) { + if self.batch.is_none() { + self.batch = Some(ObjectMemoryMap::new(self.mem.storage, self.mem.compat)); + } + } + + pub fn batch_len(&self) -> Option<usize> { + self.batch.as_ref().map(|b| b.len()) + } + + /// If a batch exists, write it to the writer. + pub fn finish_batch<W: Write>(&mut self, w: W) -> io::Result<()> { + if let Some(txn) = self.batch.take() { + txn.write(w)?; + } + Ok(()) + } + + /// If a batch exists, write it to the writer. + pub fn abort_batch(&mut self) { + self.batch = None; + } + + /// Return whether there is a batch already started. + /// + /// If you just want a batch to exist and don't care whether one has already been started, you + /// may simply call `start_batch` unconditionally. + pub fn has_batch(&self) -> bool { + self.batch.is_some() + } + + /// Insert an object into the map. + /// + /// If `write` is true and there is a batch started, write the object into the batch as well as + /// into the memory map. + pub fn insert(&mut self, oid1: &ObjectID, oid2: &ObjectID, kind: MapType, write: bool) { + let (compat_oid, storage_oid) = + if HashAlgorithm::from_u32(oid1.algo) == Some(self.mem.compat) { + (oid1, oid2) + } else { + (oid2, oid1) + }; + Self::insert_into(&mut self.mem, storage_oid, compat_oid, kind); + if write { + if let Some(ref mut batch) = self.batch { + Self::insert_into(batch, storage_oid, compat_oid, kind); + } + } + } + + fn insert_into( + map: &mut ObjectMemoryMap, + storage: &ObjectID, + compat: &ObjectID, + kind: MapType, + ) { + map.to_compat.insert( + storage.clone(), + MappedObject { + oid: compat.clone(), + kind, + }, + ); + map.to_storage.insert( + compat.clone(), + MappedObject { + oid: storage.clone(), + kind, + }, + ); + } + + #[allow(dead_code)] + fn map_object(&self, oid: &ObjectID, algo: HashAlgorithm) -> Option<&MappedObject> { + let map = if algo == self.mem.storage { + &self.mem.to_storage + } else { + &self.mem.to_compat + }; + map.get(oid) + } + + #[allow(dead_code)] + fn map_oid<'a, 'b: 'a>( + &'b self, + oid: &'a ObjectID, + algo: HashAlgorithm, + ) -> Option<&'a ObjectID> { + if algo as u32 == oid.algo { + return Some(oid); + } + let entry = self.map_object(oid, algo); + entry.map(|obj| &obj.oid) + } +} + +#[cfg(test)] +mod tests { + use super::{MapType, MmapedObjectMap, ObjectMap, ObjectMemoryMap}; + use crate::hash::{CryptoDigest, CryptoHasher, HashAlgorithm, ObjectID}; + use std::convert::TryInto; + use std::io::{self, Cursor, Write}; + + struct TrailingWriter { + curs: Cursor<Vec<u8>>, + hasher: CryptoHasher, + } + + impl TrailingWriter { + fn new() -> Self { + Self { + curs: Cursor::new(Vec::new()), + hasher: CryptoHasher::new(HashAlgorithm::SHA256), + } + } + + fn finalize(mut self) -> Vec<u8> { + let _ = self.hasher.flush(); + let mut v = self.curs.into_inner(); + v.extend(self.hasher.into_vec()); + v + } + } + + impl Write for TrailingWriter { + fn write(&mut self, data: &[u8]) -> io::Result<usize> { + self.hasher.write_all(data)?; + self.curs.write_all(data)?; + Ok(data.len()) + } + + fn flush(&mut self) -> io::Result<()> { + self.hasher.flush()?; + self.curs.flush()?; + Ok(()) + } + } + + fn sha1_oid(b: &[u8]) -> ObjectID { + assert_eq!(b.len(), 20); + let mut data = [0u8; 32]; + data[0..20].copy_from_slice(b); + ObjectID { + hash: data, + algo: HashAlgorithm::SHA1 as u32, + } + } + + fn sha256_oid(b: &[u8]) -> ObjectID { + assert_eq!(b.len(), 32); + ObjectID { + hash: b.try_into().unwrap(), + algo: HashAlgorithm::SHA256 as u32, + } + } + + #[allow(clippy::type_complexity)] + fn test_entries() -> &'static [(&'static str, &'static [u8], &'static [u8], MapType, bool)] { + // These are all example blobs containing the content in the first argument. + &[ + ("abc", b"\xf2\xba\x8f\x84\xab\x5c\x1b\xce\x84\xa7\xb4\x41\xcb\x19\x59\xcf\xc7\x09\x3b\x7f", b"\xc1\xcf\x6e\x46\x50\x77\x93\x0e\x88\xdc\x51\x36\x64\x1d\x40\x2f\x72\xa2\x29\xdd\xd9\x96\xf6\x27\xd6\x0e\x96\x39\xea\xba\x35\xa6", MapType::LooseObject, false), + ("def", b"\x0c\x00\x38\x32\xe7\xbf\xa9\xca\x8b\x5c\x20\x35\xc9\xbd\x68\x4a\x5f\x26\x23\xbc", b"\x8a\x90\x17\x26\x48\x4d\xb0\xf2\x27\x9f\x30\x8d\x58\x96\xd9\x6b\xf6\x3a\xd6\xde\x95\x7c\xa3\x8a\xdc\x33\x61\x68\x03\x6e\xf6\x63", MapType::Shallow, true), + ("ghi", b"\x45\xa8\x2e\x29\x5c\x52\x47\x31\x14\xc5\x7c\x18\xf4\xf5\x23\x68\xdf\x2a\x3c\xfd", b"\x6e\x47\x4c\x74\xf5\xd7\x78\x14\xc7\xf7\xf0\x7c\x37\x80\x07\x90\x53\x42\xaf\x42\x81\xe6\x86\x8d\x33\x46\x45\x4b\xb8\x63\xab\xc3", MapType::Submodule, false), + ("jkl", b"\x45\x32\x8c\x36\xff\x2e\x9b\x9b\x4e\x59\x2c\x84\x7d\x3f\x9a\x7f\xd9\xb3\xe7\x16", b"\xc3\xee\xf7\x54\xa2\x1e\xc6\x9d\x43\x75\xbe\x6f\x18\x47\x89\xa8\x11\x6f\xd9\x66\xfc\x67\xdc\x31\xd2\x11\x15\x42\xc8\xd5\xa0\xaf", MapType::LooseObject, true), + ] + } + + fn test_map(write_all: bool) -> Box<ObjectMap> { + let mut map = Box::new(ObjectMap::new(HashAlgorithm::SHA256, HashAlgorithm::SHA1)); + + map.start_batch(); + + for (_blob_content, sha1, sha256, kind, swap) in test_entries() { + let s256 = sha256_oid(sha256); + let s1 = sha1_oid(sha1); + let write = write_all || (*kind as u32 & 2) == 0; + if *swap { + // Insert the item into the batch arbitrarily based on the type. This tests that + // we can specify either order and we'll do the right thing. + map.insert(&s256, &s1, *kind, write); + } else { + map.insert(&s1, &s256, *kind, write); + } + } + + map + } + + #[test] + fn can_read_and_write_format() { + for full in &[true, false] { + let mut map = test_map(*full); + let mut wrtr = TrailingWriter::new(); + map.finish_batch(&mut wrtr).unwrap(); + + assert!(!map.has_batch()); + + let data = wrtr.finalize(); + MmapedObjectMap::new(&data, HashAlgorithm::SHA256).unwrap(); + } + } + + #[test] + fn looks_up_from_mmaped() { + let mut map = test_map(true); + let mut wrtr = TrailingWriter::new(); + map.finish_batch(&mut wrtr).unwrap(); + + assert!(!map.has_batch()); + + let data = wrtr.finalize(); + let entries = test_entries(); + let map = MmapedObjectMap::new(&data, HashAlgorithm::SHA256).unwrap(); + + for (_, sha1, sha256, kind, _) in entries { + let s256 = sha256_oid(sha256); + let s1 = sha1_oid(sha1); + + let res = map.map_object(&s256, HashAlgorithm::SHA1).unwrap(); + assert_eq!(res.oid, s1); + assert_eq!(res.kind, *kind); + let res = map.map_oid(&s256, HashAlgorithm::SHA1).unwrap(); + assert_eq!(res, s1); + + let res = map.map_object(&s256, HashAlgorithm::SHA256).unwrap(); + assert_eq!(res.oid, s256); + assert_eq!(res.kind, *kind); + let res = map.map_oid(&s256, HashAlgorithm::SHA256).unwrap(); + assert_eq!(res, s256); + + let res = map.map_object(&s1, HashAlgorithm::SHA256).unwrap(); + assert_eq!(res.oid, s256); + assert_eq!(res.kind, *kind); + let res = map.map_oid(&s1, HashAlgorithm::SHA256).unwrap(); + assert_eq!(res, s256); + + let res = map.map_object(&s1, HashAlgorithm::SHA1).unwrap(); + assert_eq!(res.oid, s1); + assert_eq!(res.kind, *kind); + let res = map.map_oid(&s1, HashAlgorithm::SHA1).unwrap(); + assert_eq!(res, s1); + } + + for octet in &[0x00u8, 0x6d, 0x6e, 0x8a, 0xff] { + let missing_oid = ObjectID { + hash: [*octet; 32], + algo: HashAlgorithm::SHA256 as u32, + }; + + assert!(map.map_object(&missing_oid, HashAlgorithm::SHA1).is_none()); + assert!(map.map_oid(&missing_oid, HashAlgorithm::SHA1).is_none()); + + assert_eq!( + map.map_oid(&missing_oid, HashAlgorithm::SHA256).unwrap(), + missing_oid + ); + } + } + + #[test] + fn binary_searches_slices_correctly() { + let sl = &[ + 0, 1, 2, 15, 14, 13, 18, 10, 2, 20, 20, 20, 21, 21, 0, 21, 21, 1, 21, 21, 21, 21, 21, + 22, 22, 23, 24, + ]; + + let expected: &[(&[u8], Option<usize>)] = &[ + (&[0, 1, 2], Some(0)), + (&[15, 14, 13], Some(1)), + (&[18, 10, 2], Some(2)), + (&[20, 20, 20], Some(3)), + (&[21, 21, 0], Some(4)), + (&[21, 21, 1], Some(5)), + (&[21, 21, 21], Some(6)), + (&[21, 21, 22], Some(7)), + (&[22, 23, 24], Some(8)), + (&[2, 15, 14], None), + (&[0, 21, 21], None), + (&[21, 21, 23], None), + (&[22, 22, 23], None), + (&[0xff, 0xff, 0xff], None), + (&[0, 0, 0], None), + ]; + + for (wanted, value) in expected { + assert_eq!(MmapedObjectMap::binary_search_slice(sl, wanted), *value); + } + } + + #[test] + fn looks_up_oid_correctly() { + let map = test_map(false); + let entries = test_entries(); + + let s256 = sha256_oid(entries[0].2); + let s1 = sha1_oid(entries[0].1); + + let missing_oid = ObjectID { + hash: [0xffu8; 32], + algo: HashAlgorithm::SHA256 as u32, + }; + + let res = map.map_object(&s256, HashAlgorithm::SHA1).unwrap(); + assert_eq!(res.oid, s1); + assert_eq!(res.kind, MapType::LooseObject); + let res = map.map_oid(&s256, HashAlgorithm::SHA1).unwrap(); + assert_eq!(*res, s1); + + let res = map.map_object(&s1, HashAlgorithm::SHA256).unwrap(); + assert_eq!(res.oid, s256); + assert_eq!(res.kind, MapType::LooseObject); + let res = map.map_oid(&s1, HashAlgorithm::SHA256).unwrap(); + assert_eq!(*res, s256); + + assert!(map.map_object(&missing_oid, HashAlgorithm::SHA1).is_none()); + assert!(map.map_oid(&missing_oid, HashAlgorithm::SHA1).is_none()); + + assert_eq!( + *map.map_oid(&missing_oid, HashAlgorithm::SHA256).unwrap(), + missing_oid + ); + } + + #[test] + fn looks_up_known_oids_correctly() { + let map = test_map(false); + + let funcs: &[&dyn Fn(HashAlgorithm) -> &'static ObjectID] = &[ + &|h: HashAlgorithm| h.empty_tree(), + &|h: HashAlgorithm| h.empty_blob(), + &|h: HashAlgorithm| h.null_oid(), + ]; + + for f in funcs { + let s256 = f(HashAlgorithm::SHA256); + let s1 = f(HashAlgorithm::SHA1); + + let res = map.map_object(s256, HashAlgorithm::SHA1).unwrap(); + assert_eq!(res.oid, *s1); + assert_eq!(res.kind, MapType::Reserved); + let res = map.map_oid(s256, HashAlgorithm::SHA1).unwrap(); + assert_eq!(*res, *s1); + + let res = map.map_object(s1, HashAlgorithm::SHA256).unwrap(); + assert_eq!(res.oid, *s256); + assert_eq!(res.kind, MapType::Reserved); + let res = map.map_oid(s1, HashAlgorithm::SHA256).unwrap(); + assert_eq!(*res, *s256); + } + } + + #[test] + fn nul_padding() { + assert_eq!(ObjectMemoryMap::required_nul_padding(1, 1), 3); + assert_eq!(ObjectMemoryMap::required_nul_padding(2, 1), 2); + assert_eq!(ObjectMemoryMap::required_nul_padding(3, 1), 1); + assert_eq!(ObjectMemoryMap::required_nul_padding(2, 2), 0); + + assert_eq!(ObjectMemoryMap::required_nul_padding(39, 3), 3); + } +} diff --git a/src/meson.build b/src/meson.build index 25b9ad5a14..45739957b4 100644 --- a/src/meson.build +++ b/src/meson.build @@ -1,5 +1,8 @@ libgit_rs_sources = [ + 'csum_file.rs', + 'hash.rs', 'lib.rs', + 'loose.rs', 'varint.rs', ] diff --git a/t/t1006-cat-file.sh b/t/t1006-cat-file.sh index 0eee3bb878..22ea305387 100755 --- a/t/t1006-cat-file.sh +++ b/t/t1006-cat-file.sh @@ -241,10 +241,16 @@ hello_content="Hello World" hello_size=$(strlen "$hello_content") hello_oid=$(echo_without_newline "$hello_content" | git hash-object --stdin) -test_expect_success "setup" ' +test_expect_success "setup part 1" ' git config core.repositoryformatversion 1 && - git config extensions.objectformat $test_hash_algo && - git config extensions.compatobjectformat $test_compat_hash_algo && + git config extensions.objectformat $test_hash_algo +' + +test_expect_success RUST 'compat setup' ' + git config extensions.compatobjectformat $test_compat_hash_algo +' + +test_expect_success 'setup part 2' ' echo_without_newline "$hello_content" > hello && git update-index --add hello && echo_without_newline "$hello_content" > "path with spaces" && @@ -273,9 +279,13 @@ run_blob_tests () { ' } -hello_compat_oid=$(git rev-parse --output-object-format=$test_compat_hash_algo $hello_oid) run_blob_tests $hello_oid -run_blob_tests $hello_compat_oid + +if test_have_prereq RUST +then + hello_compat_oid=$(git rev-parse --output-object-format=$test_compat_hash_algo $hello_oid) + run_blob_tests $hello_compat_oid +fi test_expect_success '--batch-check without %(rest) considers whole line' ' echo "$hello_oid blob $hello_size" >expect && @@ -286,62 +296,76 @@ test_expect_success '--batch-check without %(rest) considers whole line' ' ' tree_oid=$(git write-tree) -tree_compat_oid=$(git rev-parse --output-object-format=$test_compat_hash_algo $tree_oid) tree_size=$((2 * $(test_oid rawsz) + 13 + 24)) -tree_compat_size=$((2 * $(test_oid --hash=compat rawsz) + 13 + 24)) tree_pretty_content="100644 blob $hello_oid hello${LF}100755 blob $hello_oid path with spaces${LF}" -tree_compat_pretty_content="100644 blob $hello_compat_oid hello${LF}100755 blob $hello_compat_oid path with spaces${LF}" run_tests 'tree' $tree_oid "" $tree_size "" "$tree_pretty_content" -run_tests 'tree' $tree_compat_oid "" $tree_compat_size "" "$tree_compat_pretty_content" run_tests 'blob' "$tree_oid:hello" "100644" $hello_size "" "$hello_content" $hello_oid -run_tests 'blob' "$tree_compat_oid:hello" "100644" $hello_size "" "$hello_content" $hello_compat_oid run_tests 'blob' "$tree_oid:path with spaces" "100755" $hello_size "" "$hello_content" $hello_oid -run_tests 'blob' "$tree_compat_oid:path with spaces" "100755" $hello_size "" "$hello_content" $hello_compat_oid + +if test_have_prereq RUST +then + tree_compat_oid=$(git rev-parse --output-object-format=$test_compat_hash_algo $tree_oid) + tree_compat_size=$((2 * $(test_oid --hash=compat rawsz) + 13 + 24)) + tree_compat_pretty_content="100644 blob $hello_compat_oid hello${LF}100755 blob $hello_compat_oid path with spaces${LF}" + + run_tests 'tree' $tree_compat_oid "" $tree_compat_size "" "$tree_compat_pretty_content" + run_tests 'blob' "$tree_compat_oid:hello" "100644" $hello_size "" "$hello_content" $hello_compat_oid + run_tests 'blob' "$tree_compat_oid:path with spaces" "100755" $hello_size "" "$hello_content" $hello_compat_oid +fi commit_message="Initial commit" commit_oid=$(echo_without_newline "$commit_message" | git commit-tree $tree_oid) -commit_compat_oid=$(git rev-parse --output-object-format=$test_compat_hash_algo $commit_oid) commit_size=$(($(test_oid hexsz) + 137)) -commit_compat_size=$(($(test_oid --hash=compat hexsz) + 137)) commit_content="tree $tree_oid author $GIT_AUTHOR_NAME <$GIT_AUTHOR_EMAIL> $GIT_AUTHOR_DATE committer $GIT_COMMITTER_NAME <$GIT_COMMITTER_EMAIL> $GIT_COMMITTER_DATE $commit_message" -commit_compat_content="tree $tree_compat_oid +run_tests 'commit' $commit_oid "" $commit_size "$commit_content" "$commit_content" + +if test_have_prereq RUST +then + commit_compat_oid=$(git rev-parse --output-object-format=$test_compat_hash_algo $commit_oid) + commit_compat_size=$(($(test_oid --hash=compat hexsz) + 137)) + commit_compat_content="tree $tree_compat_oid author $GIT_AUTHOR_NAME <$GIT_AUTHOR_EMAIL> $GIT_AUTHOR_DATE committer $GIT_COMMITTER_NAME <$GIT_COMMITTER_EMAIL> $GIT_COMMITTER_DATE $commit_message" -run_tests 'commit' $commit_oid "" $commit_size "$commit_content" "$commit_content" -run_tests 'commit' $commit_compat_oid "" $commit_compat_size "$commit_compat_content" "$commit_compat_content" + run_tests 'commit' $commit_compat_oid "" $commit_compat_size "$commit_compat_content" "$commit_compat_content" +fi tag_header_without_oid="type blob tag hellotag tagger $GIT_COMMITTER_NAME <$GIT_COMMITTER_EMAIL>" tag_header_without_timestamp="object $hello_oid $tag_header_without_oid" -tag_compat_header_without_timestamp="object $hello_compat_oid -$tag_header_without_oid" tag_description="This is a tag" tag_content="$tag_header_without_timestamp 0 +0000 $tag_description" -tag_compat_content="$tag_compat_header_without_timestamp 0 +0000 - -$tag_description" tag_oid=$(echo_without_newline "$tag_content" | git hash-object -t tag --stdin -w) tag_size=$(strlen "$tag_content") -tag_compat_oid=$(git rev-parse --output-object-format=$test_compat_hash_algo $tag_oid) -tag_compat_size=$(strlen "$tag_compat_content") - run_tests 'tag' $tag_oid "" $tag_size "$tag_content" "$tag_content" -run_tests 'tag' $tag_compat_oid "" $tag_compat_size "$tag_compat_content" "$tag_compat_content" + +if test_have_prereq RUST +then + tag_compat_header_without_timestamp="object $hello_compat_oid +$tag_header_without_oid" + tag_compat_content="$tag_compat_header_without_timestamp 0 +0000 + +$tag_description" + + tag_compat_oid=$(git rev-parse --output-object-format=$test_compat_hash_algo $tag_oid) + tag_compat_size=$(strlen "$tag_compat_content") + + run_tests 'tag' $tag_compat_oid "" $tag_compat_size "$tag_compat_content" "$tag_compat_content" +fi test_expect_success "Reach a blob from a tag pointing to it" ' echo_without_newline "$hello_content" >expect && @@ -590,7 +614,8 @@ flush" } batch_tests $hello_oid $tree_oid $tree_size $commit_oid $commit_size "$commit_content" $tag_oid $tag_size "$tag_content" -batch_tests $hello_compat_oid $tree_compat_oid $tree_compat_size $commit_compat_oid $commit_compat_size "$commit_compat_content" $tag_compat_oid $tag_compat_size "$tag_compat_content" + +test_have_prereq RUST && batch_tests $hello_compat_oid $tree_compat_oid $tree_compat_size $commit_compat_oid $commit_compat_size "$commit_compat_content" $tag_compat_oid $tag_compat_size "$tag_compat_content" test_expect_success FUNNYNAMES 'setup with newline in input' ' @@ -1236,7 +1261,10 @@ test_expect_success 'batch-check with a submodule' ' test_unconfig extensions.compatobjectformat && printf "160000 commit $(test_oid deadbeef)\tsub\n" >tree-with-sub && tree=$(git mktree <tree-with-sub) && - test_config extensions.compatobjectformat $test_compat_hash_algo && + if test_have_prereq RUST + then + test_config extensions.compatobjectformat $test_compat_hash_algo + fi && git cat-file --batch-check >actual <<-EOF && $tree:sub diff --git a/t/t1016-compatObjectFormat.sh b/t/t1016-compatObjectFormat.sh index 0efce53f3a..92d48b96a1 100755 --- a/t/t1016-compatObjectFormat.sh +++ b/t/t1016-compatObjectFormat.sh @@ -8,6 +8,12 @@ test_description='Test how well compatObjectFormat works' . ./test-lib.sh . "$TEST_DIRECTORY"/lib-gpg.sh +if ! test_have_prereq RUST +then + skip_all='interoperability requires a Git built with Rust' + test_done +fi + # All of the follow variables must be defined in the environment: # GIT_AUTHOR_NAME # GIT_AUTHOR_EMAIL diff --git a/t/t1500-rev-parse.sh b/t/t1500-rev-parse.sh index 7739ab611b..98c5a772bd 100755 --- a/t/t1500-rev-parse.sh +++ b/t/t1500-rev-parse.sh @@ -208,7 +208,7 @@ test_expect_success 'rev-parse --show-object-format in repo' ' ' -test_expect_success 'rev-parse --show-object-format in repo with compat mode' ' +test_expect_success RUST 'rev-parse --show-object-format in repo with compat mode' ' mkdir repo && ( sane_unset GIT_DEFAULT_HASH && diff --git a/t/t9305-fast-import-signatures.sh b/t/t9305-fast-import-signatures.sh index 022dae02e4..a5406793f7 100755 --- a/t/t9305-fast-import-signatures.sh +++ b/t/t9305-fast-import-signatures.sh @@ -70,7 +70,7 @@ test_expect_success GPGSSH 'strip SSH signature with --signed-commits=strip' ' test_must_be_empty log ' -test_expect_success GPG 'setup a commit with dual OpenPGP signatures on its SHA-1 and SHA-256 formats' ' +test_expect_success RUST,GPG 'setup a commit with dual OpenPGP signatures on its SHA-1 and SHA-256 formats' ' # Create a signed SHA-256 commit git init --object-format=sha256 explicit-sha256 && git -C explicit-sha256 config extensions.compatObjectFormat sha1 && @@ -91,7 +91,7 @@ test_expect_success GPG 'setup a commit with dual OpenPGP signatures on its SHA- test_grep -E "^gpgsig-sha256 " out ' -test_expect_success GPG 'strip both OpenPGP signatures with --signed-commits=warn-strip' ' +test_expect_success RUST,GPG 'strip both OpenPGP signatures with --signed-commits=warn-strip' ' git -C explicit-sha256 fast-export --signed-commits=verbatim dual-signed >output && test_grep -E "^gpgsig sha1 openpgp" output && test_grep -E "^gpgsig sha256 openpgp" output && diff --git a/t/t9350-fast-export.sh b/t/t9350-fast-export.sh index 3d153a4805..784d68b6e5 100755 --- a/t/t9350-fast-export.sh +++ b/t/t9350-fast-export.sh @@ -972,7 +972,7 @@ test_expect_success 'fast-export handles --end-of-options' ' test_cmp expect actual ' -test_expect_success GPG 'setup a commit with dual signatures on its SHA-1 and SHA-256 formats' ' +test_expect_success GPG,RUST 'setup a commit with dual signatures on its SHA-1 and SHA-256 formats' ' # Create a signed SHA-256 commit git init --object-format=sha256 explicit-sha256 && git -C explicit-sha256 config extensions.compatObjectFormat sha1 && @@ -993,7 +993,7 @@ test_expect_success GPG 'setup a commit with dual signatures on its SHA-1 and SH test_grep -E "^gpgsig-sha256 " out ' -test_expect_success GPG 'export and import of doubly signed commit' ' +test_expect_success GPG,RUST 'export and import of doubly signed commit' ' git -C explicit-sha256 fast-export --signed-commits=verbatim dual-signed >output && test_grep -E "^gpgsig sha1 openpgp" output && test_grep -E "^gpgsig sha256 openpgp" output && diff --git a/t/test-lib.sh b/t/test-lib.sh index 0fb76f7d11..209498a0eb 100644 --- a/t/test-lib.sh +++ b/t/test-lib.sh @@ -1891,6 +1891,10 @@ test_lazy_prereq LONG_IS_64BIT ' test 8 -le "$(build_option sizeof-long)" ' +test_lazy_prereq RUST ' + test "$(build_option rust)" = enabled +' + test_lazy_prereq TIME_IS_64BIT 'test-tool date is64bit' test_lazy_prereq TIME_T_IS_64BIT 'test-tool date time_t-is64bit' diff --git a/write-or-die.h b/write-or-die.h index 65a5c42a47..ff0408bd84 100644 --- a/write-or-die.h +++ b/write-or-die.h @@ -21,6 +21,7 @@ enum fsync_component { FSYNC_COMPONENT_COMMIT_GRAPH = 1 << 3, FSYNC_COMPONENT_INDEX = 1 << 4, FSYNC_COMPONENT_REFERENCE = 1 << 5, + FSYNC_COMPONENT_OBJECT_MAP = 1 << 6, }; #define FSYNC_COMPONENTS_OBJECTS (FSYNC_COMPONENT_LOOSE_OBJECT | \ @@ -44,7 +45,8 @@ enum fsync_component { FSYNC_COMPONENT_PACK_METADATA | \ FSYNC_COMPONENT_COMMIT_GRAPH | \ FSYNC_COMPONENT_INDEX | \ - FSYNC_COMPONENT_REFERENCE) + FSYNC_COMPONENT_REFERENCE | \ + FSYNC_COMPONENT_OBJECT_MAP) #ifndef FSYNC_COMPONENTS_PLATFORM_DEFAULT #define FSYNC_COMPONENTS_PLATFORM_DEFAULT FSYNC_COMPONENTS_DEFAULT |
