Re: kernel.org and GIT tree rebuilding

From: Nicolas Pitre <nico@cam.org>
Date: 2005-06-29 15:16:29
On Tue, 28 Jun 2005, Nicolas Pitre wrote:

> OK... See below the cleanups I merged from my version on top of yours:

Of course, by the time I sent the above you had already rewritten the thing to
be streamable.

So again :-) see below the cleanups I merged from my version on top of 
yours:

 pack-objects.c   |   70 ++++++++++++++-----------------------------------------
 pack.h           |   17 ++++++++-----
 unpack-objects.c |   29 ++++++++++++----------
 3 files changed, 46 insertions(+), 70 deletions(-)

I also restored my original object header size ordering (little endian) 
for two reasons:

 - it is much simpler to generate and therefore allows for removing
   quite a bit of code

 - it allows for stable bit positions, which makes it much easier to look
   at a hex dump of the binary data for manual debugging

Signed-off-by: Nicolas Pitre <nico@cam.org>

diff --git a/pack-objects.c b/pack-objects.c
--- a/pack-objects.c
+++ b/pack-objects.c
@@ -34,7 +34,7 @@ static void *delta_against(void *buf, un
 	if (!otherbuf)
 		die("unable to read %s", sha1_to_hex(entry->delta->sha1));
         delta_buf = diff_delta(otherbuf, othersize,
-			       buf, size, &delta_size, ~0UL);
+			       buf, size, &delta_size, 0UL);
         if (!delta_buf || delta_size != entry->delta_size)
         	die("delta size changed");
         free(buf);
@@ -42,54 +42,13 @@ static void *delta_against(void *buf, un
 	return delta_buf;
 }
 
-/*
- * The per-object header is a pretty dense thing, which is
- *  - first byte: low four bits are "size", then three bits of "type",
- *    and the high bit is "size continues".
- *  - each byte afterwards: low seven bits are size continuation,
- *    with the high bit being "size continues"
- */
-static int encode_header(enum object_type type, unsigned long size, unsigned char *hdr)
-{
-	int n = 1, i;
-	unsigned char c;
-
-	if (type < OBJ_COMMIT || type > OBJ_DELTA)
-		die("bad type %d", type);
-
-	/*
-	 * Shift the size up by 7 bits at a time,
-	 * until you get bits in the "high four".
-	 * That will be our beginning. We'll have
-	 * four size bits in 28..31, then groups
-	 * of seven in 21..27, 14..20, 7..13 and
-	 * finally 0..6.
-	 */
-	if (size) {
-		n = 5;
-		while (!(size & 0xfe000000)) {
-			size <<= 7;
-			n--;
-		}
-	}
-	c = (type << 4) | (size >> 28);
-	for (i = 1; i < n; i++) {
-		*hdr++ = c | 0x80;
-		c = (size >> 21) & 0x7f;
-		size <<= 7;
-	}
-	*hdr = c;
-	return n;
-}
-
 static unsigned long write_object(struct sha1file *f, struct object_entry *entry)
 {
 	unsigned long size;
 	char type[10];
 	void *buf = read_sha1_file(entry->sha1, type, &size);
-	unsigned char header[10];
+	char header[25];
 	unsigned hdrlen, datalen;
-	enum object_type obj_type;
 
 	if (!buf)
 		die("unable to read %s", sha1_to_hex(entry->sha1));
@@ -97,22 +56,31 @@ static unsigned long write_object(struct
 		die("object %s size inconsistency (%lu vs %lu)", sha1_to_hex(entry->sha1), size, entry->size);
 
 	/*
-	 * The object header is a byte of 'type' followed by zero or
-	 * more bytes of length.  For deltas, the 20 bytes of delta sha1
-	 * follows that.
+	 * The object header first byte has its low 3 bits representing the
+	 * object type, the 4 upper bits indicating which of the following
+	 * bytes are used to build the object size.  For delta objects the
+	 * sha1 of the reference object is also appended.
 	 */
-	obj_type = entry->type;
 	if (entry->delta) {
+		header[0] = OBJ_DELTA;
 		buf = delta_against(buf, size, entry);
 		size = entry->delta_size;
-		obj_type = OBJ_DELTA;
+	} else
+		header[0] = entry->type;
+	header[0] |= size << 3;
+	hdrlen = 1;
+	datalen = size >> 4;
+	while (datalen) {
+		header[hdrlen - 1] |= 0x80;
+		header[hdrlen++] = datalen;
+		datalen >>= 7;
 	}
-	hdrlen = encode_header(obj_type, size, header);
-	sha1write(f, header, hdrlen);
 	if (entry->delta) {
-		sha1write(f, entry->delta, 20);
+		memcpy(header+hdrlen, entry->delta, 20);
 		hdrlen += 20;
 	}
+
+	sha1write(f, header, hdrlen);
 	datalen = sha1write_compressed(f, buf, size);
 	free(buf);
 	return hdrlen + datalen;
diff --git a/pack.h b/pack.h
--- a/pack.h
+++ b/pack.h
@@ -1,13 +1,18 @@
 #ifndef PACK_H
 #define PACK_H
 
+/*
+ * The packed object type is stored in the low 3 bits of a byte.
+ * The type value 0 is a reserved prefix if ever there is more than 7
+ * object types, or any future format extensions.
+ */
 enum object_type {
-	OBJ_NONE,
-	OBJ_COMMIT,
-	OBJ_TREE,
-	OBJ_BLOB,
-	OBJ_TAG,
-	OBJ_DELTA,
+	OBJ_EXT = 0,
+	OBJ_COMMIT = 1,
+	OBJ_TREE = 2,
+	OBJ_BLOB = 3,
+	OBJ_TAG = 4,
+	OBJ_DELTA = 7
 };
 
 /*
diff --git a/unpack-objects.c b/unpack-objects.c
--- a/unpack-objects.c
+++ b/unpack-objects.c
@@ -6,6 +6,14 @@
 static int dry_run;
 static const char unpack_usage[] = "git-unpack-objects < pack-file";
 
+static char *type_string[] = {
+	[OBJ_COMMIT]	= "commit",
+	[OBJ_TREE]	= "tree",
+	[OBJ_BLOB]	= "blob",
+	[OBJ_TAG]	= "tag",
+	[OBJ_DELTA]	= "delta"
+};
+
 /* We always read in 4kB chunks. */
 static unsigned char buffer[4096];
 static unsigned long offset, len, eof;
@@ -139,19 +147,11 @@ static void added_object(unsigned char *
 	}
 }
 
-static int unpack_non_delta_entry(enum object_type kind, unsigned long size)
+static int unpack_non_delta_entry(char *type, unsigned long size)
 {
 	void *buf = get_data(size);
 	unsigned char sha1[20];
-	char *type;
 
-	switch (kind) {
-	case OBJ_COMMIT: type = "commit"; break;
-	case OBJ_TREE:   type = "tree"; break;
-	case OBJ_BLOB:   type = "blob"; break;
-	case OBJ_TAG:    type = "tag"; break;
-	default: die("bad type %d", kind);
-	}
 	if (write_sha1_file(buf, size, type, sha1) < 0)
 		die("failed to write object");
 	added_object(sha1, type, buf, size);
@@ -184,26 +184,29 @@ static int unpack_delta_entry(unsigned l
 static void unpack_one(void)
 {
 	unsigned char *pack, c;
+	unsigned int i;
 	unsigned long size;
 	enum object_type type;
 
 	pack = fill(1);
 	c = *pack;
 	use(1);
-	type = (c >> 4) & 7;
-	size = (c & 15);
+	type = c & 0x07;
+	size = (c & ~0x80) >> 3;
+	i = 4;
 	while (c & 0x80) {
 		pack = fill(1);
 		c = *pack++;
 		use(1);
-		size = (size << 7) + (c & 0x7f);
+		size |= (c & ~0x80) << i;
+		i += 7;
 	}
 	switch (type) {
 	case OBJ_COMMIT:
 	case OBJ_TREE:
 	case OBJ_BLOB:
 	case OBJ_TAG:
-		unpack_non_delta_entry(type, size);
+		unpack_non_delta_entry(type_string[type], size);
 		return;
 	case OBJ_DELTA:
 		unpack_delta_entry(size);
-
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Received on Wed Jun 29 15:17:17 2005

This archive was generated by hypermail 2.1.8 : 2005-06-29 15:17:22 EST