Skip to content

Commit

Permalink
archive-tar: stream large blobs to tar file
Browse files Browse the repository at this point in the history
t5000 verifies output while t1050 makes sure the command always
respects core.bigfilethreshold

Signed-off-by: Nguyễn Thái Ngọc Duy <[email protected]>
Signed-off-by: Junio C Hamano <[email protected]>
  • Loading branch information
pclouds authored and gitster committed May 3, 2012
1 parent 9cb513b commit 5544049
Show file tree
Hide file tree
Showing 3 changed files with 61 additions and 5 deletions.
56 changes: 51 additions & 5 deletions archive-tar.c
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
#include "cache.h"
#include "tar.h"
#include "archive.h"
#include "streaming.h"
#include "run-command.h"

#define RECORDSIZE (512)
Expand All @@ -30,10 +31,9 @@ static void write_if_needed(void)
* queues up writes, so that all our write(2) calls write exactly one
* full block; pads writes to RECORDSIZE
*/
static void write_blocked(const void *data, unsigned long size)
static void do_write_blocked(const void *data, unsigned long size)
{
const char *buf = data;
unsigned long tail;

if (offset) {
unsigned long chunk = BLOCKSIZE - offset;
Expand All @@ -54,6 +54,11 @@ static void write_blocked(const void *data, unsigned long size)
memcpy(block + offset, buf, size);
offset += size;
}
}

static void finish_record(void)
{
unsigned long tail;
tail = offset % RECORDSIZE;
if (tail) {
memset(block + offset, 0, RECORDSIZE - tail);
Expand All @@ -62,6 +67,12 @@ static void write_blocked(const void *data, unsigned long size)
write_if_needed();
}

static void write_blocked(const void *data, unsigned long size)
{
do_write_blocked(data, size);
finish_record();
}

/*
* The end of tar archives is marked by 2*512 nul bytes and after that
* follows the rest of the block (if any).
Expand All @@ -77,6 +88,33 @@ static void write_trailer(void)
}
}

/*
* queues up writes, so that all our write(2) calls write exactly one
* full block; pads writes to RECORDSIZE
*/
static int stream_blocked(const unsigned char *sha1)
{
struct git_istream *st;
enum object_type type;
unsigned long sz;
char buf[BLOCKSIZE];
ssize_t readlen;

st = open_istream(sha1, &type, &sz, NULL);
if (!st)
return error("cannot stream blob %s", sha1_to_hex(sha1));
for (;;) {
readlen = read_istream(st, buf, sizeof(buf));
if (readlen <= 0)
break;
do_write_blocked(buf, readlen);
}
close_istream(st);
if (!readlen)
finish_record();
return readlen;
}

/*
* pax extended header records have the format "%u %s=%s\n". %u contains
* the size of the whole string (including the %u), the first %s is the
Expand Down Expand Up @@ -203,7 +241,11 @@ static int write_tar_entry(struct archiver_args *args,
} else
memcpy(header.name, path, pathlen);

if (S_ISLNK(mode) || S_ISREG(mode)) {
if (S_ISREG(mode) && !args->convert &&
sha1_object_info(sha1, &size) == OBJ_BLOB &&
size > big_file_threshold)
buffer = NULL;
else if (S_ISLNK(mode) || S_ISREG(mode)) {
enum object_type type;
buffer = sha1_file_to_archive(args, path, sha1, old_mode, &type, &size);
if (!buffer)
Expand Down Expand Up @@ -235,8 +277,12 @@ static int write_tar_entry(struct archiver_args *args,
}
strbuf_release(&ext_header);
write_blocked(&header, sizeof(header));
if (S_ISREG(mode) && buffer && size > 0)
write_blocked(buffer, size);
if (S_ISREG(mode) && size > 0) {
if (buffer)
write_blocked(buffer, size);
else
err = stream_blocked(sha1);
}
free(buffer);
return err;
}
Expand Down
4 changes: 4 additions & 0 deletions t/t1050-large.sh
Original file line number Diff line number Diff line change
Expand Up @@ -134,4 +134,8 @@ test_expect_success 'repack' '
git repack -ad
'

test_expect_success 'tar achiving' '
git archive --format=tar HEAD >/dev/null
'

test_done
6 changes: 6 additions & 0 deletions t/t5000-tar-tree.sh
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,12 @@ test_expect_success \
'git archive vs. git tar-tree' \
'test_cmp b.tar b2.tar'

test_expect_success 'git archive on large files' '
test_config core.bigfilethreshold 1 &&
git archive HEAD >b3.tar &&
test_cmp b.tar b3.tar
'

test_expect_success \
'git archive in a bare repo' \
'(cd bare.git && git archive HEAD) >b3.tar'
Expand Down

0 comments on commit 5544049

Please sign in to comment.