Skip to content

Commit

Permalink
Merge branch 'mg/more-textconv'
Browse files Browse the repository at this point in the history
Make "git grep" and "git show" pay attention to --textconv when
dealing with blob objects.

* mg/more-textconv:
  grep: honor --textconv for the case rev:path
  grep: allow to use textconv filters
  t7008: demonstrate behavior of grep with textconv
  cat-file: do not die on --textconv without textconv filters
  show: honor --textconv for blobs
  diff_opt: track whether flags have been set explicitly
  t4030: demonstrate behavior of show with textconv
  • Loading branch information
gitster committed Oct 23, 2013
2 parents eeb8e83 + afa15f3 commit 4197361
Show file tree
Hide file tree
Showing 14 changed files with 237 additions and 57 deletions.
9 changes: 8 additions & 1 deletion Documentation/git-grep.txt
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ git-grep - Print lines matching a pattern
SYNOPSIS
--------
[verse]
'git grep' [-a | --text] [-I] [-i | --ignore-case] [-w | --word-regexp]
'git grep' [-a | --text] [-I] [--textconv] [-i | --ignore-case] [-w | --word-regexp]
[-v | --invert-match] [-h|-H] [--full-name]
[-E | --extended-regexp] [-G | --basic-regexp]
[-P | --perl-regexp]
Expand Down Expand Up @@ -80,6 +80,13 @@ OPTIONS
--text::
Process binary files as if they were text.

--textconv::
Honor textconv filter settings.

--no-textconv::
Do not honor textconv filter settings.
This is the default.

-i::
--ignore-case::
Ignore case differences between the patterns and the
Expand Down
10 changes: 9 additions & 1 deletion Documentation/technical/api-diff.txt
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,8 @@ Calling sequence

* Call `diff_setup_done()`; this inspects the options set up so far for
internal consistency and makes any necessary tweaks to it (e.g. if
textual patch output was asked, recursive behaviour is turned on).
textual patch output was asked, recursive behaviour is turned on);
the callback set_default in diff_options can be used to tweak this more.

* As you find different pairs of files, call `diff_change()` to feed
modified files, `diff_addremove()` to feed created or deleted files,
Expand Down Expand Up @@ -115,6 +116,13 @@ Notable members are:
operation, but some do not have anything to do with the diffcore
library.

`touched_flags`::
Records whether a flag has been changed due to user request
(rather than just set/unset by default).

`set_default`::
Callback which allows tweaking the options in diff_setup_done().

BINARY, TEXT;;
Affects how a file that appears to be binary is treated.

Expand Down
18 changes: 8 additions & 10 deletions builtin/cat-file.c
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,14 @@ static int cat_one_file(int opt, const char *exp_type, const char *obj_name)
case 'e':
return !has_sha1_file(sha1);

case 'c':
if (!obj_context.path[0])
die("git cat-file --textconv %s: <object> must be <sha1:path>",
obj_name);

if (textconv_object(obj_context.path, obj_context.mode, sha1, 1, &buf, &size))
break;

case 'p':
type = sha1_object_info(sha1, NULL);
if (type < 0)
Expand All @@ -67,16 +75,6 @@ static int cat_one_file(int opt, const char *exp_type, const char *obj_name)
/* otherwise just spit out the data */
break;

case 'c':
if (!obj_context.path[0])
die("git cat-file --textconv %s: <object> must be <sha1:path>",
obj_name);

if (!textconv_object(obj_context.path, obj_context.mode, sha1, 1, &buf, &size))
die("git cat-file --textconv: unable to run textconv on %s",
obj_name);
break;

case 0:
if (type_from_string(exp_type) == OBJ_BLOB) {
unsigned char blob_sha1[20];
Expand Down
13 changes: 8 additions & 5 deletions builtin/grep.c
Original file line number Diff line number Diff line change
Expand Up @@ -458,10 +458,10 @@ static int grep_tree(struct grep_opt *opt, const struct pathspec *pathspec,
}

static int grep_object(struct grep_opt *opt, const struct pathspec *pathspec,
struct object *obj, const char *name)
struct object *obj, const char *name, struct object_context *oc)
{
if (obj->type == OBJ_BLOB)
return grep_sha1(opt, obj->sha1, name, 0, NULL);
return grep_sha1(opt, obj->sha1, name, 0, oc ? oc->path : NULL);
if (obj->type == OBJ_COMMIT || obj->type == OBJ_TREE) {
struct tree_desc tree;
void *data;
Expand Down Expand Up @@ -503,7 +503,7 @@ static int grep_objects(struct grep_opt *opt, const struct pathspec *pathspec,
for (i = 0; i < nr; i++) {
struct object *real_obj;
real_obj = deref_tag(list->objects[i].item, NULL, 0);
if (grep_object(opt, pathspec, real_obj, list->objects[i].name)) {
if (grep_object(opt, pathspec, real_obj, list->objects[i].name, list->objects[i].context)) {
hit = 1;
if (opt->status_only)
break;
Expand Down Expand Up @@ -658,6 +658,8 @@ int cmd_grep(int argc, const char **argv, const char *prefix)
OPT_SET_INT('I', NULL, &opt.binary,
N_("don't match patterns in binary files"),
GREP_BINARY_NOMATCH),
OPT_BOOL(0, "textconv", &opt.allow_textconv,
N_("process binary files with textconv filters")),
{ OPTION_INTEGER, 0, "max-depth", &opt.max_depth, N_("depth"),
N_("descend at most <depth> levels"), PARSE_OPT_NONEG,
NULL, 1 },
Expand Down Expand Up @@ -817,12 +819,13 @@ int cmd_grep(int argc, const char **argv, const char *prefix)
for (i = 0; i < argc; i++) {
const char *arg = argv[i];
unsigned char sha1[20];
struct object_context oc;
/* Is it a rev? */
if (!get_sha1(arg, sha1)) {
if (!get_sha1_with_context(arg, 0, sha1, &oc)) {
struct object *object = parse_object_or_die(sha1, arg);
if (!seen_dashdash)
verify_non_filename(prefix, arg);
add_object_array(object, arg, &list);
add_object_array_with_context(object, arg, &list, xmemdupz(&oc, sizeof(struct object_context)));
continue;
}
if (!strcmp(arg, "--")) {
Expand Down
26 changes: 23 additions & 3 deletions builtin/log.c
Original file line number Diff line number Diff line change
Expand Up @@ -111,6 +111,7 @@ static void cmd_log_init_defaults(struct rev_info *rev)

if (default_date_mode)
rev->date_mode = parse_date_format(default_date_mode);
rev->diffopt.touched_flags = 0;
}

static void cmd_log_init_finish(int argc, const char **argv, const char *prefix,
Expand Down Expand Up @@ -436,10 +437,29 @@ static void show_tagger(char *buf, int len, struct rev_info *rev)
strbuf_release(&out);
}

static int show_blob_object(const unsigned char *sha1, struct rev_info *rev)
static int show_blob_object(const unsigned char *sha1, struct rev_info *rev, const char *obj_name)
{
unsigned char sha1c[20];
struct object_context obj_context;
char *buf;
unsigned long size;

fflush(stdout);
return stream_blob_to_fd(1, sha1, NULL, 0);
if (!DIFF_OPT_TOUCHED(&rev->diffopt, ALLOW_TEXTCONV) ||
!DIFF_OPT_TST(&rev->diffopt, ALLOW_TEXTCONV))
return stream_blob_to_fd(1, sha1, NULL, 0);

if (get_sha1_with_context(obj_name, 0, sha1c, &obj_context))
die("Not a valid object name %s", obj_name);
if (!obj_context.path[0] ||
!textconv_object(obj_context.path, obj_context.mode, sha1c, 1, &buf, &size))
return stream_blob_to_fd(1, sha1, NULL, 0);

if (!buf)
die("git show %s: bad file", obj_name);

write_or_die(1, buf, size);
return 0;
}

static int show_tag_object(const unsigned char *sha1, struct rev_info *rev)
Expand Down Expand Up @@ -525,7 +545,7 @@ int cmd_show(int argc, const char **argv, const char *prefix)
const char *name = objects[i].name;
switch (o->type) {
case OBJ_BLOB:
ret = show_blob_object(o->sha1, NULL);
ret = show_blob_object(o->sha1, &rev, name);
break;
case OBJ_TAG: {
struct tag *t = (struct tag *)o;
Expand Down
3 changes: 3 additions & 0 deletions diff.c
Original file line number Diff line number Diff line change
Expand Up @@ -3219,6 +3219,9 @@ void diff_setup_done(struct diff_options *options)
{
int count = 0;

if (options->set_default)
options->set_default(options);

if (options->output_format & DIFF_FORMAT_NAME)
count++;
if (options->output_format & DIFF_FORMAT_NAME_STATUS)
Expand Down
8 changes: 6 additions & 2 deletions diff.h
Original file line number Diff line number Diff line change
Expand Up @@ -88,8 +88,9 @@ typedef struct strbuf *(*diff_prefix_fn_t)(struct diff_options *opt, void *data)
#define DIFF_OPT_PICKAXE_IGNORE_CASE (1 << 30)

#define DIFF_OPT_TST(opts, flag) ((opts)->flags & DIFF_OPT_##flag)
#define DIFF_OPT_SET(opts, flag) ((opts)->flags |= DIFF_OPT_##flag)
#define DIFF_OPT_CLR(opts, flag) ((opts)->flags &= ~DIFF_OPT_##flag)
#define DIFF_OPT_TOUCHED(opts, flag) ((opts)->touched_flags & DIFF_OPT_##flag)
#define DIFF_OPT_SET(opts, flag) (((opts)->flags |= DIFF_OPT_##flag),((opts)->touched_flags |= DIFF_OPT_##flag))
#define DIFF_OPT_CLR(opts, flag) (((opts)->flags &= ~DIFF_OPT_##flag),((opts)->touched_flags |= DIFF_OPT_##flag))
#define DIFF_XDL_TST(opts, flag) ((opts)->xdl_opts & XDF_##flag)
#define DIFF_XDL_SET(opts, flag) ((opts)->xdl_opts |= XDF_##flag)
#define DIFF_XDL_CLR(opts, flag) ((opts)->xdl_opts &= ~XDF_##flag)
Expand All @@ -109,6 +110,7 @@ struct diff_options {
const char *single_follow;
const char *a_prefix, *b_prefix;
unsigned flags;
unsigned touched_flags;

/* diff-filter bits */
unsigned int filter;
Expand Down Expand Up @@ -149,6 +151,8 @@ struct diff_options {
/* to support internal diff recursion by --follow hack*/
int found_follow;

void (*set_default)(struct diff_options *);

FILE *file;
int close_file;

Expand Down
100 changes: 86 additions & 14 deletions grep.c
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@
#include "grep.h"
#include "userdiff.h"
#include "xdiff-interface.h"
#include "diff.h"
#include "diffcore.h"

static int grep_source_load(struct grep_source *gs);
static int grep_source_is_binary(struct grep_source *gs);
Expand Down Expand Up @@ -1322,6 +1324,58 @@ static void std_output(struct grep_opt *opt, const void *buf, size_t size)
fwrite(buf, size, 1, stdout);
}

/*
 * Load the contents of a grep_source, running them through a textconv
 * filter when one is available.
 *
 * driver: userdiff driver selected by the caller; may be NULL.
 * gs:     the grep source whose buf/size are to be populated.
 *
 * When driver is NULL or has no textconv set, this simply defers to
 * grep_source_load() and returns its result.  Otherwise the converted
 * text replaces whatever data gs held and 0 is returned.  Dies if gs is
 * neither a SHA1 source nor a file source.
 */
static int fill_textconv_grep(struct userdiff_driver *driver,
struct grep_source *gs)
{
struct diff_filespec *df;
char *buf;
size_t size;

/* No usable filter: fall back to the plain loader. */
if (!driver || !driver->textconv)
return grep_source_load(gs);

/*
* The textconv interface is intimately tied to diff_filespecs, so we
* have to pretend to be one. If we could unify the grep_source
* and diff_filespec structs, this mess could just go away.
*/
df = alloc_filespec(gs->path);
switch (gs->type) {
case GREP_SOURCE_SHA1:
/* Object-backed source: hand the filespec the object id from gs. */
fill_filespec(df, gs->identifier, 1, 0100644);
break;
case GREP_SOURCE_FILE:
/*
 * File-backed source: no object id is supplied (null_sha1).
 * NOTE(review): presumably this makes the diff code read the
 * content from the path given to alloc_filespec() -- confirm.
 */
fill_filespec(df, null_sha1, 0, 0100644);
break;
default:
die("BUG: attempt to textconv something without a path?");
}

/*
* fill_textconv is not remotely thread-safe; it may load objects
* behind the scenes, and it modifies the global diff tempfile
* structure.
*/
grep_read_lock();
size = fill_textconv(driver, df, &buf);
grep_read_unlock();
/* The temporary filespec is no longer needed once buf and size are set. */
free_filespec(df);

/*
* The normal fill_textconv usage by the diff machinery would just keep
* the textconv'd buf separate from the diff_filespec. But much of the
* grep code passes around a grep_source and assumes that its "buf"
* pointer is the beginning of the thing we are searching. So let's
* install our textconv'd version into the grep_source, taking care not
* to leak any existing buffer.
*/
grep_source_clear_data(gs);
gs->buf = buf;
gs->size = size;

return 0;
}

static int grep_source_1(struct grep_opt *opt, struct grep_source *gs, int collect_hits)
{
char *bol;
Expand All @@ -1332,6 +1386,7 @@ static int grep_source_1(struct grep_opt *opt, struct grep_source *gs, int colle
unsigned count = 0;
int try_lookahead = 0;
int show_function = 0;
struct userdiff_driver *textconv = NULL;
enum grep_context ctx = GREP_CONTEXT_HEAD;
xdemitconf_t xecfg;

Expand All @@ -1353,27 +1408,44 @@ static int grep_source_1(struct grep_opt *opt, struct grep_source *gs, int colle
}
opt->last_shown = 0;

switch (opt->binary) {
case GREP_BINARY_DEFAULT:
if (grep_source_is_binary(gs))
binary_match_only = 1;
break;
case GREP_BINARY_NOMATCH:
if (grep_source_is_binary(gs))
return 0; /* Assume unmatch */
break;
case GREP_BINARY_TEXT:
break;
default:
die("bug: unknown binary handling mode");
if (opt->allow_textconv) {
grep_source_load_driver(gs);
/*
* We might set up the shared textconv cache data here, which
* is not thread-safe.
*/
grep_attr_lock();
textconv = userdiff_get_textconv(gs->driver);
grep_attr_unlock();
}

/*
* We know the result of a textconv is text, so we only have to care
* about binary handling if we are not using it.
*/
if (!textconv) {
switch (opt->binary) {
case GREP_BINARY_DEFAULT:
if (grep_source_is_binary(gs))
binary_match_only = 1;
break;
case GREP_BINARY_NOMATCH:
if (grep_source_is_binary(gs))
return 0; /* Assume unmatch */
break;
case GREP_BINARY_TEXT:
break;
default:
die("bug: unknown binary handling mode");
}
}

memset(&xecfg, 0, sizeof(xecfg));
opt->priv = &xecfg;

try_lookahead = should_lookahead(opt);

if (grep_source_load(gs) < 0)
if (fill_textconv_grep(textconv, gs) < 0)
return 0;

bol = gs->buf;
Expand Down
1 change: 1 addition & 0 deletions grep.h
Original file line number Diff line number Diff line change
Expand Up @@ -107,6 +107,7 @@ struct grep_opt {
#define GREP_BINARY_NOMATCH 1
#define GREP_BINARY_TEXT 2
int binary;
int allow_textconv;
int extended;
int use_reflog_filter;
int pcre;
Expand Down
Loading

0 comments on commit 4197361

Please sign in to comment.