/* GNU DIFF main routine. Copyright (C) 1988, 1989 Free Software Foundation, Inc. This file is part of GNU DIFF. GNU DIFF is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 1, or (at your option) any later version. GNU DIFF is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with GNU DIFF; see the file COPYING. If not, write to the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. */ /* GNU DIFF was written by Mike Haertel, David Hayes, Richard Stallman and Len Tower. */ #define GDIFF_MAIN #include "regex.h" #include "diff.h" #include "getopt.h" /* Nonzero for -r: if comparing two directories, compare their common subdirectories recursively. */ int recursive; /* For debugging: don't do discard_confusing_lines. */ int no_discards; /* Return a string containing the command options with which diff was invoked. Spaces appear between what were separate ARGV-elements. There is a space at the beginning but none at the end. If there were no options, the result is an empty string. Arguments: OPTIONVEC, a vector containing separate ARGV-elements, and COUNT, the length of that vector. */ static char * option_list (optionvec, count) char **optionvec; /* Was `vector', but that collides on Alliant. */ int count; { int i; int length = 0; char *result; for (i = 0; i < count; i++) length += strlen (optionvec[i]) + 1; result = (char *) xmalloc (length + 1); result[0] = 0; for (i = 0; i < count; i++) { strcat (result, " "); strcat (result, optionvec[i]); } return result; } /* The numbers 129 and 130 that appear in the fourth element for the context and unidiff entries are used as a way of telling the big switch in `main' how to process those options. */ static struct option longopts[] = { {"ignore-blank-lines", 0, 0, 'B'}, {"context", 2, 0, 129}, {"ifdef", 1, 0, 'D'}, {"show-function-line", 1, 0, 'F'}, {"speed-large-files", 0, 0, 'H'}, {"ignore-matching-lines", 1, 0, 'I'}, {"file-label", 1, 0, 'L'}, {"entire-new-files", 0, 0, 'N'}, {"new-files", 0, 0, 'N'}, {"starting-file", 1, 0, 'S'}, {"initial-tab", 0, 0, 'T'}, {"text", 0, 0, 'a'}, {"all-text", 0, 0, 'a'}, {"ascii", 0, 0, 'a'}, {"ignore-space-change", 0, 0, 'b'}, {"minimal", 0, 0, 'd'}, {"ed", 0, 0, 'e'}, {"reversed-ed", 0, 0, 'f'}, {"ignore-case", 0, 0, 'i'}, {"print", 0, 0, 'l'}, {"rcs", 0, 0, 'n'}, {"show-c-function", 0, 0, 'p'}, {"binary", 0, 0, 'q'}, {"brief", 0, 0, 'q'}, {"recursive", 0, 0, 'r'}, {"report-identical-files", 0, 0, 's'}, {"expand-tabs", 0, 0, 't'}, {"ignore-all-space", 0, 0, 'w'}, {"unified", 2, 0, 130}, {"version", 0, 0, 'v'}, {0, 0, 0, 0} }; main (argc, argv) int argc; char *argv[]; { int val; int c; int prev = -1; int longind; extern char *version_string; program = argv[0]; /* Do our initializations. */ output_style = OUTPUT_NORMAL; always_text_flag = FALSE; ignore_space_change_flag = FALSE; ignore_all_space_flag = FALSE; length_varies = FALSE; ignore_case_flag = FALSE; ignore_blank_lines_flag = FALSE; ignore_regexp = 0; function_regexp = 0; print_file_same_flag = FALSE; entire_new_file_flag = FALSE; no_details_flag = FALSE; context = -1; line_end_char = '\n'; tab_align_flag = FALSE; tab_expand_flag = FALSE; recursive = FALSE; paginate_flag = FALSE; ifdef_string = NULL; heuristic = FALSE; dir_start_file = NULL; msg_chain = NULL; msg_chain_end = NULL; no_discards = 0; /* Decode the options. */ while ((c = getopt_long (argc, argv, "0123456789abBcC:dD:efF:hHiI:lL:nNpqrsS:tTuvw", longopts, &longind)) != EOF) { if (c == 0) /* Long option. */ c = longopts[longind].val; switch (c) { /* All digits combine in decimal to specify the context-size. */ case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': case '0': if (context == -1) context = 0; /* If a context length has already been specified, more digits allowed only if they follow right after the others. Reject two separate runs of digits, or digits after -C. */ else if (prev < '0' || prev > '9') fatal ("context length specified twice"); context = context * 10 + c - '0'; break; case 'a': /* Treat all files as text files; never treat as binary. */ always_text_flag = 1; break; case 'b': /* Ignore changes in amount of whitespace. */ ignore_space_change_flag = 1; length_varies = 1; break; case 'B': /* Ignore changes affecting only blank lines. */ ignore_blank_lines_flag = 1; break; case 'C': case 129: /* +context[=lines] */ case 130: /* +unified[=lines] */ if (optarg) { if (context >= 0) fatal ("context length specified twice"); { char *p; for (p = optarg; *p; p++) if (*p < '0' || *p > '9') fatal ("invalid context length argument"); } context = atoi (optarg); } /* Falls through. */ case 'c': /* Make context-style output. */ specify_style (c == 130 ? OUTPUT_UNIFIED : OUTPUT_CONTEXT); break; case 'd': /* Don't discard lines. This makes things slower (sometimes much slower) but will find a guaranteed minimal set of changes. */ no_discards = 1; break; case 'D': /* Make merged #ifdef output. */ specify_style (OUTPUT_IFDEF); ifdef_string = optarg; break; case 'e': /* Make output that is a valid `ed' script. */ specify_style (OUTPUT_ED); break; case 'f': /* Make output that looks vaguely like an `ed' script but has changes in the order they appear in the file. */ specify_style (OUTPUT_FORWARD_ED); break; case 'F': /* Show, for each set of changes, the previous line that matches the specified regexp. Currently affects only context-style output. */ function_regexp = optarg; break; case 'h': /* Split the files into chunks of around 1500 lines for faster processing. Usually does not change the result. This currently has no effect. */ break; case 'H': /* Turn on heuristics that speed processing of large files with a small density of changes. */ heuristic = 1; break; case 'i': /* Ignore changes in case. */ ignore_case_flag = 1; break; case 'I': /* Ignore changes affecting only lines that match the specified regexp. */ ignore_regexp = optarg; break; case 'l': /* Pass the output through `pr' to paginate it. */ paginate_flag = 1; break; case 'L': /* Specify file labels for `-c' output headers. */ if (!file_label[0]) file_label[0] = optarg; else if (!file_label[1]) file_label[1] = optarg; else fatal ("too many file label options"); break; case 'n': /* Output RCS-style diffs, like `-f' except that each command specifies the number of lines affected. */ specify_style (OUTPUT_RCS); break; case 'N': /* When comparing directories, if a file appears only in one directory, treat it as present but empty in the other. */ entire_new_file_flag = 1; break; case 'p': /* Make context-style output and show name of last C function. */ specify_style (OUTPUT_CONTEXT); function_regexp = "^[_a-zA-Z]"; break; case 'q': no_details_flag = 1; break; case 'r': /* When comparing directories, recursively compare any subdirectories found. */ recursive = 1; break; case 's': /* Print a message if the files are the same. */ print_file_same_flag = 1; break; case 'S': /* When comparing directories, start with the specified file name. This is used for resuming an aborted comparison. */ dir_start_file = optarg; break; case 't': /* Expand tabs to spaces in the output so that it preserves the alignment of the input files. */ tab_expand_flag = 1; break; case 'T': /* Use a tab in the output, rather than a space, before the text of an input line, so as to keep the proper alignment in the input line without changing the characters in it. */ tab_align_flag = 1; break; case 'v': printf ("GNU diff version %s\n", version_string); break; case 'u': /* Output the context diff in unidiff format. */ specify_style (OUTPUT_UNIFIED); break; case 'w': /* Ignore horizontal whitespace when comparing lines. */ ignore_all_space_flag = 1; length_varies = 1; break; default: usage (); } prev = c; } if (optind != argc - 2) usage (); if (ignore_regexp) { char *val; bzero (&ignore_regexp_compiled, sizeof ignore_regexp_compiled); val = re_compile_pattern (ignore_regexp, strlen (ignore_regexp), &ignore_regexp_compiled); if (val != 0) error ("%s: %s", ignore_regexp, val); ignore_regexp_compiled.fastmap = (char *) xmalloc (256); } if (function_regexp) { char *val; bzero (&function_regexp_compiled, sizeof function_regexp_compiled); val = re_compile_pattern (function_regexp, strlen (function_regexp), &function_regexp_compiled); if (val != 0) error ("%s: %s", function_regexp, val); function_regexp_compiled.fastmap = (char *) xmalloc (256); } if (output_style != OUTPUT_CONTEXT && output_style != OUTPUT_UNIFIED) context = 0; else if (context == -1) /* Default amount of context for -c. */ context = 3; switch_string = option_list (argv + 1, optind - 1); val = compare_files (0, argv[optind], 0, argv[optind + 1], 0); /* Print any messages that were saved up for last. */ print_message_queue (); if (ferror (stdout) || fclose (stdout) != 0) fatal ("write error"); exit (val); } usage () { fprintf (stderr, "\ Usage: diff [-#] [-abBcdefhHilnNprstTuvw] [-C lines] [-F regexp] [-I regexp]\n\ [-L label [-L label]] [-S file] [-D symbol] [+ignore-blank-lines]\n\ [+context[=lines]] [+unified[=lines]] [+ifdef=symbol]\n\ [+show-function-line=regexp]\n"); fprintf (stderr, "\ [+speed-large-files] [+ignore-matching-lines=regexp] [+new-file]\n\ [+initial-tab] [+starting-file=file] [+text] [+all-text] [+ascii]\n\ [+minimal] [+ignore-space-change] [+ed] [+reversed-ed] [+ignore-case]\n"); fprintf (stderr, "\ [+print] [+rcs] [+show-c-function] [+binary] [+brief] [+recursive]\n\ [+report-identical-files] [+expand-tabs] [+ignore-all-space]\n\ [+file-label=label [+file-label=label]] [+version] path1 path2\n"); exit (2); } specify_style (style) enum output_style style; { if (output_style != OUTPUT_NORMAL && output_style != style) error ("conflicting specifications of output style"); output_style = style; } /* Compare two files (or dirs) with specified names DIR0/NAME0 and DIR1/NAME1, at level DEPTH in directory recursion. (if DIR0 is 0, then the name is just NAME0, etc.) This is self-contained; it opens the files and closes them. Value is 0 if files are identical, 1 if different, 2 if there is a problem opening them. */ int compare_files (dir0, name0, dir1, name1, depth) char *dir0, *dir1; char *name0, *name1; int depth; { static char Standard_Input[] = "Standard Input"; struct file_data inf[2]; register int i; int val; int errorcount = 0; int stat_result[2]; /* If this is directory comparison, perhaps we have a file that exists only in one of the directories. If so, just print a message to that effect. */ if (! entire_new_file_flag && (name0 == 0 || name1 == 0)) { char *name = name0 == 0 ? name1 : name0; char *dir = name0 == 0 ? dir1 : dir0; message ("Only in %s: %s\n", dir, name); /* Return 1 so that diff_dirs will return 1 ("some files differ"). */ return 1; } /* Mark any nonexistent file with -1 in the desc field. */ /* Mark unopened files (i.e. directories) with -2. */ inf[0].desc = name0 == 0 ? -1 : -2; inf[1].desc = name1 == 0 ? -1 : -2; /* Now record the full name of each file, including nonexistent ones. */ if (name0 == 0) name0 = name1; if (name1 == 0) name1 = name0; inf[0].name = dir0 == 0 ? name0 : concat (dir0, "/", name0); inf[1].name = dir1 == 0 ? name1 : concat (dir1, "/", name1); /* Stat the files. Record whether they are directories. Record in stat_result whether stat fails. */ for (i = 0; i <= 1; i++) { bzero (&inf[i].stat, sizeof(struct stat)); inf[i].dir_p = 0; stat_result[i] = 0; if (inf[i].desc != -1) { char *filename = inf[i].name; stat_result[i] = strcmp (filename, "-") ? stat (filename, &inf[i].stat) : fstat (0, &inf[i].stat); if (stat_result[i] < 0) { perror_with_name (filename); errorcount = 1; } else inf[i].dir_p = S_IFDIR == (inf[i].stat.st_mode & S_IFMT) && strcmp (filename, "-"); } } /* See if the two named files are actually the same physical file. If so, we know they are identical without actually reading them. */ if (output_style != OUTPUT_IFDEF && inf[0].stat.st_ino == inf[1].stat.st_ino && inf[0].stat.st_dev == inf[1].stat.st_dev && stat_result[0] == 0 && stat_result[1] == 0) { val = 0; goto done; } if (name0 == 0) inf[0].dir_p = inf[1].dir_p; if (name1 == 0) inf[1].dir_p = inf[0].dir_p; /* Open the files and record their descriptors. */ for (i = 0; i <= 1; i++) { if (inf[i].desc == -1) ; else if (!strcmp (inf[i].name, "-")) { inf[i].desc = 0; inf[i].name = Standard_Input; } /* Don't bother opening if stat already failed. */ else if (stat_result[i] == 0 && ! inf[i].dir_p) { char *filename = inf[i].name; inf[i].desc = open (filename, O_RDONLY, 0); if (0 > inf[i].desc) { perror_with_name (filename); errorcount = 1; } } } if (errorcount) { /* If either file should exist but fails to be opened, return 2. */ val = 2; } else if (inf[0].dir_p && inf[1].dir_p) { if (output_style == OUTPUT_IFDEF) fatal ("-D option not supported with directories"); /* If both are directories, compare the files in them. */ if (depth > 0 && !recursive) { /* But don't compare dir contents one level down unless -r was specified. */ message ("Common subdirectories: %s and %s\n", inf[0].name, inf[1].name); val = 0; } else { val = diff_dirs (inf[0].name, inf[1].name, compare_files, depth, 0, 0); } } else if (depth == 0 && (inf[0].dir_p || inf[1].dir_p)) { /* If only one is a directory, and it was specified in the command line, use the file in that dir whose basename matches the other file. */ int dir_arg = (inf[0].dir_p ? 0 : 1); int fnm_arg = (inf[0].dir_p ? 1 : 0); char *p = rindex (inf[fnm_arg].name, '/'); char *filename = concat (inf[dir_arg].name, "/", (p ? p+1 : inf[fnm_arg].name)); if (inf[fnm_arg].name == Standard_Input) fatal ("can't compare - to a directory"); inf[dir_arg].desc = open (filename, O_RDONLY, 0); if (0 > inf[dir_arg].desc) { perror_with_name (filename); val = 2; } else { /* JF: patch from the net to check and make sure we can really free this. If it's from argv[], freeing it is a *really* bad idea */ if (0 != (dir_arg ? dir1 : dir0)) free (inf[dir_arg].name); inf[dir_arg].name = filename; if (fstat (inf[dir_arg].desc, &inf[dir_arg].stat) < 0) pfatal_with_name (inf[dir_arg].name); inf[dir_arg].dir_p = (S_IFDIR == (inf[dir_arg].stat.st_mode & S_IFMT)); if (inf[dir_arg].dir_p) { error ("%s is a directory but %s is not", inf[dir_arg].name, inf[fnm_arg].name); val = 1; } else val = diff_2_files (inf, depth); } } else if (depth > 0 && (inf[0].dir_p || inf[1].dir_p)) { /* Perhaps we have a subdirectory that exists only in one directory. If so, just print a message to that effect. */ if (inf[0].desc == -1 || inf[1].desc == -1) { if (entire_new_file_flag && recursive) val = diff_dirs (inf[0].name, inf[1].name, compare_files, depth, inf[0].desc == -1, inf[1].desc == -1); else { char *dir = (inf[0].desc == -1) ? dir1 : dir0; message ("Only in %s: %s\n", dir, name0); val = 1; } } else { /* We have a subdirectory in one directory and a file in the other. */ if (inf[0].dir_p) message ("%s is a directory but %s is not\n", inf[0].name, inf[1].name); else message ("%s is a directory but %s is not\n", inf[1].name, inf[0].name); /* This is a difference. */ val = 1; } } else { /* Both exist and both are ordinary files. */ val = diff_2_files (inf, depth); } /* Now the comparison has been done, if no error prevented it, and VAL is the value this function will return. */ if (inf[0].desc >= 0) close (inf[0].desc); if (inf[1].desc >= 0) close (inf[1].desc); done: if (val == 0 && !inf[0].dir_p) { if (print_file_same_flag) message ("Files %s and %s are identical\n", inf[0].name, inf[1].name); } else fflush (stdout); if (dir0 != 0) free (inf[0].name); if (dir1 != 0) free (inf[1].name); return val; }