summaryrefslogtreecommitdiff
path: root/scripts/list-gitignored.c
diff options
context:
space:
mode:
Diffstat (limited to 'scripts/list-gitignored.c')
-rw-r--r--scripts/list-gitignored.c1057
1 files changed, 1057 insertions, 0 deletions
diff --git a/scripts/list-gitignored.c b/scripts/list-gitignored.c
new file mode 100644
index 000000000000..f9941f8dcd2b
--- /dev/null
+++ b/scripts/list-gitignored.c
@@ -0,0 +1,1057 @@
+// SPDX-License-Identifier: GPL-2.0-only
+//
+// Traverse the source tree, parsing all .gitignore files, and print file paths
+// that are ignored by git.
+// The output is suitable to the --exclude-from option of tar.
+// This is useful until the --exclude-vcs-ignores option gets working correctly.
+//
+// Copyright (C) 2023 Masahiro Yamada <masahiroy@kernel.org>
+// (a lot of code imported from GIT)
+
+#include <assert.h>
+#include <dirent.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <getopt.h>
+#include <stdarg.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+// Imported from commit 23c56f7bd5f1667f8b793d796bf30e39545920f6 in GIT
+//
+//---------------------------(IMPORT FROM GIT BEGIN)---------------------------
+
+// Copied from environment.c
+
+static bool ignore_case;
+
+// Copied from git-compat-util.h
+
+/* Sane ctype - no locale, and works with signed chars */
+#undef isascii
+#undef isspace
+#undef isdigit
+#undef isalpha
+#undef isalnum
+#undef isprint
+#undef islower
+#undef isupper
+#undef tolower
+#undef toupper
+#undef iscntrl
+#undef ispunct
+#undef isxdigit
+
+static const unsigned char sane_ctype[256];
+#define GIT_SPACE 0x01
+#define GIT_DIGIT 0x02
+#define GIT_ALPHA 0x04
+#define GIT_GLOB_SPECIAL 0x08
+#define GIT_REGEX_SPECIAL 0x10
+#define GIT_PATHSPEC_MAGIC 0x20
+#define GIT_CNTRL 0x40
+#define GIT_PUNCT 0x80
+#define sane_istest(x,mask) ((sane_ctype[(unsigned char)(x)] & (mask)) != 0)
+#define isascii(x) (((x) & ~0x7f) == 0)
+#define isspace(x) sane_istest(x,GIT_SPACE)
+#define isdigit(x) sane_istest(x,GIT_DIGIT)
+#define isalpha(x) sane_istest(x,GIT_ALPHA)
+#define isalnum(x) sane_istest(x,GIT_ALPHA | GIT_DIGIT)
+#define isprint(x) ((x) >= 0x20 && (x) <= 0x7e)
+#define islower(x) sane_iscase(x, 1)
+#define isupper(x) sane_iscase(x, 0)
+#define is_glob_special(x) sane_istest(x,GIT_GLOB_SPECIAL)
+#define iscntrl(x) (sane_istest(x,GIT_CNTRL))
+#define ispunct(x) sane_istest(x, GIT_PUNCT | GIT_REGEX_SPECIAL | \
+ GIT_GLOB_SPECIAL | GIT_PATHSPEC_MAGIC)
+#define isxdigit(x) (hexval_table[(unsigned char)(x)] != -1)
+#define tolower(x) sane_case((unsigned char)(x), 0x20)
+#define toupper(x) sane_case((unsigned char)(x), 0)
+
+static inline int sane_case(int x, int high)
+{
+ if (sane_istest(x, GIT_ALPHA))
+ x = (x & ~0x20) | high;
+ return x;
+}
+
+static inline int sane_iscase(int x, int is_lower)
+{
+ if (!sane_istest(x, GIT_ALPHA))
+ return 0;
+
+ if (is_lower)
+ return (x & 0x20) != 0;
+ else
+ return (x & 0x20) == 0;
+}
+
+// Copied from ctype.c
+
+enum {
+ S = GIT_SPACE,
+ A = GIT_ALPHA,
+ D = GIT_DIGIT,
+ G = GIT_GLOB_SPECIAL, /* *, ?, [, \\ */
+ R = GIT_REGEX_SPECIAL, /* $, (, ), +, ., ^, {, | */
+ P = GIT_PATHSPEC_MAGIC, /* other non-alnum, except for ] and } */
+ X = GIT_CNTRL,
+ U = GIT_PUNCT,
+ Z = GIT_CNTRL | GIT_SPACE
+};
+
+static const unsigned char sane_ctype[256] = {
+ X, X, X, X, X, X, X, X, X, Z, Z, X, X, Z, X, X, /* 0.. 15 */
+ X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, /* 16.. 31 */
+ S, P, P, P, R, P, P, P, R, R, G, R, P, P, R, P, /* 32.. 47 */
+ D, D, D, D, D, D, D, D, D, D, P, P, P, P, P, G, /* 48.. 63 */
+ P, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, /* 64.. 79 */
+ A, A, A, A, A, A, A, A, A, A, A, G, G, U, R, P, /* 80.. 95 */
+ P, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A, /* 96..111 */
+ A, A, A, A, A, A, A, A, A, A, A, R, R, U, P, X, /* 112..127 */
+ /* Nothing in the 128.. range */
+};
+
+// Copied from hex.c
+
+static const signed char hexval_table[256] = {
+ -1, -1, -1, -1, -1, -1, -1, -1, /* 00-07 */
+ -1, -1, -1, -1, -1, -1, -1, -1, /* 08-0f */
+ -1, -1, -1, -1, -1, -1, -1, -1, /* 10-17 */
+ -1, -1, -1, -1, -1, -1, -1, -1, /* 18-1f */
+ -1, -1, -1, -1, -1, -1, -1, -1, /* 20-27 */
+ -1, -1, -1, -1, -1, -1, -1, -1, /* 28-2f */
+ 0, 1, 2, 3, 4, 5, 6, 7, /* 30-37 */
+ 8, 9, -1, -1, -1, -1, -1, -1, /* 38-3f */
+ -1, 10, 11, 12, 13, 14, 15, -1, /* 40-47 */
+ -1, -1, -1, -1, -1, -1, -1, -1, /* 48-4f */
+ -1, -1, -1, -1, -1, -1, -1, -1, /* 50-57 */
+ -1, -1, -1, -1, -1, -1, -1, -1, /* 58-5f */
+ -1, 10, 11, 12, 13, 14, 15, -1, /* 60-67 */
+ -1, -1, -1, -1, -1, -1, -1, -1, /* 68-67 */
+ -1, -1, -1, -1, -1, -1, -1, -1, /* 70-77 */
+ -1, -1, -1, -1, -1, -1, -1, -1, /* 78-7f */
+ -1, -1, -1, -1, -1, -1, -1, -1, /* 80-87 */
+ -1, -1, -1, -1, -1, -1, -1, -1, /* 88-8f */
+ -1, -1, -1, -1, -1, -1, -1, -1, /* 90-97 */
+ -1, -1, -1, -1, -1, -1, -1, -1, /* 98-9f */
+ -1, -1, -1, -1, -1, -1, -1, -1, /* a0-a7 */
+ -1, -1, -1, -1, -1, -1, -1, -1, /* a8-af */
+ -1, -1, -1, -1, -1, -1, -1, -1, /* b0-b7 */
+ -1, -1, -1, -1, -1, -1, -1, -1, /* b8-bf */
+ -1, -1, -1, -1, -1, -1, -1, -1, /* c0-c7 */
+ -1, -1, -1, -1, -1, -1, -1, -1, /* c8-cf */
+ -1, -1, -1, -1, -1, -1, -1, -1, /* d0-d7 */
+ -1, -1, -1, -1, -1, -1, -1, -1, /* d8-df */
+ -1, -1, -1, -1, -1, -1, -1, -1, /* e0-e7 */
+ -1, -1, -1, -1, -1, -1, -1, -1, /* e8-ef */
+ -1, -1, -1, -1, -1, -1, -1, -1, /* f0-f7 */
+ -1, -1, -1, -1, -1, -1, -1, -1, /* f8-ff */
+};
+
+// Copied from wildmatch.h
+
+#define WM_CASEFOLD 1
+#define WM_PATHNAME 2
+
+#define WM_NOMATCH 1
+#define WM_MATCH 0
+#define WM_ABORT_ALL -1
+#define WM_ABORT_TO_STARSTAR -2
+
+// Copied from wildmatch.c
+
+typedef unsigned char uchar;
+
+// local modification: remove NEGATE_CLASS(2)
+
+#define CC_EQ(class, len, litmatch) ((len) == sizeof (litmatch)-1 \
+ && *(class) == *(litmatch) \
+ && strncmp((char*)class, litmatch, len) == 0)
+
+// local modification: simpilify macros
+#define ISBLANK(c) ((c) == ' ' || (c) == '\t')
+#define ISGRAPH(c) (isprint(c) && !isspace(c))
+#define ISPRINT(c) isprint(c)
+#define ISDIGIT(c) isdigit(c)
+#define ISALNUM(c) isalnum(c)
+#define ISALPHA(c) isalpha(c)
+#define ISCNTRL(c) iscntrl(c)
+#define ISLOWER(c) islower(c)
+#define ISPUNCT(c) ispunct(c)
+#define ISSPACE(c) isspace(c)
+#define ISUPPER(c) isupper(c)
+#define ISXDIGIT(c) isxdigit(c)
+
+/* Match pattern "p" against "text" */
+static int dowild(const uchar *p, const uchar *text, unsigned int flags)
+{
+ uchar p_ch;
+ const uchar *pattern = p;
+
+ for ( ; (p_ch = *p) != '\0'; text++, p++) {
+ int matched, match_slash, negated;
+ uchar t_ch, prev_ch;
+ if ((t_ch = *text) == '\0' && p_ch != '*')
+ return WM_ABORT_ALL;
+ if ((flags & WM_CASEFOLD) && ISUPPER(t_ch))
+ t_ch = tolower(t_ch);
+ if ((flags & WM_CASEFOLD) && ISUPPER(p_ch))
+ p_ch = tolower(p_ch);
+ switch (p_ch) {
+ case '\\':
+ /* Literal match with following character. Note that the test
+ * in "default" handles the p[1] == '\0' failure case. */
+ p_ch = *++p;
+ /* FALLTHROUGH */
+ default:
+ if (t_ch != p_ch)
+ return WM_NOMATCH;
+ continue;
+ case '?':
+ /* Match anything but '/'. */
+ if ((flags & WM_PATHNAME) && t_ch == '/')
+ return WM_NOMATCH;
+ continue;
+ case '*':
+ if (*++p == '*') {
+ const uchar *prev_p = p - 2;
+ while (*++p == '*') {}
+ if (!(flags & WM_PATHNAME))
+ /* without WM_PATHNAME, '*' == '**' */
+ match_slash = 1;
+ else if ((prev_p < pattern || *prev_p == '/') &&
+ (*p == '\0' || *p == '/' ||
+ (p[0] == '\\' && p[1] == '/'))) {
+ /*
+ * Assuming we already match 'foo/' and are at
+ * <star star slash>, just assume it matches
+ * nothing and go ahead match the rest of the
+ * pattern with the remaining string. This
+ * helps make foo/<*><*>/bar (<> because
+ * otherwise it breaks C comment syntax) match
+ * both foo/bar and foo/a/bar.
+ */
+ if (p[0] == '/' &&
+ dowild(p + 1, text, flags) == WM_MATCH)
+ return WM_MATCH;
+ match_slash = 1;
+ } else /* WM_PATHNAME is set */
+ match_slash = 0;
+ } else
+ /* without WM_PATHNAME, '*' == '**' */
+ match_slash = flags & WM_PATHNAME ? 0 : 1;
+ if (*p == '\0') {
+ /* Trailing "**" matches everything. Trailing "*" matches
+ * only if there are no more slash characters. */
+ if (!match_slash) {
+ if (strchr((char *)text, '/'))
+ return WM_NOMATCH;
+ }
+ return WM_MATCH;
+ } else if (!match_slash && *p == '/') {
+ /*
+ * _one_ asterisk followed by a slash
+ * with WM_PATHNAME matches the next
+ * directory
+ */
+ const char *slash = strchr((char*)text, '/');
+ if (!slash)
+ return WM_NOMATCH;
+ text = (const uchar*)slash;
+ /* the slash is consumed by the top-level for loop */
+ break;
+ }
+ while (1) {
+ if (t_ch == '\0')
+ break;
+ /*
+ * Try to advance faster when an asterisk is
+ * followed by a literal. We know in this case
+ * that the string before the literal
+ * must belong to "*".
+ * If match_slash is false, do not look past
+ * the first slash as it cannot belong to '*'.
+ */
+ if (!is_glob_special(*p)) {
+ p_ch = *p;
+ if ((flags & WM_CASEFOLD) && ISUPPER(p_ch))
+ p_ch = tolower(p_ch);
+ while ((t_ch = *text) != '\0' &&
+ (match_slash || t_ch != '/')) {
+ if ((flags & WM_CASEFOLD) && ISUPPER(t_ch))
+ t_ch = tolower(t_ch);
+ if (t_ch == p_ch)
+ break;
+ text++;
+ }
+ if (t_ch != p_ch)
+ return WM_NOMATCH;
+ }
+ if ((matched = dowild(p, text, flags)) != WM_NOMATCH) {
+ if (!match_slash || matched != WM_ABORT_TO_STARSTAR)
+ return matched;
+ } else if (!match_slash && t_ch == '/')
+ return WM_ABORT_TO_STARSTAR;
+ t_ch = *++text;
+ }
+ return WM_ABORT_ALL;
+ case '[':
+ p_ch = *++p;
+ if (p_ch == '^')
+ p_ch = '!';
+ /* Assign literal 1/0 because of "matched" comparison. */
+ negated = p_ch == '!' ? 1 : 0;
+ if (negated) {
+ /* Inverted character class. */
+ p_ch = *++p;
+ }
+ prev_ch = 0;
+ matched = 0;
+ do {
+ if (!p_ch)
+ return WM_ABORT_ALL;
+ if (p_ch == '\\') {
+ p_ch = *++p;
+ if (!p_ch)
+ return WM_ABORT_ALL;
+ if (t_ch == p_ch)
+ matched = 1;
+ } else if (p_ch == '-' && prev_ch && p[1] && p[1] != ']') {
+ p_ch = *++p;
+ if (p_ch == '\\') {
+ p_ch = *++p;
+ if (!p_ch)
+ return WM_ABORT_ALL;
+ }
+ if (t_ch <= p_ch && t_ch >= prev_ch)
+ matched = 1;
+ else if ((flags & WM_CASEFOLD) && ISLOWER(t_ch)) {
+ uchar t_ch_upper = toupper(t_ch);
+ if (t_ch_upper <= p_ch && t_ch_upper >= prev_ch)
+ matched = 1;
+ }
+ p_ch = 0; /* This makes "prev_ch" get set to 0. */
+ } else if (p_ch == '[' && p[1] == ':') {
+ const uchar *s;
+ int i;
+ for (s = p += 2; (p_ch = *p) && p_ch != ']'; p++) {} /*SHARED ITERATOR*/
+ if (!p_ch)
+ return WM_ABORT_ALL;
+ i = p - s - 1;
+ if (i < 0 || p[-1] != ':') {
+ /* Didn't find ":]", so treat like a normal set. */
+ p = s - 2;
+ p_ch = '[';
+ if (t_ch == p_ch)
+ matched = 1;
+ continue;
+ }
+ if (CC_EQ(s,i, "alnum")) {
+ if (ISALNUM(t_ch))
+ matched = 1;
+ } else if (CC_EQ(s,i, "alpha")) {
+ if (ISALPHA(t_ch))
+ matched = 1;
+ } else if (CC_EQ(s,i, "blank")) {
+ if (ISBLANK(t_ch))
+ matched = 1;
+ } else if (CC_EQ(s,i, "cntrl")) {
+ if (ISCNTRL(t_ch))
+ matched = 1;
+ } else if (CC_EQ(s,i, "digit")) {
+ if (ISDIGIT(t_ch))
+ matched = 1;
+ } else if (CC_EQ(s,i, "graph")) {
+ if (ISGRAPH(t_ch))
+ matched = 1;
+ } else if (CC_EQ(s,i, "lower")) {
+ if (ISLOWER(t_ch))
+ matched = 1;
+ } else if (CC_EQ(s,i, "print")) {
+ if (ISPRINT(t_ch))
+ matched = 1;
+ } else if (CC_EQ(s,i, "punct")) {
+ if (ISPUNCT(t_ch))
+ matched = 1;
+ } else if (CC_EQ(s,i, "space")) {
+ if (ISSPACE(t_ch))
+ matched = 1;
+ } else if (CC_EQ(s,i, "upper")) {
+ if (ISUPPER(t_ch))
+ matched = 1;
+ else if ((flags & WM_CASEFOLD) && ISLOWER(t_ch))
+ matched = 1;
+ } else if (CC_EQ(s,i, "xdigit")) {
+ if (ISXDIGIT(t_ch))
+ matched = 1;
+ } else /* malformed [:class:] string */
+ return WM_ABORT_ALL;
+ p_ch = 0; /* This makes "prev_ch" get set to 0. */
+ } else if (t_ch == p_ch)
+ matched = 1;
+ } while (prev_ch = p_ch, (p_ch = *++p) != ']');
+ if (matched == negated ||
+ ((flags & WM_PATHNAME) && t_ch == '/'))
+ return WM_NOMATCH;
+ continue;
+ }
+ }
+
+ return *text ? WM_NOMATCH : WM_MATCH;
+}
+
+/* Match the "pattern" against the "text" string. */
+static int wildmatch(const char *pattern, const char *text, unsigned int flags)
+{
+ // local modification: move WM_CASEFOLD here
+ if (ignore_case)
+ flags |= WM_CASEFOLD;
+
+ return dowild((const uchar*)pattern, (const uchar*)text, flags);
+}
+
+// Copied from dir.h
+
+#define PATTERN_FLAG_NODIR 1
+#define PATTERN_FLAG_ENDSWITH 4
+#define PATTERN_FLAG_MUSTBEDIR 8
+#define PATTERN_FLAG_NEGATIVE 16
+
+// Copied from dir.c
+
+static int fspathncmp(const char *a, const char *b, size_t count)
+{
+ return ignore_case ? strncasecmp(a, b, count) : strncmp(a, b, count);
+}
+
+static int simple_length(const char *match)
+{
+ int len = -1;
+
+ for (;;) {
+ unsigned char c = *match++;
+ len++;
+ if (c == '\0' || is_glob_special(c))
+ return len;
+ }
+}
+
+static int no_wildcard(const char *string)
+{
+ return string[simple_length(string)] == '\0';
+}
+
+static void parse_path_pattern(const char **pattern,
+ int *patternlen,
+ unsigned *flags,
+ int *nowildcardlen)
+{
+ const char *p = *pattern;
+ size_t i, len;
+
+ *flags = 0;
+ if (*p == '!') {
+ *flags |= PATTERN_FLAG_NEGATIVE;
+ p++;
+ }
+ len = strlen(p);
+ if (len && p[len - 1] == '/') {
+ len--;
+ *flags |= PATTERN_FLAG_MUSTBEDIR;
+ }
+ for (i = 0; i < len; i++) {
+ if (p[i] == '/')
+ break;
+ }
+ if (i == len)
+ *flags |= PATTERN_FLAG_NODIR;
+ *nowildcardlen = simple_length(p);
+ /*
+ * we should have excluded the trailing slash from 'p' too,
+ * but that's one more allocation. Instead just make sure
+ * nowildcardlen does not exceed real patternlen
+ */
+ if (*nowildcardlen > len)
+ *nowildcardlen = len;
+ if (*p == '*' && no_wildcard(p + 1))
+ *flags |= PATTERN_FLAG_ENDSWITH;
+ *pattern = p;
+ *patternlen = len;
+}
+
+static void trim_trailing_spaces(char *buf)
+{
+ char *p, *last_space = NULL;
+
+ for (p = buf; *p; p++)
+ switch (*p) {
+ case ' ':
+ if (!last_space)
+ last_space = p;
+ break;
+ case '\\':
+ p++;
+ if (!*p)
+ return;
+ /* fallthrough */
+ default:
+ last_space = NULL;
+ }
+
+ if (last_space)
+ *last_space = '\0';
+}
+
+static int match_basename(const char *basename, int basenamelen,
+ const char *pattern, int prefix, int patternlen,
+ unsigned flags)
+{
+ if (prefix == patternlen) {
+ if (patternlen == basenamelen &&
+ !fspathncmp(pattern, basename, basenamelen))
+ return 1;
+ } else if (flags & PATTERN_FLAG_ENDSWITH) {
+ /* "*literal" matching against "fooliteral" */
+ if (patternlen - 1 <= basenamelen &&
+ !fspathncmp(pattern + 1,
+ basename + basenamelen - (patternlen - 1),
+ patternlen - 1))
+ return 1;
+ } else {
+ // local modification: call wildmatch() directly
+ if (!wildmatch(pattern, basename, flags))
+ return 1;
+ }
+ return 0;
+}
+
+static int match_pathname(const char *pathname, int pathlen,
+ const char *base, int baselen,
+ const char *pattern, int prefix, int patternlen)
+{
+ // local modification: remove local variables
+
+ /*
+ * match with FNM_PATHNAME; the pattern has base implicitly
+ * in front of it.
+ */
+ if (*pattern == '/') {
+ pattern++;
+ patternlen--;
+ prefix--;
+ }
+
+ /*
+ * baselen does not count the trailing slash. base[] may or
+ * may not end with a trailing slash though.
+ */
+ if (pathlen < baselen + 1 ||
+ (baselen && pathname[baselen] != '/') ||
+ fspathncmp(pathname, base, baselen))
+ return 0;
+
+ // local modification: simplified because always baselen > 0
+ pathname += baselen + 1;
+ pathlen -= baselen + 1;
+
+ if (prefix) {
+ /*
+ * if the non-wildcard part is longer than the
+ * remaining pathname, surely it cannot match.
+ */
+ if (prefix > pathlen)
+ return 0;
+
+ if (fspathncmp(pattern, pathname, prefix))
+ return 0;
+ pattern += prefix;
+ patternlen -= prefix;
+ pathname += prefix;
+ pathlen -= prefix;
+
+ /*
+ * If the whole pattern did not have a wildcard,
+ * then our prefix match is all we need; we
+ * do not need to call fnmatch at all.
+ */
+ if (!patternlen && !pathlen)
+ return 1;
+ }
+
+ // local modification: call wildmatch() directly
+ return !wildmatch(pattern, pathname, WM_PATHNAME);
+}
+
+// Copied from git/utf8.c
+
+static const char utf8_bom[] = "\357\273\277";
+
+//----------------------------(IMPORT FROM GIT END)----------------------------
+
+struct pattern {
+ unsigned int flags;
+ int nowildcardlen;
+ int patternlen;
+ int dirlen;
+ char pattern[];
+};
+
+static struct pattern **pattern_list;
+static int nr_patterns, alloced_patterns;
+
+// Remember the number of patterns at each directory level
+static int *nr_patterns_at;
+// Track the current/max directory level;
+static int depth, max_depth;
+static bool debug_on;
+static FILE *out_fp, *stat_fp;
+static char *prefix = "";
+static char *progname;
+
+static void __attribute__((noreturn)) perror_exit(const char *s)
+{
+ perror(s);
+
+ exit(EXIT_FAILURE);
+}
+
+static void __attribute__((noreturn)) error_exit(const char *fmt, ...)
+{
+ va_list args;
+
+ fprintf(stderr, "%s: error: ", progname);
+
+ va_start(args, fmt);
+ vfprintf(stderr, fmt, args);
+ va_end(args);
+
+ exit(EXIT_FAILURE);
+}
+
+static void debug(const char *fmt, ...)
+{
+ va_list args;
+ int i;
+
+ if (!debug_on)
+ return;
+
+ fprintf(stderr, "[DEBUG] ");
+
+ for (i = 0; i < depth * 2; i++)
+ fputc(' ', stderr);
+
+ va_start(args, fmt);
+ vfprintf(stderr, fmt, args);
+ va_end(args);
+}
+
+static void *xrealloc(void *ptr, size_t size)
+{
+ ptr = realloc(ptr, size);
+ if (!ptr)
+ perror_exit(progname);
+
+ return ptr;
+}
+
+static void *xmalloc(size_t size)
+{
+ return xrealloc(NULL, size);
+}
+
+// similar to last_matching_pattern_from_list() in GIT
+static bool is_ignored(const char *path, int pathlen, int dirlen, bool is_dir)
+{
+ int i;
+
+ // Search in the reverse order because the last matching pattern wins.
+ for (i = nr_patterns - 1; i >= 0; i--) {
+ struct pattern *p = pattern_list[i];
+ unsigned int flags = p->flags;
+ const char *gitignore_dir = p->pattern + p->patternlen + 1;
+ bool ignored;
+
+ if ((flags & PATTERN_FLAG_MUSTBEDIR) && !is_dir)
+ continue;
+
+ if (flags & PATTERN_FLAG_NODIR) {
+ if (!match_basename(path + dirlen + 1,
+ pathlen - dirlen - 1,
+ p->pattern,
+ p->nowildcardlen,
+ p->patternlen,
+ p->flags))
+ continue;
+ } else {
+ if (!match_pathname(path, pathlen,
+ gitignore_dir, p->dirlen,
+ p->pattern,
+ p->nowildcardlen,
+ p->patternlen))
+ continue;
+ }
+
+ debug("%s: matches %s%s%s (%s/.gitignore)\n", path,
+ flags & PATTERN_FLAG_NEGATIVE ? "!" : "", p->pattern,
+ flags & PATTERN_FLAG_MUSTBEDIR ? "/" : "",
+ gitignore_dir);
+
+ ignored = (flags & PATTERN_FLAG_NEGATIVE) == 0;
+ if (ignored)
+ debug("Ignore: %s\n", path);
+
+ return ignored;
+ }
+
+ debug("%s: no match\n", path);
+
+ return false;
+}
+
+static void add_pattern(const char *string, const char *dir, int dirlen)
+{
+ struct pattern *p;
+ int patternlen, nowildcardlen;
+ unsigned int flags;
+
+ parse_path_pattern(&string, &patternlen, &flags, &nowildcardlen);
+
+ if (patternlen == 0)
+ return;
+
+ p = xmalloc(sizeof(*p) + patternlen + dirlen + 2);
+
+ memcpy(p->pattern, string, patternlen);
+ p->pattern[patternlen] = 0;
+ memcpy(p->pattern + patternlen + 1, dir, dirlen);
+ p->pattern[patternlen + 1 + dirlen] = 0;
+
+ p->patternlen = patternlen;
+ p->nowildcardlen = nowildcardlen;
+ p->dirlen = dirlen;
+ p->flags = flags;
+
+ debug("Add pattern: %s%s%s\n",
+ flags & PATTERN_FLAG_NEGATIVE ? "!" : "", p->pattern,
+ flags & PATTERN_FLAG_MUSTBEDIR ? "/" : "");
+
+ if (nr_patterns >= alloced_patterns) {
+ alloced_patterns += 128;
+ pattern_list = xrealloc(pattern_list,
+ sizeof(*pattern_list) * alloced_patterns);
+ }
+
+ pattern_list[nr_patterns++] = p;
+}
+
+// similar to add_patterns_from_buffer() in GIT
+static void add_patterns_from_gitignore(const char *dir, int dirlen)
+{
+ struct stat st;
+ char path[PATH_MAX], *buf, *entry;
+ size_t size;
+ int fd, pathlen, i;
+
+ pathlen = snprintf(path, sizeof(path), "%s/.gitignore", dir);
+ if (pathlen >= sizeof(path))
+ error_exit("%s: too long path was truncated\n", path);
+
+ fd = open(path, O_RDONLY | O_NOFOLLOW);
+ if (fd < 0) {
+ if (errno != ENOENT)
+ return perror_exit(path);
+ return;
+ }
+
+ if (fstat(fd, &st) < 0)
+ perror_exit(path);
+
+ size = st.st_size;
+
+ buf = xmalloc(size + 1);
+ if (read(fd, buf, st.st_size) != st.st_size)
+ perror_exit(path);
+
+ buf[st.st_size] = '\n';
+ if (close(fd))
+ perror_exit(path);
+
+ debug("Parse %s\n", path);
+
+ entry = buf;
+
+ // skip utf8 bom
+ if (!strncmp(entry, utf8_bom, strlen(utf8_bom)))
+ entry += strlen(utf8_bom);
+
+ for (i = entry - buf; i < size; i++) {
+ if (buf[i] == '\n') {
+ if (entry != buf + i && entry[0] != '#') {
+ buf[i - (i && buf[i-1] == '\r')] = 0;
+ trim_trailing_spaces(entry);
+ add_pattern(entry, dir, dirlen);
+ }
+ entry = buf + i + 1;
+ }
+ }
+
+ free(buf);
+}
+
+// Save the current number of patterns and increment the depth
+static void increment_depth(void)
+{
+ if (depth >= max_depth) {
+ max_depth += 1;
+ nr_patterns_at = xrealloc(nr_patterns_at,
+ sizeof(*nr_patterns_at) * max_depth);
+ }
+
+ nr_patterns_at[depth] = nr_patterns;
+ depth++;
+}
+
+// Decrement the depth, and free up the patterns of this directory level.
+static void decrement_depth(void)
+{
+ depth--;
+ assert(depth >= 0);
+
+ while (nr_patterns > nr_patterns_at[depth])
+ free(pattern_list[--nr_patterns]);
+}
+
+static void print_path(const char *path)
+{
+ // The path always starts with "./"
+ assert(strlen(path) >= 2);
+
+ // Replace the root directory with a preferred prefix.
+ // This is useful for the tar command.
+ fprintf(out_fp, "%s%s\n", prefix, path + 2);
+}
+
+static void print_stat(const char *path, struct stat *st)
+{
+ if (!stat_fp)
+ return;
+
+ if (!S_ISREG(st->st_mode) && !S_ISLNK(st->st_mode))
+ return;
+
+ assert(strlen(path) >= 2);
+
+ fprintf(stat_fp, "%c %9ld %10ld %s\n",
+ S_ISLNK(st->st_mode) ? 'l' : '-',
+ st->st_size, st->st_mtim.tv_sec, path + 2);
+}
+
+// Traverse the entire directory tree, parsing .gitignore files.
+// Print file paths that are not tracked by git.
+//
+// Return true if all files under the directory are ignored, false otherwise.
+static bool traverse_directory(const char *dir, int dirlen)
+{
+ bool all_ignored = true;
+ DIR *dirp;
+
+ debug("Enter[%d]: %s\n", depth, dir);
+ increment_depth();
+
+ add_patterns_from_gitignore(dir, dirlen);
+
+ dirp = opendir(dir);
+ if (!dirp)
+ perror_exit(dir);
+
+ while (1) {
+ struct dirent *d;
+ struct stat st;
+ char path[PATH_MAX];
+ int pathlen;
+ bool ignored;
+
+ errno = 0;
+ d = readdir(dirp);
+ if (!d) {
+ if (errno)
+ perror_exit(dir);
+ break;
+ }
+
+ if (!strcmp(d->d_name, "..") || !strcmp(d->d_name, "."))
+ continue;
+
+ pathlen = snprintf(path, sizeof(path), "%s/%s", dir, d->d_name);
+ if (pathlen >= sizeof(path))
+ error_exit("%s: too long path was truncated\n", path);
+
+ if (lstat(path, &st) < 0)
+ perror_exit(path);
+
+ if ((!S_ISREG(st.st_mode) && !S_ISDIR(st.st_mode) && !S_ISLNK(st.st_mode)) ||
+ is_ignored(path, pathlen, dirlen, S_ISDIR(st.st_mode))) {
+ ignored = true;
+ } else {
+ if (S_ISDIR(st.st_mode) && !S_ISLNK(st.st_mode))
+ // If all the files in a directory are ignored,
+ // let's ignore that directory as well. This
+ // will avoid empty directories in the tarball.
+ ignored = traverse_directory(path, pathlen);
+ else
+ ignored = false;
+ }
+
+ if (ignored) {
+ print_path(path);
+ } else {
+ print_stat(path, &st);
+ all_ignored = false;
+ }
+ }
+
+ if (closedir(dirp))
+ perror_exit(dir);
+
+ decrement_depth();
+ debug("Leave[%d]: %s\n", depth, dir);
+
+ return all_ignored;
+}
+
+static void usage(void)
+{
+ fprintf(stderr,
+ "usage: %s [options]\n"
+ "\n"
+ "Show files that are ignored by git\n"
+ "\n"
+ "options:\n"
+ " -d, --debug print debug messages to stderr\n"
+ " -e, --exclude PATTERN add the given exclude pattern\n"
+ " -h, --help show this help message and exit\n"
+ " -i, --ignore-case Ignore case differences between the patterns and the files\n"
+ " -o, --output FILE output the ignored files to a file (default: '-', i.e. stdout)\n"
+ " -p, --prefix PREFIX prefix added to each path (default: empty string)\n"
+ " -r, --rootdir DIR root of the source tree (default: current working directory)\n"
+ " -s, --stat FILE output the file stat of non-ignored files to a file\n",
+ progname);
+}
+
+static void open_output(const char *pathname, FILE **fp)
+{
+ if (strcmp(pathname, "-")) {
+ *fp = fopen(pathname, "w");
+ if (!*fp)
+ perror_exit(pathname);
+ } else {
+ *fp = stdout;
+ }
+}
+
+static void close_output(const char *pathname, FILE *fp)
+{
+ fflush(fp);
+
+ if (ferror(fp))
+ error_exit("not all data was written to the output\n");
+
+ if (fclose(fp))
+ perror_exit(pathname);
+}
+
+int main(int argc, char *argv[])
+{
+ const char *output = "-";
+ const char *rootdir = ".";
+ const char *stat = NULL;
+
+ progname = strrchr(argv[0], '/');
+ if (progname)
+ progname++;
+ else
+ progname = argv[0];
+
+ while (1) {
+ static struct option long_options[] = {
+ {"debug", no_argument, NULL, 'd'},
+ {"help", no_argument, NULL, 'h'},
+ {"ignore-case", no_argument, NULL, 'i'},
+ {"output", required_argument, NULL, 'o'},
+ {"prefix", required_argument, NULL, 'p'},
+ {"rootdir", required_argument, NULL, 'r'},
+ {"stat", required_argument, NULL, 's'},
+ {"exclude", required_argument, NULL, 'x'},
+ {},
+ };
+
+ int c = getopt_long(argc, argv, "dhino:p:r:s:x:", long_options, NULL);
+
+ if (c == -1)
+ break;
+
+ switch (c) {
+ case 'd':
+ debug_on = true;
+ break;
+ case 'h':
+ usage();
+ exit(0);
+ case 'i':
+ ignore_case = true;
+ break;
+ case 'o':
+ output = optarg;
+ break;
+ case 'p':
+ prefix = optarg;
+ break;
+ case 'r':
+ rootdir = optarg;
+ break;
+ case 's':
+ stat = optarg;
+ break;
+ case 'x':
+ add_pattern(optarg, ".", strlen("."));
+ break;
+ case '?':
+ usage();
+ /* fallthrough */
+ default:
+ exit(EXIT_FAILURE);
+ }
+ }
+
+ open_output(output, &out_fp);
+ if (stat && stat[0])
+ open_output(stat, &stat_fp);
+
+ if (chdir(rootdir))
+ perror_exit(rootdir);
+
+ add_pattern(".git/", ".", strlen("."));
+
+ if (traverse_directory(".", strlen(".")))
+ print_path("./");
+
+ assert(depth == 0);
+
+ while (nr_patterns > 0)
+ free(pattern_list[--nr_patterns]);
+ free(pattern_list);
+ free(nr_patterns_at);
+
+ close_output(output, out_fp);
+ if (stat_fp)
+ close_output(stat, stat_fp);
+
+ return 0;
+}