diff options
Diffstat (limited to 'coreutils/sort.c')
| -rw-r--r-- | coreutils/sort.c | 357 |
1 files changed, 357 insertions, 0 deletions
diff --git a/coreutils/sort.c b/coreutils/sort.c new file mode 100644 index 000000000..c23bf9c60 --- /dev/null +++ b/coreutils/sort.c @@ -0,0 +1,357 @@ +/* vi: set sw=4 ts=4: */ +/* + * SuS3 compliant sort implementation for busybox + * + * Copyright (C) 2004 by Rob Landley <rob@landley.net> + * + * MAINTAINER: Rob Landley <rob@landley.net> + * + * Licensed under GPLv2 or later, see file LICENSE in this tarball for details. + * + * See SuS3 sort standard at: + * http://www.opengroup.org/onlinepubs/007904975/utilities/sort.html + */ + +#include "busybox.h" + +static int global_flags; + +/* + sort [-m][-o output][-bdfinru][-t char][-k keydef]... [file...] + sort -c [-bdfinru][-t char][-k keydef][file] +*/ + +/* These are sort types */ +#define FLAG_n 1 /* Numeric sort */ +#define FLAG_g 2 /* Sort using strtod() */ +#define FLAG_M 4 /* Sort date */ +/* ucsz apply to root level only, not keys. b at root level implies bb */ +#define FLAG_u 8 /* Unique */ +#define FLAG_c 16 /* Check: no output, exit(!ordered) */ +#define FLAG_s 32 /* Stable sort, no ascii fallback at end */ +#define FLAG_z 64 /* Input is null terminated, not \n */ +/* These can be applied to search keys, the previous four can't */ +#define FLAG_b 128 /* Ignore leading blanks */ +#define FLAG_r 256 /* Reverse */ +#define FLAG_d 512 /* Ignore !(isalnum()|isspace()) */ +#define FLAG_f 1024 /* Force uppercase */ +#define FLAG_i 2048 /* Ignore !isprint() */ +#define FLAG_bb 32768 /* Ignore trailing blanks */ + + +#if ENABLE_FEATURE_SORT_BIG +static char key_separator; + +static struct sort_key { + struct sort_key *next_key; /* linked list */ + unsigned short range[4]; /* start word, start char, end word, end char */ + int flags; +} *key_list; + +static char *get_key(char *str, struct sort_key *key, int flags) +{ + int start = 0, end = 0, len, i, j; + + /* Special case whole string, so we don't have to make a copy */ + if (key->range[0] == 1 && !key->range[1] && !key->range[2] && !key->range[3] + && !(flags & (FLAG_b | FLAG_d | FLAG_f | FLAG_i | FLAG_bb)) + ) { + return str; + } + + /* Find start of key on first pass, end on second pass*/ + len = strlen(str); + for (j = 0; j < 2; j++) { + if (!key->range[2*j]) + end = len; + /* Loop through fields */ + else { + end = 0; + for (i = 1; i < key->range[2*j] + j; i++) { + /* Skip leading blanks or first separator */ + if (str[end]) { + if (!key_separator) + while (isspace(str[end])) end++; + } + /* Skip body of key */ + for (; str[end]; end++) { + if (key_separator) { + if (str[end] == key_separator) + break; + } else { + if (isspace(str[end])) + break; + } + } + } + } + if (!j) start = end; + } + /* Key with explicit separator starts after separator */ + if (key_separator && str[start] == key_separator) + start++; + /* Strip leading whitespace if necessary */ +//XXX: skip_whitespace() + if (flags & FLAG_b) + while (isspace(str[start])) start++; + /* Strip trailing whitespace if necessary */ + if (flags & FLAG_bb) + while (end > start && isspace(str[end-1])) end--; + /* Handle offsets on start and end */ + if (key->range[3]) { + end += key->range[3] - 1; + if (end > len) end = len; + } + if (key->range[1]) { + start += key->range[1] - 1; + if (start > len) start = len; + } + /* Make the copy */ + if (end < start) end = start; + str = xstrndup(str+start, end-start); + /* Handle -d */ + if (flags & FLAG_d) { + for (start = end = 0; str[end]; end++) + if (isspace(str[end]) || isalnum(str[end])) + str[start++] = str[end]; + str[start] = '\0'; + } + /* Handle -i */ + if (flags & FLAG_i) { + for (start = end = 0; str[end]; end++) + if (isprint(str[end])) + str[start++] = str[end]; + str[start] = '\0'; + } + /* Handle -f */ + if (flags & FLAG_f) + for (i = 0; str[i]; i++) + str[i] = toupper(str[i]); + + return str; +} + +static struct sort_key *add_key(void) +{ + struct sort_key **pkey = &key_list; + while (*pkey) + pkey = &((*pkey)->next_key); + return *pkey = xzalloc(sizeof(struct sort_key)); +} + +#define GET_LINE(fp) \ + ((global_flags & FLAG_z) \ + ? bb_get_chunk_from_file(fp, NULL) \ + : xmalloc_getline(fp)) +#else +#define GET_LINE(fp) xmalloc_getline(fp) +#endif + +/* Iterate through keys list and perform comparisons */ +static int compare_keys(const void *xarg, const void *yarg) +{ + int flags = global_flags, retval = 0; + char *x, *y; + +#if ENABLE_FEATURE_SORT_BIG + struct sort_key *key; + + for (key = key_list; !retval && key; key = key->next_key) { + flags = key->flags ? key->flags : global_flags; + /* Chop out and modify key chunks, handling -dfib */ + x = get_key(*(char **)xarg, key, flags); + y = get_key(*(char **)yarg, key, flags); +#else + /* This curly bracket serves no purpose but to match the nesting + level of the for() loop we're not using */ + { + x = *(char **)xarg; + y = *(char **)yarg; +#endif + /* Perform actual comparison */ + switch (flags & 7) { + default: + bb_error_msg_and_die("unknown sort type"); + break; + /* Ascii sort */ + case 0: + retval = strcmp(x, y); + break; +#if ENABLE_FEATURE_SORT_BIG + case FLAG_g: { + char *xx, *yy; + double dx = strtod(x, &xx); + double dy = strtod(y, &yy); + /* not numbers < NaN < -infinity < numbers < +infinity) */ + if (x == xx) + retval = (y == yy ? 0 : -1); + else if (y == yy) + retval = 1; + /* Check for isnan */ + else if (dx != dx) + retval = (dy != dy) ? 0 : -1; + else if (dy != dy) + retval = 1; + /* Check for infinity. Could underflow, but it avoids libm. */ + else if (1.0 / dx == 0.0) { + if (dx < 0) + retval = (1.0 / dy == 0.0 && dy < 0) ? 0 : -1; + else + retval = (1.0 / dy == 0.0 && dy > 0) ? 0 : 1; + } else if (1.0 / dy == 0.0) + retval = (dy < 0) ? 1 : -1; + else + retval = (dx > dy) ? 1 : ((dx < dy) ? -1 : 0); + break; + } + case FLAG_M: { + struct tm thyme; + int dx; + char *xx, *yy; + + xx = strptime(x, "%b", &thyme); + dx = thyme.tm_mon; + yy = strptime(y, "%b", &thyme); + if (!xx) + retval = (!yy) ? 0 : -1; + else if (!yy) + retval = 1; + else + retval = (dx == thyme.tm_mon) ? 0 : dx - thyme.tm_mon; + break; + } + /* Full floating point version of -n */ + case FLAG_n: { + double dx = atof(x); + double dy = atof(y); + retval = (dx > dy) ? 1 : ((dx < dy) ? -1 : 0); + break; + } + } /* switch */ + /* Free key copies. */ + if (x != *(char **)xarg) free(x); + if (y != *(char **)yarg) free(y); + /* if (retval) break; - done by for() anyway */ +#else + /* Integer version of -n for tiny systems */ + case FLAG_n: + retval = atoi(x) - atoi(y); + break; + } /* switch */ +#endif + } + /* Perform fallback sort if necessary */ + if (!retval && !(global_flags & FLAG_s)) + retval = strcmp(*(char **)xarg, *(char **)yarg); + + if (flags & FLAG_r) return -retval; + return retval; +} + +int sort_main(int argc, char **argv) +{ + FILE *fp, *outfile = NULL; + int linecount = 0, i, flag; + char *line, **lines = NULL, *optlist = "ngMucszbrdfimS:T:o:k:t:"; + int c; + + xfunc_error_retval = 2; + /* Parse command line options */ + while ((c = getopt(argc, argv, optlist)) > 0) { + line = strchr(optlist, c); + if (!line) bb_show_usage(); + switch (*line) { +#if ENABLE_FEATURE_SORT_BIG + case 'o': + if (outfile) bb_error_msg_and_die("too many -o"); + outfile = xfopen(optarg, "w"); + break; + case 't': + if (key_separator || optarg[1]) + bb_error_msg_and_die("too many -t"); + key_separator = *optarg; + break; + /* parse sort key */ + case 'k': { + struct sort_key *key = add_key(); + char *temp, *temp2; + + temp = optarg; + for (i = 0; *temp;) { + /* Start of range */ + key->range[2*i] = (unsigned short)strtol(temp, &temp, 10); + if (*temp == '.') + key->range[(2*i)+1] = (unsigned short)strtol(temp+1, &temp, 10); + for (; *temp; temp++) { + if (*temp == ',' && !i++) { + temp++; + break; + } /* no else needed: fall through to syntax error + because comma isn't in optlist */ + temp2 = strchr(optlist, *temp); + flag = (1 << (temp2 - optlist)); + if (!temp2 || (flag > FLAG_M && flag < FLAG_b)) + bb_error_msg_and_die("unknown key option"); + /* b after ',' means strip _trailing_ space */ + if (i && flag == FLAG_b) flag = FLAG_bb; + key->flags |= flag; + } + } + break; + } +#endif + default: + global_flags |= (1 << (line - optlist)); + /* global b strips leading and trailing spaces */ + if (global_flags & FLAG_b) global_flags |= FLAG_bb; + break; + } + } + /* Open input files and read data */ + for (i = argv[optind] ? optind : optind-1; argv[i]; i++) { + fp = stdin; + if (i >= optind && (argv[i][0] != '-' || argv[i][1])) + fp = xfopen(argv[i], "r"); + for (;;) { + line = GET_LINE(fp); + if (!line) break; + if (!(linecount & 63)) + lines = xrealloc(lines, sizeof(char *) * (linecount + 64)); + lines[linecount++] = line; + } + fclose(fp); + } +#if ENABLE_FEATURE_SORT_BIG + /* if no key, perform alphabetic sort */ + if (!key_list) + add_key()->range[0] = 1; + /* handle -c */ + if (global_flags & FLAG_c) { + int j = (global_flags & FLAG_u) ? -1 : 0; + for (i = 1; i < linecount; i++) + if (compare_keys(&lines[i-1], &lines[i]) > j) { + fprintf(stderr, "Check line %d\n", i); + return 1; + } + return 0; + } +#endif + /* Perform the actual sort */ + qsort(lines, linecount, sizeof(char *), compare_keys); + /* handle -u */ + if (global_flags & FLAG_u) { + for (flag = 0, i = 1; i < linecount; i++) { + if (!compare_keys(&lines[flag], &lines[i])) + free(lines[i]); + else + lines[++flag] = lines[i]; + } + if (linecount) linecount = flag+1; + } + /* Print it */ + if (!outfile) outfile = stdout; + for (i = 0; i < linecount; i++) + fprintf(outfile, "%s\n", lines[i]); + + fflush_stdout_and_exit(EXIT_SUCCESS); +} |
