/* $NetBSD: init.c,v 1.30 2021/09/19 11:37:01 andvar Exp $ */ /*- * Copyright (c) 2000-2003 The NetBSD Foundation, Inc. * All rights reserved. * * This code is derived from software contributed to The NetBSD Foundation * by Ben Harris and Jaromir Dolecek. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. */ /*- * Copyright (c) 1993 * The Regents of the University of California. All rights reserved. * * This code is derived from software contributed to Berkeley by * Peter McIlroy. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include "sort.h" __RCSID("$NetBSD: init.c,v 1.30 2021/09/19 11:37:01 andvar Exp $"); #include <ctype.h> #include <string.h> static void insertcol(struct field *); static const char *setcolumn(const char *, struct field *); /* * masks of ignored characters. */ static u_char dtable[NBINS], itable[NBINS]; /* * parsed key options */ struct coldesc *clist = NULL; int ncols = 0; /* * clist (list of columns which correspond to one or more icol or tcol) * is in increasing order of columns. * Fields are kept in increasing order of fields. */ /* * keep clist in order--inserts a column in a sorted array */ static void insertcol(struct field *field) { int i; struct coldesc *p; /* Make space for new item */ p = realloc(clist, (ncols + 2) * sizeof(*clist)); if (!p) err(1, "realloc"); clist = p; memset(&clist[ncols], 0, sizeof(clist[ncols])); for (i = 0; i < ncols; i++) if (field->icol.num <= clist[i].num) break; if (field->icol.num != clist[i].num) { memmove(clist+i+1, clist+i, sizeof(COLDESC)*(ncols-i)); clist[i].num = field->icol.num; ncols++; } if (field->tcol.num && field->tcol.num != field->icol.num) { for (i = 0; i < ncols; i++) if (field->tcol.num <= clist[i].num) break; if (field->tcol.num != clist[i].num) { memmove(clist+i+1, clist+i,sizeof(COLDESC)*(ncols-i)); clist[i].num = field->tcol.num; ncols++; } } } /* * matches fields with the appropriate columns--n^2 but who cares? */ void fldreset(struct field *fldtab) { int i; fldtab[0].tcol.p = clist + ncols - 1; for (++fldtab; fldtab->icol.num; ++fldtab) { for (i = 0; fldtab->icol.num != clist[i].num; i++) ; fldtab->icol.p = clist + i; if (!fldtab->tcol.num) continue; for (i = 0; fldtab->tcol.num != clist[i].num; i++) ; fldtab->tcol.p = clist + i; } } /* * interprets a column in a -k field */ static const char * setcolumn(const char *pos, struct field *cur_fld) { struct column *col; char *npos; int tmp; col = cur_fld->icol.num ? (&cur_fld->tcol) : (&cur_fld->icol); col->num = (int) strtol(pos, &npos, 10); pos = npos; if (col->num <= 0 && !(col->num == 0 && col == &(cur_fld->tcol))) errx(2, "field numbers must be positive"); if (*pos == '.') { if (!col->num) errx(2, "cannot indent end of line"); ++pos; col->indent = (int) strtol(pos, &npos, 10); pos = npos; if (&cur_fld->icol == col) col->indent--; if (col->indent < 0) errx(2, "illegal offset"); } for(; (tmp = optval(*pos, cur_fld->tcol.num)); pos++) cur_fld->flags |= tmp; if (cur_fld->icol.num == 0) cur_fld->icol.num = 1; return (pos); } int setfield(const char *pos, struct field *cur_fld, int gflag) { cur_fld->mask = NULL; pos = setcolumn(pos, cur_fld); if (*pos == '\0') /* key extends to EOL. */ cur_fld->tcol.num = 0; else { if (*pos != ',') errx(2, "illegal field descriptor"); setcolumn((++pos), cur_fld); } if (!cur_fld->flags) cur_fld->flags = gflag; if (REVERSE) /* A local 'r' doesn't invert the global one */ cur_fld->flags &= ~R; /* Assign appropriate mask table and weight table. */ cur_fld->weights = weight_tables[cur_fld->flags & (R | F)]; if (cur_fld->flags & I) cur_fld->mask = itable; else if (cur_fld->flags & D) cur_fld->mask = dtable; cur_fld->flags |= (gflag & (BI | BT)); if (!cur_fld->tcol.indent) /* BT has no meaning at end of field */ cur_fld->flags &= ~BT; if (cur_fld->tcol.num && !(!(cur_fld->flags & BI) && cur_fld->flags & BT) && (cur_fld->tcol.num <= cur_fld->icol.num /* indent if 0 -> end of field, i.e. okay */ && cur_fld->tcol.indent != 0 && cur_fld->tcol.indent < cur_fld->icol.indent)) errx(2, "fields out of order"); insertcol(cur_fld); return (cur_fld->tcol.num); } int optval(int desc, int tcolflag) { switch(desc) { case 'b': if (!tcolflag) return BI; else return BT; case 'd': return D; case 'f': return F; case 'i': return I; case 'l': return L; case 'n': return N; case 'r': return R; default: return 0; } } /* * Return true if the options found in ARG, according to the getopt * spec in OPTS, require an additional argv word as an option * argument. */ static int options_need_argument(const char *arg, const char *opts) { size_t pos; const char *s; /*assert(arg[0] == '-');*/ pos = 1; while (arg[pos]) { s = strchr(opts, arg[pos]); if (s == NULL) { /* invalid option */ return 0; } if (s[1] == ':') { /* option requires argument */ if (arg[pos+1] == '\0') { /* no argument in this arg */ return 1; } else { /* argument is in this arg; no more options */ return 0; } } pos++; } return 0; } /* * Replace historic +SPEC arguments with appropriate -kSPEC. * * The form can be either a single +SPEC or a pair +SPEC -SPEC. * The following -SPEC is not recognized unless it follows * immediately. */ void fixit(int *argc, char **argv, const char *opts) { int i, j, sawplus; char *vpos, *tpos, spec[20]; int col, indent; sawplus = 0; for (i = 1; i < *argc; i++) { /* * This loop must stop exactly where getopt will stop. * Otherwise it turns e.g. "sort x +3" into "sort x * -k4.1", which will croak if +3 was in fact really a * file name. In order to do this reliably we need to * be able to identify argv words that are option * arguments. */ if (!strcmp(argv[i], "--")) { /* End of options; stop. */ break; } if (argv[i][0] == '+') { /* +POS argument */ sawplus = 1; } else if (argv[i][0] == '-' && sawplus && isdigit((unsigned char)argv[i][1])) { /* -POS argument */ sawplus = 0; } else if (argv[i][0] == '-') { /* other option */ sawplus = 0; if (options_need_argument(argv[i], opts)) { /* skip over the argument */ i++; } continue; } else { /* not an option at all; stop */ sawplus = 0; break; } /* * At this point argv[i] is an old-style spec. The * sawplus flag used by the above loop logic also * tells us if it's a +SPEC or -SPEC. */ /* parse spec */ tpos = argv[i]+1; col = (int)strtol(tpos, &tpos, 10); if (*tpos == '.') { ++tpos; indent = (int) strtol(tpos, &tpos, 10); } else indent = 0; /* tpos now points to the optional flags */ /* * In the traditional form, x.0 means beginning of line; * in the new form, x.0 means end of line. Adjust the * value of INDENT accordingly. */ if (sawplus) { /* +POS */ col += 1; indent += 1; } else { /* -POS */ if (indent > 0) col += 1; } /* make the new style spec */ (void)snprintf(spec, sizeof(spec), "%d.%d%s", col, indent, tpos); if (sawplus) { /* Replace the +POS argument with new-style -kSPEC */ asprintf(&vpos, "-k%s", spec); argv[i] = vpos; } else { /* * Append the spec to the one from the * preceding +POS argument, and remove the * current argv element entirely. */ asprintf(&vpos, "%s,%s", argv[i-1], spec); free(argv[i-1]); argv[i-1] = vpos; for (j=i; j < *argc; j++) argv[j] = argv[j+1]; *argc -= 1; i--; } } } /* * ascii, Rascii, Ftable, and RFtable map * * Sorting 'weight' tables. * Convert 'ascii' characters into their sort order. * The 'F' variants fold lower case to upper equivalent * The 'R' variants are for reverse sorting. * * The record separator (REC_D) never needs a weight, this frees one * byte value as an 'end of key' marker. This must be 0 for normal * weight tables, and 0xff for reverse weight tables - and is used * to terminate keys so that short keys sort before (after if reverse) * longer keys. * * The field separator has a normal weight - although it cannot occur * within a key unless it is the default (space+tab). * * All other bytes map to the appropriate value for the sort order. * Numeric sorts don't need any tables, they are reversed by negation. * * Global reverse sorts are done by writing the sorted keys in reverse * order - the sort itself is still forwards. * This means that weights are only ever used when generating keys, any * sort of the original data bytes is always forwards and unweighted. * * Note: this is only good for ASCII sorting. For different LC 's, * all bets are off. * * itable[] and dtable[] are the masks for -i (ignore non-printables) * and -d (only sort blank and alphanumerics). */ void settables(void) { int i; int next_weight = 1; int rev_weight = 254; ascii[REC_D] = 0; Rascii[REC_D] = 255; Ftable[REC_D] = 0; RFtable[REC_D] = 255; for (i = 0; i < 256; i++) { if (i == REC_D) continue; ascii[i] = next_weight; Rascii[i] = rev_weight; if (Ftable[i] == 0) { Ftable[i] = next_weight; RFtable[i] = rev_weight; Ftable[tolower(i)] = next_weight; RFtable[tolower(i)] = rev_weight; } next_weight++; rev_weight--; if (i == '\n' || isprint(i)) itable[i] = 1; if (i == '\n' || i == '\t' || i == ' ' || isalnum(i)) dtable[i] = 1; } }