#include #include #include #include #include "xmlparse.h" #include "hash.h" #include "util.h" /*================================================================ ** Macro Definition Section */ #define NAMECNT_INCREMENT 512 #define ELINFO_INCREMENT 128 #define ELSTACK_INCREMENT 128 #define BUFFSIZE 32000 /*================================================================ ** Type Definition Section */ typedef struct elinfo { const char *name; int count; int minlev; int charcount; int seen; int empty; HashTable * ptab; HashTable * ktab; HashTable * atab; } Elinfo; typedef struct namecnt { const char * name; int count; } Namecnt; /*================================================================ ** Private globals */ static Elinfo * Infopool = NULL; static int Infonext; static Elinfo * Info_ondeck; static Namecnt * Cntpool = NULL; static int Cntnext; static Namecnt * Cnt_ondeck; static Elinfo * Root = NULL; static Elinfo ** Elstack; static int Elstack_top; static int Elstack_size; static int Seenorder = 0; static HashTable * Eltab; /*================================================================ ** Private functions */ static HashTable * newnmtab(int size) { HashTable *ret; HASH_BEGIN(Namecnt); HASH_KEY_PSTR(name); HASH_END(ret, size); return ret; } /* End newnmtab */ static Elinfo * newElinfo() { Elinfo *ret; if (Infopool == NULL || Infonext >= ELINFO_INCREMENT) { Infopool = NEW(Elinfo, ELINFO_INCREMENT); Infonext = 0; } return &Infopool[Infonext++]; } /* End newElinfo */ static void init_Elinfo(Elinfo *inf) { inf->name = NEWSTRING(inf->name); inf->count = inf->minlev = inf->charcount = 0; inf->empty = 1; inf->ptab = newnmtab(16); inf->ktab = newnmtab(48); inf->atab = newnmtab(16); } /* End init_Elinfo */ static Namecnt * newNamecnt() { if (Cntpool == NULL || Cntnext >= NAMECNT_INCREMENT) { Cntpool = NEW(Namecnt, NAMECNT_INCREMENT); Cntnext = 0; } return &Cntpool[Cntnext++]; } /* End newNamecnt */ static void start_tag(void *data, const XML_Char *name, const XML_Char **atts) { Elinfo *inf; int i; Info_ondeck->name = name; inf = (Elinfo *) hashinsert(Eltab, (ConstHashObj) Info_ondeck); if (inf == Info_ondeck) { /* This is a new name in the table */ init_Elinfo(inf); Info_ondeck = newElinfo(); inf->seen = Seenorder++; } inf->count++; if (Elstack_top < 0) { Root = inf; } else { Elinfo *parent = Elstack[Elstack_top]; Namecnt *nc; parent->empty = 0; Cnt_ondeck->name = parent->name; nc = (Namecnt *) hashinsert(inf->ptab, (ConstHashObj) Cnt_ondeck); if (nc == Cnt_ondeck) { nc->count = 1; Cnt_ondeck = newNamecnt(); } else { nc->count++; } Cnt_ondeck->name = inf->name; nc = (Namecnt *) hashinsert(parent->ktab, (ConstHashObj) Cnt_ondeck); if (nc == Cnt_ondeck) { nc->count = 1; Cnt_ondeck = newNamecnt(); } else { nc->count++; } } /* Attribute handling */ for (i = 0; atts[i] != NULL; i += 2) { Namecnt * nc; Cnt_ondeck->name = atts[i]; nc = (Namecnt *) hashinsert(inf->atab, (ConstHashObj) Cnt_ondeck); if (nc == Cnt_ondeck) { nc->name = NEWSTRING(nc->name); nc->count = 1; Cnt_ondeck = newNamecnt(); } else { nc->count++; } } Elstack[++Elstack_top] = inf; } /* End start_tag */ static void end_tag(void *data, const XML_Char *name) { Elstack_top--; } /* End end_tag */ static void chardata(void *data, const XML_Char *s, int len) { const unsigned char *p; const unsigned char *lim; int cnt; int i; Elinfo * inf; inf = Elstack[Elstack_top]; inf->empty = 0; cnt = len; for (p = s, lim = &s[len]; p < lim; p++) { if (*p >= (unsigned char) '\300') { if (*p < (unsigned char) '\340') cnt--; else if (*p < (unsigned char) '\360') cnt -= 2; else cnt -= 3; } } inf->charcount += cnt; } /* End chardata */ static void set_minlev(Elinfo *inf, int level) { if (inf->minlev == 0 || inf->minlev > level) { Namecnt *nc; HashIterator step = 0; int newlev = level + 1; inf->minlev = level; while ((nc = (Namecnt *) hashnext(inf->ktab, &step)) != NULL) { Elinfo *kinf; Info_ondeck->name = nc->name; kinf = (Elinfo *) hashfind(Eltab, (ConstHashObj) Info_ondeck); if (kinf == NULL) { fprintf(stderr, "Shouldn't happen: kid not found in Eltab\n"); exit(-1); } set_minlev(kinf, newlev); } } } /* End set_minlev */ static int elcomp(const void *a, const void *b) { int cmpmin = (*((Elinfo **) a))->minlev - (*((Elinfo **) b))->minlev; if (cmpmin) return cmpmin; return (*((Elinfo **) a))->seen - (*((Elinfo **) b))->seen; } /* End elcomp */ static int nmcomp(const void *a, const void *b) { return strcmp((*((Namecnt **) a))->name, (*((Namecnt **) b))->name); } /* End nmcomp */ static void showtab(char *label, HashTable *tab, int dosum) { int nmcnt; int sum; int i; Namecnt **nlist; if (! hashcount(tab)) return; sum = 0; printf("\n %s:\n", label); nmcnt = hashvector(tab, (HashObj **) &nlist); if (nmcnt < 0) { fprintf(stderr, "Couldn't allocate memory for name list\n"); exit(-1); } qsort(nlist, nmcnt, sizeof(Namecnt *), nmcomp); for (i = 0; i < nmcnt; i++) { sum += nlist[i]->count; printf(" %-16s %5d\n", nlist[i]->name, nlist[i]->count); } if (dosum && nmcnt > 1) { printf(" =====\n"); printf(" %5d\n", sum); } } /* End showtab */ /*================================================================ ** */ void main(int argc, char **argv) { int fd; int elcnt; int status; int i; Elinfo **elist; XML_Parser p; if (argc < 2) { fprintf(stderr, "No filename supplied\n"); exit(-1); } fd = open(argv[1], O_RDONLY); if (fd < 0) { fprintf(stderr, "Couldn't open %s: %s\n", argv[1], sys_errlist[errno]); exit(-1); } Elstack = NEW(Elinfo *, ELSTACK_INCREMENT); Elstack_size = ELSTACK_INCREMENT; Elstack_top = -1; Info_ondeck = newElinfo(); Cnt_ondeck = newNamecnt(); HASH_BEGIN(Elinfo); HASH_KEY_PSTR(name); HASH_END(Eltab, 32); p = XML_ParserCreate(NULL); XML_SetElementHandler(p, start_tag, end_tag); XML_SetCharacterDataHandler(p, chardata); for (;;) { int br; void *buff = XML_GetBuffer(p, BUFFSIZE); if (! buff) { fprintf(stderr, "Out of memory for XML_GetBuffer\n"); exit(-1); } br = read(fd, buff, BUFFSIZE); if (br < 0) { fprintf(stderr, "Error reading %s: %s\n", argv[1], sys_errlist[errno]); exit(-1); } status = XML_ParseBuffer(p, br, br == 0); if (status == 0) { fprintf(stderr, "%s at line %d, column %d, byte number %d\n", XML_ErrorString(XML_GetErrorCode(p)), XML_GetCurrentLineNumber(p), XML_GetCurrentColumnNumber(p), XML_GetCurrentByteIndex(p)); exit(-1); } if (br == 0) break; } /* ---- Finished parsing ---- */ close(fd); if (! Root) { fprintf(stderr, "Shouldn't happen: Root not set\n"); exit(-1); } set_minlev(Root, 1); elcnt = hashvector(Eltab, (HashObj **) &elist); if (elcnt < 0) { fprintf(stderr, "Couldn't allocate memory for element list\n"); exit(-1); } qsort(elist, elcnt, sizeof(Elinfo *), elcomp); for (i = 0; i < elcnt; i++) { printf("\n================\n%s: %d\n", elist[i]->name, elist[i]->count); if (elist[i]->charcount) printf("Had %d bytes of character data\n", elist[i]->charcount); if (elist[i]->empty) printf("Always empty\n"); showtab("Parents", elist[i]->ptab, 0); showtab("Children", elist[i]->ktab, 1); showtab("Attributes", elist[i]->atab, 0); } } /* End main */