#include #include #include #define HashTable rxp_HashTable #include "system.h" #include "xmlparser.h" #include "string16.h" #undef HashTable #include "hash.h" #include "util.h" /*================================================================ ** Macro Definition Section */ #define NAMECNT_INCREMENT 512 #define ELINFO_INCREMENT 128 #define ELSTACK_INCREMENT 128 #define BUFFSIZE 32000 /*================================================================ ** Type Definition Section */ typedef struct elinfo { const char *name; int count; int minlev; int charcount; int seen; int empty; HashTable * ptab; HashTable * ktab; HashTable * atab; } Elinfo; typedef struct namecnt { const char * name; int count; } Namecnt; /*================================================================ ** Private globals */ static Elinfo * Infopool = NULL; static int Infonext; static Elinfo * Info_ondeck; static Namecnt * Cntpool = NULL; static int Cntnext; static Namecnt * Cnt_ondeck; static Elinfo * Root = NULL; static Elinfo ** Elstack; static int Elstack_top; static int Elstack_size; static int Seenorder = 0; static HashTable * Eltab; /*================================================================ ** Private functions */ static char * new8name(const Char *n16) { int size = Strlen(n16); int bigenuf = size * 3 + 1; char *ret = MALLOC(bigenuf); char *ptr; for (ptr = ret; *n16; n16++) { if (*n16 < 0x80) { *ptr++ = *n16; } else if (*n16 < 0x800) { *ptr++ = 0xc0 | (*n16 >> 6); *ptr++ = 0x80 | (*n16 & 0x3f); } else { *ptr++ = 0xe0 | (*n16 >> 12); *ptr++ = 0x80 | ((*n16 >> 6) & 0x3f); *ptr++ = 0x80 | (*n16 & 0x3f); } } *ptr = '\0'; return ret; } /* new8name */ static HashTable * newnmtab(int size) { HashTable *ret; HASH_BEGIN(Namecnt); HASH_KEY_PSTR(name); HASH_END(ret, size); return ret; } /* End newnmtab */ static Elinfo * newElinfo() { Elinfo *ret; if (Infopool == NULL || Infonext >= ELINFO_INCREMENT) { Infopool = NEW(Elinfo, ELINFO_INCREMENT); Infonext = 0; } return &Infopool[Infonext++]; } /* End newElinfo */ static void init_Elinfo(Elinfo *inf) { inf->count = inf->minlev = inf->charcount = 0; inf->empty = 1; inf->ptab = newnmtab(16); inf->ktab = newnmtab(48); inf->atab = newnmtab(16); } /* End init_Elinfo */ static Namecnt * newNamecnt() { if (Cntpool == NULL || Cntnext >= NAMECNT_INCREMENT) { Cntpool = NEW(Namecnt, NAMECNT_INCREMENT); Cntnext = 0; } return &Cntpool[Cntnext++]; } /* End newNamecnt */ static void start_tag(XBit bit) { Elinfo *inf; Attribute astep; Info_ondeck->name = new8name(bit->element_definition->name); inf = (Elinfo *) hashinsert(Eltab, (ConstHashObj) Info_ondeck); if (inf == Info_ondeck) { /* This is a new name in the table */ init_Elinfo(inf); Info_ondeck = newElinfo(); inf->seen = Seenorder++; } inf->count++; if (Elstack_top < 0) { Root = inf; } else { Elinfo *parent = Elstack[Elstack_top]; Namecnt *nc; parent->empty = 0; Cnt_ondeck->name = parent->name; nc = (Namecnt *) hashinsert(inf->ptab, (ConstHashObj) Cnt_ondeck); if (nc == Cnt_ondeck) { nc->count = 1; Cnt_ondeck = newNamecnt(); } else { nc->count++; } Cnt_ondeck->name = inf->name; nc = (Namecnt *) hashinsert(parent->ktab, (ConstHashObj) Cnt_ondeck); if (nc == Cnt_ondeck) { nc->count = 1; Cnt_ondeck = newNamecnt(); } else { nc->count++; } } /* Attribute handling */ for (astep = bit->attributes; astep; astep = astep->next) { Namecnt * nc; Cnt_ondeck->name = new8name(astep->definition->name); nc = (Namecnt *) hashinsert(inf->atab, (ConstHashObj) Cnt_ondeck); if (nc == Cnt_ondeck) { nc->count = 1; Cnt_ondeck = newNamecnt(); } else { nc->count++; } } Elstack[++Elstack_top] = inf; } /* End start_tag */ static void chardata(const Char *data) { Elinfo * inf; inf = Elstack[Elstack_top]; inf->empty = 0; inf->charcount += Strlen(data); } /* End chardata */ static void set_minlev(Elinfo *inf, int level) { if (inf->minlev == 0 || inf->minlev > level) { Namecnt *nc; HashIterator step = 0; int newlev = level + 1; inf->minlev = level; while ((nc = (Namecnt *) hashnext(inf->ktab, &step)) != NULL) { Elinfo *kinf; Info_ondeck->name = nc->name; kinf = (Elinfo *) hashfind(Eltab, (ConstHashObj) Info_ondeck); if (kinf == NULL) { fprintf(stderr, "Shouldn't happen: kid not found in Eltab\n"); exit(-1); } set_minlev(kinf, newlev); } } } /* End set_minlev */ static int elcomp(const void *a, const void *b) { int cmpmin = (*((Elinfo **) a))->minlev - (*((Elinfo **) b))->minlev; if (cmpmin) return cmpmin; return (*((Elinfo **) a))->seen - (*((Elinfo **) b))->seen; } /* End elcomp */ static int nmcomp(const void *a, const void *b) { return strcmp((*((Namecnt **) a))->name, (*((Namecnt **) b))->name); } /* End nmcomp */ static void showtab(char *label, HashTable *tab, int dosum) { int nmcnt; int sum; int i; Namecnt **nlist; if (! hashcount(tab)) return; sum = 0; printf("\n %s:\n", label); nmcnt = hashvector(tab, (HashObj **) &nlist); if (nmcnt < 0) { fprintf(stderr, "Couldn't allocate memory for name list\n"); exit(-1); } qsort(nlist, nmcnt, sizeof(Namecnt *), nmcomp); for (i = 0; i < nmcnt; i++) { sum += nlist[i]->count; printf(" %-16s %5d\n", nlist[i]->name, nlist[i]->count); } if (dosum && nmcnt > 1) { printf(" =====\n"); printf(" %5d\n", sum); } } /* End showtab */ /*================================================================ ** */ void main(int argc, char **argv) { int fd; int elcnt; int status; int i; Elinfo **elist; Entity ent; InputSource source; Parser p; if (argc < 2) { fprintf(stderr, "No filename supplied\n"); exit(-1); } ent = NewExternalEntity(0, 0, argv[1], 0, 0); if (! ent) { exit(-1); } source = EntityOpen(ent); if (! source) { exit(-1); } Elstack = NEW(Elinfo *, ELSTACK_INCREMENT); Elstack_size = ELSTACK_INCREMENT; Elstack_top = -1; Info_ondeck = newElinfo(); Cnt_ondeck = newNamecnt(); HASH_BEGIN(Elinfo); HASH_KEY_PSTR(name); HASH_END(Eltab, 32); p = NewParser(); ParserSetFlag(p, ErrorOnBadCharacterEntities, 1); ParserSetFlag(p, ErrorOnUndefinedEntities, 1); ParserSetFlag(p, XMLStrictWFErrors, 1); SetFileEncoding(Stdout, CE_UTF_8); if (ParserPush(p, source) == -1) { ParserPerror(p, &p->xbit); exit(-1); } for (;;) { XBit bit = ReadXBit(p); switch(bit->type) { case XBIT_eof: goto parsedone; case XBIT_error: ParserPerror(p, bit); exit(-1); case XBIT_start: case XBIT_empty: start_tag(bit); if (bit->type == XBIT_empty) Elstack_top--; break; case XBIT_end: Elstack_top--; break; case XBIT_cdsect: chardata(bit->cdsect_chars); break; case XBIT_pcdata: chardata(bit->pcdata_chars); break; default: break; } } /* ---- Finished parsing ---- */ parsedone: if (! Root) { fprintf(stderr, "Shouldn't happen: Root not set\n"); exit(-1); } set_minlev(Root, 1); elcnt = hashvector(Eltab, (HashObj **) &elist); if (elcnt < 0) { fprintf(stderr, "Couldn't allocate memory for element list\n"); exit(-1); } qsort(elist, elcnt, sizeof(Elinfo *), elcomp); for (i = 0; i < elcnt; i++) { printf("\n================\n%s: %d\n", elist[i]->name, elist[i]->count); if (elist[i]->charcount) printf("Had %d bytes of character data\n", elist[i]->charcount); if (elist[i]->empty) printf("Always empty\n"); showtab("Parents", elist[i]->ptab, 0); showtab("Children", elist[i]->ktab, 1); showtab("Attributes", elist[i]->atab, 0); } } /* End main */