import java.io.*;
import java.util.ArrayList;
import java.util.EmptyStackException;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Stack;
import java.util.TreeMap;

import org.xml.sax.*;

import com.ibm.xml.parsers.SAXParser;

/**
 * Command-line tool that parses one XML document with a SAX1 parser and
 * prints, for every element name, how often it occurred, how many characters
 * of text it contained, and which parents, children and attributes it was
 * seen with.
 *
 * <p>All statistics are kept in static fields, so the class processes one
 * document per JVM run and is not safe for concurrent use.
 */
class Statsax {

    /** Indentation printed before each row of a table. */
    private static final String ROW_INDENT = "      ";

    /** Width of the name column of a table row. */
    private static final int NAME_WIDTH = 22;

    /** Width of the right-aligned count column of a table row. */
    private static final int COUNT_WIDTH = 5;

    /** Column at which counts, the rule and the sum all end. */
    private static final int ROW_WIDTH = ROW_INDENT.length() + NAME_WIDTH + COUNT_WIDTH;

    /** Rule printed between the rows of a table and its sum. */
    private static final String RULE = "=====";

    /** Statistics gathered for a single element name. */
    static class Elinfo {
        /** Element name. */
        String name;
        /** Number of start tags seen for this name. */
        int count;
        /** Shallowest nesting level assigned by {@code set_minlev}; 0 means "not assigned yet". */
        int minlev;
        /** Total length reported through characters() and ignorableWhitespace(). */
        int charcount;
        /** Sequence number of the first occurrence of this name in the document. */
        int seen;
        /** True until a child element or character data is recorded for this name. */
        boolean empty;
        /** Parent element name -> {@link Int} occurrence count. */
        TreeMap ptab;
        /** Child element name -> {@link Int} occurrence count. */
        TreeMap ktab;
        /** Attribute name -> {@link Int} occurrence count. */
        TreeMap atab;

        /**
         * Creates an empty record for the given element name.
         *
         * @param nm the element name
         */
        Elinfo(String nm) {
            name = nm;
            count = minlev = charcount = 0;
            seen = 0;
            empty = true;
            ptab = new TreeMap();
            ktab = new TreeMap();
            atab = new TreeMap();
        }
    }

    /** Mutable integer box used as the value type of the count tables. */
    static class Int {
        public int val;

        Int(int i) {
            val = i;
        }
    }

    /** Element name -> {@link Elinfo}. Initialised here so handlers work outside {@code main}. */
    static HashMap eltab = new HashMap(128);

    /** Names of the currently open elements, innermost on top. */
    static Stack elstack = new Stack();

    /** Next value to hand out for {@link Elinfo#seen}. */
    static int global_seen = 0;

    /** Name of the element that was started while no other element was open. */
    static String root;

    /**
     * Increments the counter stored under {@code key} in {@code tab},
     * inserting a counter of 1 when the key is not present yet.
     */
    private static void bump(TreeMap tab, String key) {
        Int counter = (Int) tab.get(key);
        if (counter == null) {
            tab.put(key, new Int(1));
        } else {
            counter.val++;
        }
    }

    /** Prints {@code n} spaces to standard output; prints nothing when {@code n <= 0}. */
    private static void printSpaces(int n) {
        for (int i = 0; i < n; i++) {
            System.out.print(" ");
        }
    }

    /** SAX1 document handler that fills {@link #eltab} while the document is parsed. */
    static class Stats extends HandlerBase {

        /**
         * Records one occurrence of {@code name}, links it with its parent
         * (the element on top of the stack, if any), counts its attributes and
         * pushes it on the element stack.
         *
         * @param name  element name
         * @param attrs attributes of the start tag
         */
        public void startElement(String name, AttributeList attrs) {
            Elinfo inf = (Elinfo) eltab.get(name);
            if (inf == null) {
                inf = new Elinfo(name);
                inf.seen = global_seen++;
                eltab.put(name, inf);
            }
            inf.count++;

            if (elstack.empty()) {
                // No open element: this is the document root.
                root = name;
            } else {
                String parent = (String) elstack.peek();
                bump(inf.ptab, parent);
                Elinfo pinf = (Elinfo) eltab.get(parent);
                bump(pinf.ktab, name);
                pinf.empty = false;
            }

            // Attribute handling
            int n = attrs.getLength();
            for (int i = 0; i < n; i++) {
                bump(inf.atab, attrs.getName(i));
            }

            elstack.push(name);
        }

        /**
         * Pops the element that is being closed.
         *
         * @param name element name (unused; the stack top is popped)
         * @throws SAXException if no element is open
         */
        public void endElement(String name) throws SAXException {
            if (elstack.empty()) {
                throw new SAXException("Tried to pop empty element stack");
            }
            elstack.pop();
        }

        /**
         * Adds {@code len} to the character count of the innermost open
         * element and marks that element as non-empty.
         *
         * @throws SAXException if no element is open
         */
        public void characters(char[] data, int offset, int len) throws SAXException {
            addChars(len);
        }

        /**
         * Treated exactly like {@link #characters}: ignorable whitespace is
         * counted and makes the element non-empty.
         *
         * @throws SAXException if no element is open
         */
        public void ignorableWhitespace(char[] data, int offset, int len) throws SAXException {
            addChars(len);
        }

        /** Shared implementation of characters() and ignorableWhitespace(). */
        private void addChars(int len) throws SAXException {
            if (elstack.empty()) {
                throw new SAXException("No element on stack");
            }
            String elname = (String) elstack.peek();
            Elinfo inf = (Elinfo) eltab.get(elname);
            inf.empty = false;
            inf.charcount += len;
        }
    }

    /** Orders {@link Elinfo} records by nesting level, then by order of first appearance. */
    static class ElComp implements java.util.Comparator {

        public int compare(Object ao, Object bo) {
            Elinfo a = (Elinfo) ao;
            Elinfo b = (Elinfo) bo;
            // Both fields are never negative, so the subtraction cannot overflow.
            int c1 = a.minlev - b.minlev;
            if (c1 != 0) {
                return c1;
            }
            return a.seen - b.seen;
        }

        /**
         * Two-argument equality helper consistent with {@link #compare}.
         * It does not override {@code Object.equals(Object)} and nothing in
         * this file calls it; kept for compatibility.
         */
        public boolean equals(Object ao, Object bo) {
            Elinfo a = (Elinfo) ao;
            Elinfo b = (Elinfo) bo;
            return ((a.minlev == b.minlev) && (a.seen == b.seen));
        }
    }

    /**
     * Assigns {@code level} to the element {@code name} if it has no level
     * yet or a deeper one, and then propagates {@code level + 1} to all of
     * its recorded children. Because a level is only ever lowered, the
     * recursion also terminates for elements that contain themselves.
     *
     * @param name  element name; must be a key of {@link #eltab}
     * @param level candidate nesting level, starting at 1 for the root
     */
    static void set_minlev(String name, int level) {
        Elinfo inf = (Elinfo) eltab.get(name);
        if (inf.minlev == 0 || inf.minlev > level) {
            int newlev = level + 1;
            inf.minlev = level;
            Iterator iter = inf.ktab.keySet().iterator();
            while (iter.hasNext()) {
                String kname = (String) iter.next();
                set_minlev(kname, newlev);
            }
        }
    }

    /**
     * Prints one name/count table to standard output. Nothing is printed for
     * an empty table. Counts are right-aligned to a common column; a name
     * that is too long for its column is still separated from its count by
     * one space.
     *
     * @param label heading printed above the rows
     * @param tab   String name -> {@link Int} count
     * @param dosum whether to print a rule and the total below the rows
     *              (only done when the table has more than one row)
     */
    static void showtab(String label, TreeMap tab, boolean dosum) {
        if (tab.size() == 0) {
            return;
        }
        System.out.println("\n " + label + ":");
        int sum = 0;
        Iterator step = tab.keySet().iterator();
        while (step.hasNext()) {
            String name = (String) step.next();
            int cnt = ((Int) tab.get(name)).val;
            sum += cnt;
            String cstr = Integer.toString(cnt);
            System.out.print(ROW_INDENT);
            System.out.print(name);
            // At least one space, so an over-long name never runs into its count.
            printSpaces(Math.max(1, NAME_WIDTH - name.length() + COUNT_WIDTH - cstr.length()));
            System.out.println(cstr);
        }
        if (dosum && tab.size() > 1) {
            printSpaces(ROW_WIDTH - RULE.length());
            System.out.println(RULE);
            String sstr = Integer.toString(sum);
            printSpaces(ROW_WIDTH - sstr.length());
            System.out.println(sstr);
        }
    }

    /**
     * Parses the document named by {@code args[0]} and prints the collected
     * statistics, elements ordered by nesting level and first appearance.
     * Exits with status 1 when the argument is missing or parsing fails.
     *
     * @param args command line; {@code args[0]} is passed to the parser
     */
    public static void main(String[] args) {
        if (args.length < 1) {
            System.err.println("Missing file argument");
            System.exit(1);
        }

        SAXParser p = new SAXParser();
        Stats app = new Stats();
        eltab = new HashMap(128);
        elstack = new Stack();
        p.setDocumentHandler(app);

        try {
            p.parse(args[0]);
        } catch (FileNotFoundException e) {
            System.err.println("Failed to open " + e.getMessage());
            System.exit(1);
        } catch (SAXException e) {
            System.err.println(" SAX error: " + e.getMessage());
            System.exit(1);
        } catch (IOException e) {
            e.printStackTrace();
            System.exit(1);
        }

        if (root == null) {
            // The parser reported no element at all; there is nothing to rank or print.
            System.err.println("No root element found");
            System.exit(1);
        }

        set_minlev(root, 1);

        ArrayList sorted = new ArrayList(eltab.values());
        java.util.Collections.sort(sorted, new ElComp());

        Iterator step = sorted.iterator();
        while (step.hasNext()) {
            Elinfo inf = (Elinfo) step.next();
            System.out.println("\n================\n" + inf.name + ": " + inf.count);
            if (inf.charcount > 0) {
                System.out.println("Had " + inf.charcount + " bytes of character data");
            }
            if (inf.empty) {
                System.out.println("Always empty");
            }
            showtab("Parents", inf.ptab, false);
            showtab("Children", inf.ktab, true);
            showtab("Attributes", inf.atab, false);
        }
    }
}