Structured Writing, Structured Search
by Jon Udell
|
Pages: 1, 2
Since I couldn't parameterize the query string, I left it as a placeholder (match="query") and looked to DOM manipulation as a way to reach in and change it. Here's what I came up with:
<html>
<head>
<link rel="stylesheet" type="text/css" href="style.css"/>
<script>
// Relative URL of the XSLT stylesheet; MOZtransform and IEtransform both load it.
var xslurl = 't.xsl';
// Relative URL of the XML data document that the stylesheet is applied to.
var xmlurl = 'ml.xml';
/**
 * Run an XPath search by dispatching to the browser-specific XSLT code path.
 *
 * The engine is chosen by feature detection rather than by navigator.appName:
 * IE11 and most current browsers report "Netscape" as their appName, so name
 * sniffing sends them down the Mozilla path whether or not they support it.
 *
 * @param queryText XPath expression to install as the stylesheet's match pattern.
 */
function transform(queryText)
{
  var hasXsltProcessor = typeof XSLTProcessor !== 'undefined';
  // IE11 deliberately reports typeof ActiveXObject as 'undefined', so also
  // probe for the property on window when a window object exists.
  var hasActiveX = typeof ActiveXObject !== 'undefined' ||
    (typeof window !== 'undefined' && 'ActiveXObject' in window);

  if (hasXsltProcessor)
  {
    MOZtransform(queryText);
    return;
  }
  if (hasActiveX)
  {
    IEtransform(queryText);
    return;
  }
  alert('unsupported: ' + navigator.appName + ', ' + navigator.appVersion);
}
/**
 * Mozilla code path: load the stylesheet, point its placeholder template at
 * queryText, apply it to the XML data, and show the result in the first DIV.
 *
 * Each stage reports its own failure through alert() and then stops, so a
 * failed load is not followed by further alerts from the later stages.
 *
 * NOTE(review): Document.load() is a legacy Mozilla API; current browsers
 * need XMLHttpRequest or fetch plus DOMParser to obtain the two documents.
 *
 * @param queryText XPath expression to install as the template's match pattern.
 */
function MOZtransform(queryText)
{
  // Namespace of every XSLT 1.0 element, including xsl:template.
  var XSLT_NS = 'http://www.w3.org/1999/XSL/Transform';
  var xsl;
  var xml;
  var templates;
  var queryTemplate = null;
  var i;

  try
  {
    xsl = document.implementation.createDocument("", "xslt", null);
    xsl.async = false;
    xsl.load (xslurl);
    // Find the placeholder by its match value, as IEtransform does, instead
    // of relying on it being the second template in the stylesheet.
    templates = xsl.getElementsByTagNameNS(XSLT_NS, 'template');
    for (i = 0; i < templates.length; i++)
    {
      if (templates[i].getAttribute('match') === 'query')
      {
        queryTemplate = templates[i];
        break;
      }
    }
    if (!queryTemplate)
    {
      throw new Error('no template with match="query" in ' + xslurl);
    }
    queryTemplate.setAttribute('match', queryText);
  }
  catch(e)
  {
    alert('error: modify xsl: ' + e.message);
    return;
  }

  try
  {
    xml = document.implementation.createDocument("", "xml", null);
    xml.async = false;
    xml.load (xmlurl);
  }
  catch(e)
  {
    alert('error: load xml: ' + e.message);
    return;
  }

  try
  {
    var xslp = new XSLTProcessor();
    xslp.importStylesheet ( xsl );
    var results = xslp.transformToFragment(xml,document);
    var resultDiv = document.getElementsByTagName('div')[0];
    // Clear the previous results before inserting the new fragment.
    resultDiv.innerHTML = '';
    resultDiv.appendChild(results);
    // Echo the executed query into the input box so it can be edited.
    document.queryBox.q.value = queryText;
  }
  catch(e)
  {
    alert('error: do xslt: ' + e.message);
  }
}
/**
 * Internet Explorer code path: load the stylesheet with MSXML, point its
 * placeholder template at queryText, apply it to the XML data, and write the
 * serialized result into the first DIV.
 *
 * Each stage reports its own failure through alert() and then stops, so a
 * failed load is not followed by further alerts from the later stages.
 *
 * @param queryText XPath expression to install as the template's match pattern.
 */
function IEtransform(queryText)
{
  var xsl;
  var xml;
  var queryTemplate;

  try
  {
    // The stylesheet is handed to XSLTemplate below, hence the free-threaded document.
    xsl = new ActiveXObject("MSXML2.FreeThreadedDOMDocument");
    xsl.async = false;
    // MSXML load() signals failure by returning false, not by throwing.
    if (!xsl.load(xslurl))
    {
      throw new Error(xsl.parseError.reason);
    }
    queryTemplate = xsl.documentElement.selectNodes('//*[@match="query"]').item(0);
    if (!queryTemplate)
    {
      throw new Error('no template with match="query" in ' + xslurl);
    }
    queryTemplate.setAttribute('match', queryText);
  }
  catch(e)
  {
    alert('error: modify xsl: ' + (e.description || e.message));
    return;
  }

  try
  {
    // Same MSXML2 ProgID family as the stylesheet and XSLTemplate objects,
    // so the three objects are not drawn from different MSXML releases.
    xml = new ActiveXObject("MSXML2.DOMDocument");
    xml.async = false;
    if (!xml.load(xmlurl))
    {
      throw new Error(xml.parseError.reason);
    }
  }
  catch(e)
  {
    alert('error: load xml: ' + (e.description || e.message));
    return;
  }

  try
  {
    var templ = new ActiveXObject("MSXML2.XSLTemplate");
    templ.stylesheet = xsl;
    var xslp = templ.createProcessor();
    xslp.input = xml;
    xslp.transform();
    var results = xslp.output;
    var resultDiv = document.getElementsByTagName('div')[0];
    resultDiv.innerHTML = results;
    // Echo the executed query into the input box so it can be edited.
    document.queryBox.q.value = queryText;
  }
  catch(e)
  {
    alert('error: do xslt: ' + (e.description || e.message));
  }
}
</script>
</head>
<body>
<!-- Query UI: a list of example XPath queries beside a free-form input box. -->
<table>
<tr>
<td>choose xpath query from list</td>
<td>enter or modify xpath query</td>
</tr>
<tr><td>
<!-- Choosing an option calls transform() with that option's XPath value. -->
<form name="queryList" method="post">
<select name="q"
onChange="javascript:transform(document.queryList.q.value)">
<option value="/">choose your query</option>
<option value="//s:title[contains( . , 'SlideML')]">
slide titles containing 'SlideML'</option>
<option value="//img">
image references</option>
<option value="//img[contains(@src, 'zope')]">
image references containing 'zope'</option>
<option value="//p[contains(. , 'OpenOffice')]">
paragraphs containing 'OpenOffice'</option>
<option value="//*[@class='code']">
elements with class='code'</option>
<option value="//*[@class='code' and contains(@id, 'python')]">
//class='code' and id contains 'python'</option>
<option value="//a[contains(@href , 'bray')]">
links with URL containing 'bray'</option>
<option value="//a[contains(./text() , 'bray')]">
links with text containing 'bray'</option>
<option value="//a[contains( translate (
text(), 'ABCDEFGHIJKLMNOPQRSTUVWXYZ',
'abcdefghijklmnopqrstuvwxyz'), 'bray')]">
links with text containing 'bray', case-insensitive</option>
</select>
</form>
</td><td>
<!-- Submitting this form calls transform() with the typed XPath; the
     transform functions also copy each executed query back into this box. -->
<form name="queryBox" method="post"
action="javascript:transform(document.queryBox.q.value)">
<input name="q" size="60">
</form>
</td></tr></table>
<!-- First DIV in the document: the transform functions write results here. -->
<div class="results">
</div>
</body>
</html>
In slightly different ways, MSIE and Mozilla are following the same recipe:
Load the stylesheet into an XML DOM.
Find the <xsl:template match="query"> element.
Reset the value of its match attribute to the XPath string obtained from one or the other of the UI widgets.
Load the package of SlideML data into another XML DOM.
Create an XSLT processor.
Apply the modified XSLT to the SlideML data.
Replace the contents of a DIV element with the search results.
Using XPath Search
From a user's point of view, XPath query strings are pretty darned geeky. I'm hopeless with them myself unless I have examples in front of me. I find that having a list of examples available in the context of my own live data, and synchronizing it to an input box in which examples can be modified, leads me to discover and record more useful patterns. A subtler thing happens too. As you're writing the XHTML, the search possibilities begin to guide your choices.
For example, I chose a very simple markup strategy for the slideshow. Rather than go with complex outlining, I decided that I really only needed two levels of indentation. I attached those levels to <p> and <div>. For purposes of indentation, it didn't matter whether I wrote like this:
<p>...</p>
<div>...</div>
<div>...</div>
Or like this:
<p>
<div>...</div>
<div>...</div>
</p>
I chose the latter style because I sensed that I wanted a <p> to enclose a complete thought. That was a somewhat abstract notion, but it suddenly became crystal clear when I made a simple change to the XSLT stylesheet. The change was from
<xsl:value-of select="."/>
to
<xsl:copy-of select="."/>
More from Jon Udell | |
Lightweight XML Search Servers, Part 2 | |
In other words, instead of simply dumping the text of the found element -- which is what search engines almost universally do, since they can't rely on the markup in the text they find -- this engine returns well-formed fragments. Images display as images, links as proper links, tables as tables, and when the query says "find a paragraph that contains" the result is the complete XHTML paragraph element, rendered as it is in its original context.
Sooner or later, I'll be using a real XML database to enjoy this level of control over the XHTML content I post to my weblog and that others post to theirs. With a little luck, I won't have to provide that service myself. Somebody will build one that latches onto my XHTML feed and others. Meanwhile, being lazy and having some RAM to spare, I'll probably see how far I can push this serverless approach.
- Great; SlideML and Namespaces; Wysiwyg Editor
2003-06-13 17:30:22 Roger Fischer - disaggregating content
2003-06-12 16:20:45 Lucas Fletcher