/*
 * Scraper script for a drownedinsound.com listing page.
 *
 * Flow, as implemented below:
 *   1. Every link matched by LISTING_LINK_XPATH on the current page (`doc`)
 *      is loaded and handed to processEntry(), which types the page as an
 *      "event" and queues its venue link(s) for loading.
 *   2. Each venue page is handed to processEntry2(), which types the page as
 *      a "venue" and records the street address taken from its maps.google
 *      link.
 *   3. When the listing links have been processed, done2() asks a geocoding
 *      service on 127.0.0.1 for every recorded address and stores the raw
 *      response text as the venue's "coordinates".
 *
 * `doc`, `browser`, `model`, `utilities`, `wait` and `done` are not defined
 * in this file; they are expected to be supplied by the hosting scraper
 * runtime (presumably SIMILE Piggy Bank, judging by the ontology URI --
 * TODO confirm).
 */

// Namespace prefixes used to build the URIs passed to model.addStatement().
var RDF_NS = "http://www.w3.org/1999/02/22-rdf-syntax-ns#";
var DROWNEDINSOUND_NS = "http://www.drownedinsound.com/";
var LOCATION_NS = "http://simile.mit.edu/2005/05/ontologies/location#";

// XPath expressions for the page fragments this scraper reads.
var DETAIL_XPATH = "//div[@id='maincol']/div[@class='detail']";
var LISTING_LINK_XPATH = "//div[@id='maincol']/ul/li[@class='normal']/a";

/**
 * Evaluates an XPath expression against a document and returns every matching
 * node as a plain array, in the order the XPath iterator yields them.
 *
 * @param {Document} d - document to query (also used as the context node)
 * @param {string} xpath - XPath expression to evaluate
 * @returns {Array} matching nodes
 */
function selectNodes(d, xpath) {
  var result = d.evaluate(xpath, d, null, XPathResult.ANY_TYPE, null);
  var nodes = [];
  var node = result.iterateNext();
  while (node) {
    nodes.push(node);
    node = result.iterateNext();
  }
  return nodes;
}

/**
 * Error handler shared by both utilities.processDocuments() calls.
 *
 * @param {*} e - the error that was raised
 * @param {string} url - the URL that was being processed
 */
function reportScrapeError(e, url) {
  alert("Error scraping data from " + url + "\n" + e);
}

/**
 * Extracts a street address from the value of a maps.google link: takes
 * everything after "?q=" and turns "+" and "%2C" into spaces.
 *
 * @param {string} href - attribute value containing the maps.google query
 * @returns {string} the address text
 */
function extractMapsAddress(href) {
  return href
    .substr(href.indexOf("?q=") + 3)
    .replace(/\+/g, " ")
    .replace(/%2C/g, " ");
}

/**
 * Builds the geocoding request URL for a street address. City and country
 * are fixed to London / UK.
 *
 * @param {string} street - street address to look up
 * @returns {string} request URL with spaces escaped as %20
 */
function buildGeocodeUrl(street) {
  var url = "http://127.0.0.1/geolocation/default.aspx?street=" +
    String(street) + "&city=London&country=UK";
  // String.prototype.replace returns a new string; the result must be used.
  return url.replace(/ /g, "%20");
}

/**
 * Processes one event page: queues the venue link of every "detail" block
 * that mentions "venue" for loading via processEntry2(), then records the
 * page itself as a drownedinsound "event".
 *
 * @param {Document} d - the loaded event page
 * @param {Object} model - statement store exposing addStatement()
 * @param {Object} utilities - host helpers (debugPrint, processDocuments)
 * @param {Array} uris - unused here; kept for interface compatibility
 * @param {Object} uriToLocation - venue URI -> address map, passed through
 */
function processEntry(d, model, utilities, uris, uriToLocation) {
  var details = selectNodes(d, DETAIL_XPATH);
  var urls1 = [];
  for (var k = 0; k < details.length; k++) {
    if (details[k].innerHTML.indexOf("venue") !== -1) {
      // NOTE(review): this queues the child *node*, not a URL string;
      // presumably it is an anchor that the host stringifies to its href --
      // confirm against utilities.processDocuments.
      var venueLink = details[k].childNodes[1];
      utilities.debugPrint(venueLink);
      urls1.unshift(venueLink);
    }
  }

  utilities.processDocuments(
    browser,  // current browser
    null,     // first document to process if any
    urls1,    // array of urls to load asynchronously
    function (venueDoc, cont) {
      // function to process each document as it gets loaded
      try {
        processEntry2(venueDoc, model, utilities, urls1, uriToLocation);
      } catch (e) {
        utilities.debugPrint(e);
      }
      cont();  // continue with the iteration
    },
    wait,  // what to do when all documents have been processed
    reportScrapeError
  );

  model.addStatement(d.location.href, RDF_NS + "type", DROWNEDINSOUND_NS + "event", false);
}

/**
 * Processes one venue page: records it as a drownedinsound "venue" and, when
 * a maps.google link is present, stores its address and registers the venue
 * for geocoding (in the file-level `uris` list and in `uriToLocation`).
 *
 * If several "detail" blocks mention maps.google, the last one wins.
 *
 * @param {Document} d - the loaded venue page
 * @param {Object} model - statement store exposing addStatement()
 * @param {Object} utilities - host helpers (debugPrint)
 * @param {Array} urls1 - unused here; kept for interface compatibility
 * @param {Object} uriToLocation - venue URI -> address map to update
 */
function processEntry2(d, model, utilities, urls1, uriToLocation) {
  var details = selectNodes(d, DETAIL_XPATH);
  // Local on purpose: a shared/global address would leak the previous
  // venue's address into a page that has no maps.google link.
  var address = null;
  for (var k = 0; k < details.length; k++) {
    if (details[k].innerHTML.indexOf("maps.google") !== -1) {
      address = extractMapsAddress(details[k].childNodes[1].attributes[0].value);
    }
  }

  var uri = d.location.href;
  model.addStatement(uri, RDF_NS + "type", DROWNEDINSOUND_NS + "venue", false);

  if (address === null) {
    // Nothing to geocode for this venue; do not register it.
    utilities.debugPrint("No maps.google address found on " + uri);
    return;
  }

  model.addStatement(uri, LOCATION_NS + "address", address, true);
  uris.unshift(uri);
  uriToLocation[uri] = address;
}

// Venue URIs awaiting geocoding, most recently scraped first.
var uris = [];
// Venue URI -> street address (array object used as a string-keyed map).
var uriToLocation = [];
// Not referenced by any live code in this file; retained in case other code
// depends on these names.
var uris22 = [];
var uriToLocation22 = [];
// Request URL -> most recent XMLHttpRequest issued for it.
var xmlHttp = [];
// Number of geocoding requests that completed successfully.
var count = 0;
// Number of geocoding requests issued but not yet completed.
var pendingGeoRequests = 0;

/**
 * Issues an asynchronous geocoding request and, on success, stores the raw
 * response text as the "coordinates" of the venue at uris[i]. Calls done()
 * once every outstanding geocoding request has completed, whether it
 * succeeded or failed.
 *
 * @param {string} url2 - geocoding request URL
 * @param {number} i - index into `uris` of the venue being geocoded
 */
function GetGeoLocation(url2, i) {
  // Capture the subject now: `uris` is mutated with unshift(), so the index
  // may point at a different venue by the time the response arrives.
  var subject = uris[i];
  // Close over this request rather than re-reading xmlHttp[url2], which is
  // overwritten when two venues share the same address.
  var request = new XMLHttpRequest();
  xmlHttp[url2] = request;

  request.open("GET", url2, true);
  request.onreadystatechange = function () {
    // Only act once the response has fully loaded.
    if (request.readyState !== 4) {
      return;
    }
    if (request.status === 200) {
      model.addStatement(subject, LOCATION_NS + "coordinates", request.responseText, true);
      count = count + 1;
    } else {
      utilities.debugPrint("Problem retrieving XML data");
    }
    // Failed lookups count as completed too, so the scrape always finishes.
    pendingGeoRequests = pendingGeoRequests - 1;
    if (pendingGeoRequests === 0) {
      done();
    }
  };

  pendingGeoRequests = pendingGeoRequests + 1;
  request.send(null);
}

/**
 * Completion callback for the listing-page crawl: geocodes every recorded
 * venue address, or finishes immediately when there is none.
 *
 * NOTE(review): this runs when the *listing* links have been processed; the
 * venue-page loads started inside processEntry() may not have finished yet,
 * in which case `uris` is still incomplete here. Whether that can happen
 * depends on how utilities.processDocuments sequences nested calls, which
 * this file does not show -- confirm.
 */
var done2 = function () {
  if (uris.length > 0) {
    // Process address
    for (var i = 0; i < uris.length; i++) {
      GetGeoLocation(buildGeocodeUrl(uriToLocation[uris[i]].toString()), i);
    }
    wait();
  } else {
    done();
  }
};

// Collect the event links from the listing page; unshift() leaves them in
// reverse document order.
var urls = [];
var listingLinks = selectNodes(doc, LISTING_LINK_XPATH);
for (var n = 0; n < listingLinks.length; n++) {
  urls.unshift(listingLinks[n].href);
}

utilities.processDocuments(
  browser,  // current browser
  null,     // first document to process if any
  urls,     // array of urls to load asynchronously
  function (d, cont) {
    // function to process each document as it gets loaded
    try {
      processEntry(d, model, utilities, urls, uriToLocation);
    } catch (e) {
      utilities.debugPrint(e);
    }
    cont();  // continue with the iteration
  },
  done2,  // what to do when all documents have been processed
  reportScrapeError
);

wait();  // don't navigate to the collected data just yet