[freenet-cvs] r14775 - trunk/freenet/src/freenet/clients/http

nextgens at freenetproject.org nextgens at freenetproject.org
Fri Aug 17 21:42:03 UTC 2007


Author: nextgens
Date: 2007-08-17 21:42:03 +0000 (Fri, 17 Aug 2007)
New Revision: 14775

Removed:
   trunk/freenet/src/freenet/clients/http/XMLSpider.java
Log:
XMLSpider now has its own plugin, so this copy of the code is dead.

Deleted: trunk/freenet/src/freenet/clients/http/XMLSpider.java
===================================================================
--- trunk/freenet/src/freenet/clients/http/XMLSpider.java	2007-08-17 21:37:52 UTC (rev 14774)
+++ trunk/freenet/src/freenet/clients/http/XMLSpider.java	2007-08-17 21:42:03 UTC (rev 14775)
@@ -1,1512 +0,0 @@
-/* This code is part of Freenet. It is distributed under the GNU General
- * Public License, version 2 (or at your option any later version). See
- * http://www.gnu.org/ for further details of the GPL. */
-package freenet.clients.http;
-
-import java.io.File;
-import java.io.FileWriter;
-import java.io.IOException;
-import java.io.UnsupportedEncodingException;
-import java.net.MalformedURLException;
-import java.net.URI;
-import java.net.URISyntaxException;
-import java.security.MessageDigest;
-import java.security.NoSuchAlgorithmException;
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.Collection;
-import java.util.HashMap;
-import java.util.HashSet;
-import java.util.Iterator;
-import java.util.LinkedList;
-import java.util.List;
-import java.util.Map;
-import java.util.Set;
-import java.util.TreeMap;
-import java.util.Vector;
-
-import javax.xml.parsers.DocumentBuilder;
-import javax.xml.parsers.DocumentBuilderFactory;
-import javax.xml.transform.OutputKeys;
-import javax.xml.transform.Transformer;
-import javax.xml.transform.TransformerFactory;
-import javax.xml.transform.dom.DOMSource;
-import javax.xml.transform.stream.StreamResult;
-
-import org.w3c.dom.Attr;
-import org.w3c.dom.DOMImplementation;
-import org.w3c.dom.Document;
-import org.w3c.dom.Element;
-import org.w3c.dom.NodeList;
-import org.w3c.dom.Text;
-
-import freenet.client.ClientMetadata;
-import freenet.client.FetchContext;
-import freenet.client.FetchException;
-import freenet.client.FetchResult;
-import freenet.client.InsertException;
-import freenet.client.async.BaseClientPutter;
-import freenet.client.async.ClientCallback;
-import freenet.client.async.ClientGetter;
-import freenet.client.async.USKCallback;
-import freenet.clients.http.filter.ContentFilter;
-import freenet.clients.http.filter.FoundURICallback;
-import freenet.clients.http.filter.UnsafeContentTypeException;
-import freenet.keys.FreenetURI;
-import freenet.keys.USK;
-import freenet.node.NodeClientCore;
-import freenet.node.RequestStarter;
-import freenet.oldplugins.plugin.HttpPlugin;
-import freenet.oldplugins.plugin.PluginManager;
-import freenet.pluginmanager.PluginRespirator;
-import freenet.support.HTMLNode;
-import freenet.support.Logger;
-import freenet.support.MultiValueTable;
-import freenet.support.api.Bucket;
-import freenet.support.api.HTTPRequest;
-
-/**
- * Spider. Produces an index.
- */
-public class XMLSpider implements HttpPlugin, ClientCallback, FoundURICallback ,USKCallback{
-
-	/* ---- Tunables and fixed strings ---- */
-
-	/** Minimum delay between two index rewrites; multiplied by 1000 and compared against millisecond timestamps. */
-	private static final int minTimeBetweenEachIndexRewriting = 50;
-	/** Directory (relative path prefix) under which index.xml and the index_*.xml files are written. */
-	private static final String DEFAULT_INDEX_DIR = "myindex2/";
-	/** A sub-index holding this many entries or more is split on a longer hash prefix. */
-	private static final int MAX_ENTRIES = 50;
-	private static final String pluginName = "XML spider";
-	/** Fraction of the total time allowed to be spent on indexing (max value = 1). */
-	private static final double MAX_TIME_SPENT_INDEXING = 0.5;
-	/** Values written into the header element of every generated index file. */
-	private static final String indexTitle= "This is an index";
-	private static final String indexOwner = "Another anonymous";
-	private static final String indexOwnerEmail = null;
-	/** Can have many; this limit only exists to save memory. */
-	private static final int maxParallelRequests = 100;
-	private int maxShownURIs = 50;
-	public Set allowedMIMETypes;
-
-	/* ---- Crawl state ---- */
-
-	/** URIs visited, or fetching, or queued. Added once then forgotten about. */
-	private final HashSet visitedURIs = new HashSet();
-	/** URIs for which at least one word has been indexed. */
-	private final HashSet urisWithWords = new HashSet();
-	/** URIs whose fetch ended in onFailure. */
-	private final HashSet failedURIs = new HashSet();
-	/** Same content as queuedURIList; used for duplicate detection. */
-	private final HashSet queuedURISet = new HashSet();
-	/** FIFO of URIs waiting to be fetched. */
-	private final LinkedList queuedURIList = new LinkedList();
-	/** FreenetURI -> ClientGetter for fetches that have been started. */
-	private final HashMap runningFetchesByURI = new HashMap();
-
-	/* ---- Per-page and per-word data ---- */
-
-	/** String (word) -> FreenetURI[] of pages containing it. */
-	private final HashMap urisByWord = new HashMap();
-	/** String (URI) -> String (title text). */
-	private final HashMap titlesOfURIs = new HashMap();
-	private final HashMap sizeOfURIs = new HashMap(); /* String (URI) -> Long */
-	private final HashMap mimeOfURIs = new HashMap(); /* String (URI) -> String */
-	private final HashMap lastPositionByURI = new HashMap(); /* String (URI) -> Integer */ /* Use to determine word position on each uri */
-	private final HashMap positionsByWordByURI = new HashMap(); /* String (URI) -> HashMap (String (word) -> Integer[] (Positions)) */
-
-	/* ---- Index generation state ---- */
-
-	/** System.currentTimeMillis() value stored after the last index rewrite attempt. */
-	long tProducedIndex;
-	/** MD5(word) -> word, kept sorted by hash so sub-indices can be cut by hash prefix. */
-	private TreeMap tMap = new TreeMap();
-	/** Hash prefixes (String) of the sub-index files written by the last generation run. */
-	private Vector indices;
-	/** Value written as the "prefix" attribute of the main index. */
-	private int match;
-	/** Index files are only regenerated while this flag is set. */
-	private boolean indexing ;
-	/** FreenetURI -> Integer file id used inside the index files. */
-	private HashMap urisToNumbers;
-
-	/* ---- Node handles ---- */
-
-	private NodeClientCore core;
-	private FetchContext ctx;
-	private final short PRIORITY_CLASS = RequestStarter.BULK_SPLITFILE_PRIORITY_CLASS;
-	/** While true, startSomeRequests() starts no fetches. */
-	private boolean stopped = true;
-	PluginRespirator pr;
-	
-	
-
-	/**
-	 * Appends a URI to the fetch queue unless it was already seen.
-	 * A USK carrying a negative suggested edition is first rewritten to the
-	 * corresponding positive edition.
-	 *
-	 * @param uri the URI to queue
-	 */
-	private synchronized void queueURI(FreenetURI uri) {
-		//not adding the html condition
-		boolean isUSK = uri.getKeyType().equals("USK");
-		if (isUSK && (uri.getSuggestedEdition() < 0)) {
-			uri = uri.setSuggestedEdition(-uri.getSuggestedEdition());
-		}
-
-		if (visitedURIs.contains(uri)) {
-			return;
-		}
-		if (!queuedURISet.add(uri)) {
-			return;
-		}
-		queuedURIList.addLast(uri);
-		visitedURIs.add(uri);
-	}
-
-	/**
-	 * Queues the node's bookmarked URIs, then starts queued fetches until
-	 * maxParallelRequests are running or the queue is empty. While the
-	 * spider is stopped only the queueing step happens.
-	 *
-	 * Each getter is registered in runningFetchesByURI while the lock is still
-	 * held, so concurrent callers see an accurate running count; the fetches
-	 * themselves are started outside the lock.
-	 */
-	private void startSomeRequests() {
-		FreenetURI[] initialURIs = core.bookmarkManager.getBookmarkURIs();
-		for (int i = 0; i < initialURIs.length; i++) {
-			queueURI(initialURIs[i]);
-		}
-
-		ArrayList toStart;
-		synchronized (this) {
-			if (stopped) {
-				return;
-			}
-			int running = runningFetchesByURI.size();
-			int queued = queuedURIList.size();
-			if ((running >= maxParallelRequests) || (queued == 0)) {
-				return;
-			}
-
-			toStart = new ArrayList(Math.min(maxParallelRequests - running, queued));
-			for (int i = running; (i < maxParallelRequests) && !queuedURIList.isEmpty(); i++) {
-				FreenetURI uri = (FreenetURI) queuedURIList.removeFirst();
-				queuedURISet.remove(uri);
-				if (uri.getKeyType().equals("USK")) {
-					try {
-						ctx.uskManager.subscribe(USK.create(uri), this, false, this);
-					} catch (Exception e) {
-						// Subscription is best-effort: the fetch below still goes ahead.
-						Logger.error(this, "Could not subscribe to " + uri + ": " + e, e);
-					}
-				}
-				ClientGetter getter = makeGetter(uri);
-				runningFetchesByURI.put(getter.getURI(), getter);
-				toStart.add(getter);
-			}
-		}
-
-		for (int i = 0; i < toStart.size(); i++) {
-			ClientGetter g = (ClientGetter) toStart.get(i);
-			try {
-				g.start();
-			} catch (FetchException e) {
-				onFailure(e, g);
-				continue;
-			}
-
-			// Debug trail of started fetches; always release the file handle.
-			FileWriter outp = null;
-			try {
-				outp = new FileWriter("logfile2", true);
-				outp.write("URI " + g.getURI().toString() + '\n');
-			} catch (IOException e) {
-				Logger.error(this, "the logfile can not be written" + e.toString(), e);
-			} finally {
-				if (outp != null) {
-					try {
-						outp.close();
-					} catch (IOException e) {
-						Logger.error(this, "the logfile can not be written" + e.toString(), e);
-					}
-				}
-			}
-		}
-	}
-	
-	
-
-	/**
-	 * Builds (but does not start) a getter for the given URI, with this
-	 * spider as callback and client object, using the node's CHK/SSK fetch
-	 * schedulers, the shared fetch context and PRIORITY_CLASS.
-	 *
-	 * @param uri the URI to fetch
-	 * @return a new, unstarted ClientGetter
-	 */
-	private ClientGetter makeGetter(FreenetURI uri) {
-		return new ClientGetter(
-				this,
-				core.requestStarters.chkFetchScheduler,
-				core.requestStarters.sskFetchScheduler,
-				uri,
-				ctx,
-				PRIORITY_CLASS,
-				this,
-				null,
-				null);
-	}
-
-	public void onSuccess(FetchResult result, ClientGetter state) {
-		FreenetURI uri = state.getURI();
-		try{
-	    FileWriter output = new FileWriter("logfile",true);
-	    output.write(uri.toString()+"\n");
-	    output.close();
-		}
-		catch(Exception e){
-			Logger.error(this, "The uri could not be removed from running "+e.toString(), e);
-		}
-		synchronized (this) {
-			runningFetchesByURI.remove(uri);
-		}
-		startSomeRequests();
-		ClientMetadata cm = result.getMetadata();
-		Bucket data = result.asBucket();
-		String mimeType = cm.getMIMEType();
-		
-		sizeOfURIs.put(uri.toString(), new Long(data.size()));
-		mimeOfURIs.put(uri.toString(), mimeType);
-		
-		try {
-			ContentFilter.filter(data, ctx.bucketFactory, mimeType, uri.toURI("http://127.0.0.1:8888/"), this);
-		} catch (UnsafeContentTypeException e) {
-			return; // Ignore
-		} catch (IOException e) {
-			Logger.error(this, "Bucket error?: " + e, e);
-		} catch (URISyntaxException e) {
-			Logger.error(this, "Internal error: " + e, e);
-		} finally {
-			data.free();
-		}
-	}
-
-	public void onFailure(FetchException e, ClientGetter state) {
-		FreenetURI uri = state.getURI();
-		try{
-			FileWriter outp = new FileWriter("failed",true);
-			outp.write("failed "+e.toString()+" for "+uri+'\n');
-			outp.close();
-			
-		}catch(Exception e2){
-			
-		}
-		synchronized (this) {
-			runningFetchesByURI.remove(uri);
-			failedURIs.add(uri);
-		}
-		if (e.newURI != null)
-			queueURI(e.newURI);
-//		else
-//			queueURI(uri);
-		startSomeRequests();
-		
-		
-	}
-
-	/** Insert-completed callback; intentionally a no-op here. */
-	public void onSuccess(BaseClientPutter state) {
-		// Nothing to do.
-	}
-
-	/** Insert-failed callback; intentionally a no-op here. */
-	public void onFailure(InsertException e, BaseClientPutter state) {
-		// Nothing to do.
-	}
-
-	/** Insert-URI-generated callback; intentionally a no-op here. */
-	public void onGeneratedURI(FreenetURI uri, BaseClientPutter state) {
-		// Nothing to do.
-	}
-
-	/**
-	 * FoundURICallback hook: queues the reported URI and then tries to start
-	 * more fetches.
-	 *
-	 * @param uri the URI that was found
-	 */
-	public void foundURI(FreenetURI uri) {
-		this.queueURI(uri);
-		this.startSomeRequests();
-	}
-
-	public void onText(String s, String type, URI baseURI) {
-		
-		FreenetURI uri;
-		try {
-			uri = new FreenetURI(baseURI.getPath().substring(1));
-		} catch (MalformedURLException e) {
-			Logger.error(this, "Caught " + e, e);
-			return;
-		}
-		 
-		
-      
-		if((type != null) && (type.length() != 0) && type.toLowerCase().equals("title")
-		   && (s != null) && (s.length() != 0) && (s.indexOf('\n') < 0)) {
-			/* We should have a correct title */
-			titlesOfURIs.put(uri.toString(), s);
-			type = "title";
-			
-		}
-		else type = null;
-
-
-		String[] words = s.split("[^A-Za-z0-9]");
-
-		Integer lastPosition = null;
-
-		lastPosition = (Integer)lastPositionByURI.get(uri.toString());
-
-		if(lastPosition == null)
-			lastPosition = new Integer(1); /* We start to count from 1 */
-
-		for (int i = 0; i < words.length; i++) {
-			String word = words[i];
-			if ((word == null) || (word.length() == 0))
-				continue;
-			word = word.toLowerCase();
-			try{
-			if(type == null)
-				addWord(word, lastPosition.intValue() + i, uri);
-			else
-				addWord(word, -1 * (i+1), uri);
-			}
-			catch (Exception e){}
-		}
-		
-		if(type == null) {
-			lastPosition = new Integer(lastPosition.intValue() + words.length);
-			lastPositionByURI.put(uri.toString(), lastPosition);
-		}
-		
-	}
-
-	/**
-	 * Records one occurrence of a word on a page and, when the rewrite
-	 * interval has elapsed and indexing is enabled, regenerates the index
-	 * files.
-	 *
-	 * Words shorter than three characters are ignored. If the page is already
-	 * listed for the word, the method returns after recording the position,
-	 * without considering an index rewrite.
-	 *
-	 * @param word the (already lower-cased) word
-	 * @param position position of the word on the page; negative for title words
-	 * @param uri the page the word was found on
-	 * @throws Exception whatever MD5() or index generation throws, other than
-	 *         IOException during generation, which is logged
-	 */
-	private synchronized void addWord(String word, int position,FreenetURI uri) throws Exception{
-		if(word.length() < 3)
-			return;
-
-		urisWithWords.add(uri);
-
-		/* Word position indexation: for a given URI, word -> positions */
-		String uriKey = uri.toString();
-		HashMap wordPositionsForOneUri = (HashMap) positionsByWordByURI.get(uriKey);
-		if(wordPositionsForOneUri == null) {
-			wordPositionsForOneUri = new HashMap();
-			positionsByWordByURI.put(uriKey, wordPositionsForOneUri);
-		}
-		Integer[] positions = (Integer[]) wordPositionsForOneUri.get(word);
-		Integer[] newPositions;
-		if(positions == null) {
-			newPositions = new Integer[] { new Integer(position) };
-		} else {
-			newPositions = new Integer[positions.length + 1];
-			System.arraycopy(positions, 0, newPositions, 0, positions.length);
-			newPositions[positions.length] = new Integer(position);
-		}
-		wordPositionsForOneUri.put(word, newPositions);
-
-		/* word -> pages containing it */
-		FreenetURI[] uris = (FreenetURI[]) urisByWord.get(word);
-		if (uris == null) {
-			urisByWord.put(word, new FreenetURI[] { uri });
-		} else {
-			for (int i = 0; i < uris.length; i++) {
-				if (uris[i].equals(uri))
-					return;
-			}
-			FreenetURI[] newURIs = new FreenetURI[uris.length + 1];
-			System.arraycopy(uris, 0, newURIs, 0, uris.length);
-			newURIs[uris.length] = uri;
-			urisByWord.put(word, newURIs);
-		}
-		//the new word is added here in urisByWord
-		tMap.put(MD5(word), word);
-
-		long indexingStarted = System.currentTimeMillis();
-		if (tProducedIndex + minTimeBetweenEachIndexRewriting * 1000 >= indexingStarted)
-			return;
-
-		try {
-			if(indexing){
-				generateIndex2();
-				produceIndex2();
-				long finished = System.currentTimeMillis();
-				// Floating-point division: with long division the quotient is 0
-				// for every fraction below 1, so the limit could never trigger
-				// as intended. The divisor is positive because of the interval
-				// check above.
-				double fractionSpent = (double) (finished - indexingStarted) / (double) (finished - tProducedIndex);
-				indexing = !(fractionSpent > MAX_TIME_SPENT_INDEXING);
-			}
-		} catch (IOException e) {
-			Logger.error(this, "Caught " + e + " while creating index", e);
-		}
-		tProducedIndex = System.currentTimeMillis();
-	}
-//	private synchronized void check() throws IOException{
-//		FileWriter outp = new FileWriter("logs/indexing",true);
-//		outp.write("size = "+urisByWord.size()+"\n");
-//		Iterator it = urisByWord.keySet().iterator();
-//		while(it.hasNext())
-//			outp.write(it.next()+"\n");
-//		outp.close();
-//	}
-
-	/**
-	 * Writes the main index file, index.xml under DEFAULT_INDEX_DIR, using a
-	 * fixed hash prefix length of 1: one subIndex entry per hexadecimal digit
-	 * 0..f, after calling generateSubIndex(String) for the matching
-	 * index_<digit>.xml file. Also rebuilds urisToNumbers.
-	 *
-	 * Does nothing except print a message when no words have been collected.
-	 * XML set-up and serialization problems are logged and end the method.
-	 */
-	private synchronized void produceIndex() throws IOException,NoSuchAlgorithmException {
-		if (urisByWord.isEmpty() || urisWithWords.isEmpty()) {
-			System.out.println("No URIs with words");
-			return;
-		}
-
-		//the number of leading hash characters to consider for matching
-		int prefix = 1;
-
-		/* Initialize xml builder */
-		DocumentBuilder xmlBuilder;
-		try {
-			xmlBuilder = DocumentBuilderFactory.newInstance().newDocumentBuilder();
-		} catch(javax.xml.parsers.ParserConfigurationException e) {
-			/* Will (should ?) never happen */
-			Logger.error(this, "Spider: Error while initializing XML generator: "+e.toString(), e);
-			return;
-		}
-
-		/* Starting to generate index */
-		Document xmlDoc = xmlBuilder.getDOMImplementation().createDocument(null, "main_index", null);
-		Element rootElement = xmlDoc.getDocumentElement();
-
-		/* Header: title, owner and (optionally) owner email */
-		Element headerElement = xmlDoc.createElement("header");
-
-		Element titleElement = xmlDoc.createElement("title");
-		titleElement.appendChild(xmlDoc.createTextNode(indexTitle));
-		headerElement.appendChild(titleElement);
-
-		Element ownerElement = xmlDoc.createElement("owner");
-		ownerElement.appendChild(xmlDoc.createTextNode(indexOwner));
-		headerElement.appendChild(ownerElement);
-
-		if(indexOwnerEmail != null) {
-			Element emailElement = xmlDoc.createElement("email");
-			emailElement.appendChild(xmlDoc.createTextNode(indexOwnerEmail));
-			headerElement.appendChild(emailElement);
-		}
-
-		/* Number the pages; the sub-indices refer to them by these ids */
-		FreenetURI[] uris = (FreenetURI[]) urisWithWords.toArray(new FreenetURI[urisWithWords.size()]);
-		urisToNumbers = new HashMap();
-		for (int i = 0; i < uris.length; i++) {
-			urisToNumbers.put(uris[i], new Integer(i));
-		}
-
-		Element prefixElement = xmlDoc.createElement("prefix");
-		prefixElement.setAttribute("value", prefix+"");
-
-		/* Adding word index */
-		Element keywordsElement = xmlDoc.createElement("keywords");
-		for(int i = 0;i<16;i++){
-			// toHexString yields "0".."9","a".."f", so no special case is needed for i<=9.
-			String key = Integer.toHexString(i);
-			generateSubIndex(DEFAULT_INDEX_DIR+"index_"+key+".xml");
-			Element subIndexElement = xmlDoc.createElement("subIndex");
-			//the subindex element key will contain the characters used for matching in that subindex
-			subIndexElement.setAttribute("key", key);
-			keywordsElement.appendChild(subIndexElement);
-		}
-
-		// make sure that prefix is the first child of root Element
-		rootElement.appendChild(prefixElement);
-		rootElement.appendChild(headerElement);
-		rootElement.appendChild(keywordsElement);
-
-		/* Serialization */
-		Transformer serializer;
-		try {
-			serializer = TransformerFactory.newInstance().newTransformer();
-		} catch(javax.xml.transform.TransformerConfigurationException e) {
-			Logger.error(this, "Spider: Error while serializing XML (transformFactory.newTransformer()): "+e.toString(), e);
-			return;
-		}
-		serializer.setOutputProperty(OutputKeys.ENCODING, "UTF-8");
-		serializer.setOutputProperty(OutputKeys.INDENT,"yes");
-
-		/* final step */
-		File outputFile = new File(DEFAULT_INDEX_DIR+"index.xml");
-		try {
-			serializer.transform(new DOMSource(xmlDoc), new StreamResult(outputFile));
-		} catch(javax.xml.transform.TransformerException e) {
-			Logger.error(this, "Spider: Error while serializing XML (transform()): "+e.toString(), e);
-			return;
-		}
-
-		if(Logger.shouldLog(Logger.MINOR, this))
-			Logger.minor(this, "Spider: indexes regenerated.");
-	}
-
-	/**
-	 * Writes the main index file, index.xml under DEFAULT_INDEX_DIR, listing
-	 * one subIndex entry per prefix currently held in indices and the current
-	 * value of match as the "prefix" attribute.
-	 *
-	 * Does nothing except print a message when no words have been collected,
-	 * and logs and returns when no sub-index list exists yet. XML set-up and
-	 * serialization problems are logged and end the method.
-	 */
-	private synchronized void produceIndex2() throws IOException,NoSuchAlgorithmException {
-		if (urisByWord.isEmpty() || urisWithWords.isEmpty()) {
-			System.out.println("No URIs with words");
-			return;
-		}
-		if (indices == null) {
-			// The loop below would otherwise fail with a NullPointerException.
-			Logger.error(this, "Spider: no sub-index list available, main index not written");
-			return;
-		}
-
-		/* Initialize xml builder */
-		DocumentBuilder xmlBuilder;
-		try {
-			xmlBuilder = DocumentBuilderFactory.newInstance().newDocumentBuilder();
-		} catch(javax.xml.parsers.ParserConfigurationException e) {
-			/* Will (should ?) never happen */
-			Logger.error(this, "Spider: Error while initializing XML generator: "+e.toString(), e);
-			return;
-		}
-
-		/* Starting to generate index */
-		Document xmlDoc = xmlBuilder.getDOMImplementation().createDocument(null, "main_index", null);
-		Element rootElement = xmlDoc.getDocumentElement();
-
-		/* Header: title, owner and (optionally) owner email */
-		Element headerElement = xmlDoc.createElement("header");
-
-		Element titleElement = xmlDoc.createElement("title");
-		titleElement.appendChild(xmlDoc.createTextNode(indexTitle));
-		headerElement.appendChild(titleElement);
-
-		Element ownerElement = xmlDoc.createElement("owner");
-		ownerElement.appendChild(xmlDoc.createTextNode(indexOwner));
-		headerElement.appendChild(ownerElement);
-
-		if(indexOwnerEmail != null) {
-			Element emailElement = xmlDoc.createElement("email");
-			emailElement.appendChild(xmlDoc.createTextNode(indexOwnerEmail));
-			headerElement.appendChild(emailElement);
-		}
-
-		/* Adding word index: one entry per generated sub-index file */
-		Element keywordsElement = xmlDoc.createElement("keywords");
-		for(int i = 0;i<indices.size();i++){
-			Element subIndexElement = xmlDoc.createElement("subIndex");
-			//the subindex element key will contain the characters used for matching in that subindex
-			subIndexElement.setAttribute("key", (String) indices.elementAt(i));
-			keywordsElement.appendChild(subIndexElement);
-		}
-
-		Element prefixElement = xmlDoc.createElement("prefix");
-		prefixElement.setAttribute("value",match+"");
-
-		// make sure that prefix is the first child of root Element
-		rootElement.appendChild(prefixElement);
-		rootElement.appendChild(headerElement);
-		rootElement.appendChild(keywordsElement);
-
-		/* Serialization */
-		Transformer serializer;
-		try {
-			serializer = TransformerFactory.newInstance().newTransformer();
-		} catch(javax.xml.transform.TransformerConfigurationException e) {
-			Logger.error(this, "Spider: Error while serializing XML (transformFactory.newTransformer()): "+e.toString(), e);
-			return;
-		}
-		serializer.setOutputProperty(OutputKeys.ENCODING, "UTF-8");
-		serializer.setOutputProperty(OutputKeys.INDENT,"yes");
-
-		/* final step */
-		File outputFile = new File(DEFAULT_INDEX_DIR+"index.xml");
-		try {
-			serializer.transform(new DOMSource(xmlDoc), new StreamResult(outputFile));
-		} catch(javax.xml.transform.TransformerException e) {
-			Logger.error(this, "Spider: Error while serializing XML (transform()): "+e.toString(), e);
-			return;
-		}
-
-		if(Logger.shouldLog(Logger.MINOR, this))
-			Logger.minor(this, "Spider: indexes regenerated.");
-	}
-	/**
-	 * Regenerates all sub-index files from tMap. The sorted hash keys are cut
-	 * into groups sharing the same first character, and each group is handed
-	 * to generateSubIndex(int, Vector), which splits it further if needed.
-	 * Also rebuilds urisToNumbers, resets indices and match, and dumps the
-	 * keys to the "indexing" debug file.
-	 *
-	 * Every key, including the first key of each group, is placed in its
-	 * group's list. Leaving the group leader out would drop that word from
-	 * the index and produce an empty list for single-key groups.
-	 *
-	 * @throws Exception on I/O failure of the debug file or any failure
-	 *         raised by generateSubIndex
-	 */
-	private synchronized void generateIndex2() throws Exception{
-		// we have the tree map; use its sorted keys (md5s) to generate the xml indices
-		if (urisByWord.isEmpty() || urisWithWords.isEmpty()) {
-			System.out.println("No URIs with words");
-			return;
-		}
-		FreenetURI[] uris = (FreenetURI[]) urisWithWords.toArray(new FreenetURI[urisWithWords.size()]);
-		urisToNumbers = new HashMap();
-		for (int i = 0; i < uris.length; i++) {
-			urisToNumbers.put(uris[i], new Integer(i));
-		}
-		indices = new Vector();
-		// number of leading hash characters that define a top-level group
-		int prefix = 1;
-		match = 1;
-
-		// One writer for the whole run, closed on every exit path.
-		FileWriter outp = new FileWriter("indexing");
-		try {
-			outp.write("size = "+tMap.size()+"\n");
-
-			Iterator it = tMap.keySet().iterator();
-			if (!it.hasNext()) {
-				return;
-			}
-			String str = (String) it.next();
-			outp.write(str + "\n");
-			Vector list = new Vector();
-			list.add(str);
-
-			while(it.hasNext()) {
-				String key = (String) it.next();
-				outp.write(key + "\n");
-				if(!key.substring(0, prefix).equals(str.substring(0, prefix))) {
-					// key opens a new group: flush the previous one first
-					generateSubIndex(prefix,list);
-					str = key;
-					list = new Vector();
-				}
-				list.add(key);
-			}
-			generateSubIndex(prefix,list);
-		} finally {
-			outp.close();
-		}
-	}
-	/**
-	 * Copies the elements list[begin..end], both bounds inclusive, into a new
-	 * Vector. The result is empty when begin is greater than end.
-	 *
-	 * @param list source vector
-	 * @param begin index of the first element to copy
-	 * @param end index of the last element to copy
-	 * @return a new Vector holding the selected elements in order
-	 */
-	private synchronized Vector subVector(Vector list, int begin, int end){
-		Vector slice = new Vector();
-		int pos = begin;
-		while (pos <= end) {
-			slice.add(list.elementAt(pos));
-			pos++;
-		}
-		return slice;
-	}
-	
-	/**
-	 * Writes the sub-index for a sorted list of hash keys sharing their first
-	 * p characters. A list with fewer than MAX_ENTRIES keys is written as one
-	 * file via generateXML(); a larger list is regrouped on p+1 characters
-	 * and each group is handled recursively. match is raised to the longest
-	 * prefix length used so far.
-	 *
-	 * @param p number of leading characters the keys in list have in common
-	 * @param list sorted hash keys (String); an empty list is ignored
-	 * @throws Exception propagated from the recursive calls
-	 */
-	private synchronized void generateSubIndex(int p,Vector list) throws Exception{
-		if (list.isEmpty()) {
-			// generateXML() names the file after the first key, so there is nothing to write.
-			return;
-		}
-		if(list.size() < MAX_ENTRIES) {
-			//the index can be generated from this list
-			generateXML(list,p);
-			return;
-		}
-
-		//too many entries: the prefix needs to be incremented
-		if(match <= p) match = p+1;
-		int prefix = p+1;
-
-		int groupStart = 0;
-		String groupKey = (String) list.elementAt(0);
-		for (int i = 1; i < list.size(); i++) {
-			String key = (String) list.elementAt(i);
-			if(!(key.substring(0, prefix)).equals(groupKey.substring(0, prefix))) {
-				// key opens a new group: emit [groupStart, i-1]
-				generateSubIndex(prefix,subVector(list,groupStart,i-1));
-				groupStart = i;
-				groupKey = key;
-			}
-		}
-		generateSubIndex(prefix,subVector(list,groupStart,list.size()-1));
-	}	
-		
-
-	/**
-	 * Writes one sub-index file, index_<p>.xml under DEFAULT_INDEX_DIR, where
-	 * p is the first prefix characters of the first key in list, and records
-	 * p in indices. The file has root element sub_index with the children
-	 * entries (number of keys), header, files (id and key of each page
-	 * referenced) and keywords (one word element per key, each holding one
-	 * file element per page with the comma-separated word positions).
-	 *
-	 * Words or pages whose bookkeeping data is missing are logged and skipped.
-	 * XML set-up and serialization problems are logged and end the method.
-	 *
-	 * @param list hash keys (String) of tMap to include; an empty list is ignored
-	 * @param prefix number of leading key characters that name the file
-	 */
-	private synchronized void generateXML(Vector list, int prefix)
-	{
-		if (list.isEmpty()) {
-			// No key to derive the file name from, and nothing to write.
-			return;
-		}
-		String p = ((String) list.elementAt(0)).substring(0, prefix);
-		indices.add(p);
-
-		/* Initialize xml builder */
-		DocumentBuilder xmlBuilder;
-		try {
-			xmlBuilder = DocumentBuilderFactory.newInstance().newDocumentBuilder();
-		} catch(javax.xml.parsers.ParserConfigurationException e) {
-			/* Will (should ?) never happen */
-			Logger.error(this, "Spider: Error while initializing XML generator: "+e.toString(), e);
-			return;
-		}
-
-		/* Starting to generate index */
-		Document xmlDoc = xmlBuilder.getDOMImplementation().createDocument(null, "sub_index", null);
-		Element rootElement = xmlDoc.getDocumentElement();
-
-		/* Header: title, owner and (optionally) owner email */
-		Element headerElement = xmlDoc.createElement("header");
-
-		Element titleElement = xmlDoc.createElement("title");
-		titleElement.appendChild(xmlDoc.createTextNode(indexTitle));
-		headerElement.appendChild(titleElement);
-
-		Element ownerElement = xmlDoc.createElement("owner");
-		ownerElement.appendChild(xmlDoc.createTextNode(indexOwner));
-		headerElement.appendChild(ownerElement);
-
-		if(indexOwnerEmail != null) {
-			Element emailElement = xmlDoc.createElement("email");
-			emailElement.appendChild(xmlDoc.createTextNode(indexOwnerEmail));
-			headerElement.appendChild(emailElement);
-		}
-
-		Element filesElement = xmlDoc.createElement("files"); /* filesElement != fileElement */
-
-		// Only the attribute carries the count; setNodeValue() has no effect on an Element.
-		Element entriesElement = xmlDoc.createElement("entries");
-		entriesElement.setAttribute("value", list.size()+"");
-
-		/* Adding word index */
-		Element keywordsElement = xmlDoc.createElement("keywords");
-		// ids of pages already listed under <files>
-		HashSet listedFileIds = new HashSet();
-		for(int i =0;i<list.size();i++)
-		{
-			String str = (String) tMap.get(list.elementAt(i));
-			FreenetURI[] urisForWord = (str == null) ? null : (FreenetURI[]) urisByWord.get(str);
-			if (urisForWord == null) {
-				Logger.error(this, "Spider: no pages recorded for key "+list.elementAt(i));
-				continue;
-			}
-
-			Element wordElement = xmlDoc.createElement("word");
-			wordElement.setAttribute("v",str );
-
-			for (int j = 0; j < urisForWord.length; j++) {
-				FreenetURI uri = urisForWord[j];
-				Integer x = (Integer) urisToNumbers.get(uri);
-				if (x == null) {
-					Logger.error(this, "Eh?");
-					continue;
-				}
-
-				/* Position by position */
-				HashMap positionsForGivenWord = (HashMap)positionsByWordByURI.get(uri.toString());
-				Integer[] positions = (positionsForGivenWord == null) ? null : (Integer[])positionsForGivenWord.get(str);
-				if (positions == null) {
-					Logger.error(this, "Spider: no positions recorded for word "+str+" on "+uri);
-					continue;
-				}
-
-				StringBuffer positionList = new StringBuffer();
-				for(int k=0; k < positions.length ; k++) {
-					if(k!=0)
-						positionList.append(',');
-					positionList.append(positions[k].toString());
-				}
-
-				Element uriElement = xmlDoc.createElement("file");
-				uriElement.setAttribute("id", x.toString());
-				uriElement.appendChild(xmlDoc.createTextNode(positionList.toString()));
-				wordElement.appendChild(uriElement);
-
-				// add() returns false when the id is already listed
-				if(listedFileIds.add(x.toString()))
-				{
-					Element fileElement = xmlDoc.createElement("file");
-					fileElement.setAttribute("id", x.toString());
-					fileElement.setAttribute("key", uri.toString());
-					filesElement.appendChild(fileElement);
-				}
-			}
-
-			keywordsElement.appendChild(wordElement);
-		}
-
-		rootElement.appendChild(entriesElement);
-		rootElement.appendChild(headerElement);
-		rootElement.appendChild(filesElement);
-		rootElement.appendChild(keywordsElement);
-
-		/* Serialization */
-		Transformer serializer;
-		try {
-			serializer = TransformerFactory.newInstance().newTransformer();
-		} catch(javax.xml.transform.TransformerConfigurationException e) {
-			Logger.error(this, "Spider: Error while serializing XML (transformFactory.newTransformer()): "+e.toString(), e);
-			return;
-		}
-		serializer.setOutputProperty(OutputKeys.ENCODING, "UTF-8");
-		serializer.setOutputProperty(OutputKeys.INDENT,"yes");
-
-		/* final step */
-		File outputFile = new File(DEFAULT_INDEX_DIR+"index_"+p+".xml");
-		try {
-			serializer.transform(new DOMSource(xmlDoc), new StreamResult(outputFile));
-		} catch(javax.xml.transform.TransformerException e) {
-			Logger.error(this, "Spider: Error while serializing XML (transform()): "+e.toString(), e);
-			return;
-		}
-
-		if(Logger.shouldLog(Logger.MINOR, this))
-			Logger.minor(this, "Spider: indexes regenerated.");
-	}
-	/**
-	 * Adds every known word, in sorted order, to the sub-index file chosen by
-	 * getIndex(). When addWord(String, String) reports that the word was not
-	 * added, the sub-index is split and regenerated and the word is added
-	 * again to the sub-index then selected. A failure for one word is logged
-	 * and the remaining words are still processed.
-	 */
-	private synchronized void generateIndex() throws Exception{
-		String[] sortedWords = (String[]) urisByWord.keySet().toArray(new String[urisByWord.size()]);
-		Arrays.sort(sortedWords);
-
-		for (int w = 0; w < sortedWords.length; w++) {
-			String word = sortedWords[w];
-			try {
-				String subIndex = getIndex(word);
-				if (addWord(subIndex, word)) {
-					continue;
-				}
-				// The word was rejected: split the sub-index and retry.
-				split(subIndex);
-				regenerateIndex(subIndex);
-				subIndex = getIndex(word);
-				addWord(subIndex, word);
-			} catch (Exception e2) {
-				Logger.error(this,"The Word could not be added"+ e2.toString(), e2);
-			}
-		}
-	}
-	/**
-	 * Re-adds every word listed in index_<prefix>.xml to the sub-index that
-	 * getIndex() currently selects for it, i.e. redistributes the entries of
-	 * prefix.xml to prefix(0-f).xml.
-	 *
-	 * @param prefix hash prefix naming the sub-index file to read
-	 * @throws Exception on parser set-up, parsing or re-adding failures
-	 */
-	private void regenerateIndex(String prefix) throws Exception{
-		DocumentBuilder parser = DocumentBuilderFactory.newInstance().newDocumentBuilder();
-		Document subIndexDoc = parser.parse(DEFAULT_INDEX_DIR+"index_"+prefix+".xml");
-		NodeList words = subIndexDoc.getDocumentElement().getElementsByTagName("word");
-
-		int n = 0;
-		while (n < words.getLength()) {
-			Element wordElement = (Element) words.item(n);
-			String value = wordElement.getAttribute("v");
-			addWord(getIndex(value), value);
-			n++;
-		}
-	}
-	
-	/**
-	 * Looks up which sub-index a word belongs to. Reads the "prefix" value
-	 * from index.xml, takes that many leading characters of MD5(word) and
-	 * passes them, with the list of subIndex elements, to search().
-	 *
-	 * @param word the word to locate
-	 * @return whatever search() returns for the word's hash prefix
-	 * @throws Exception on parser set-up, parsing, hashing or lookup failures
-	 */
-	private String getIndex(String word) throws Exception {
-		DocumentBuilder parser = DocumentBuilderFactory.newInstance().newDocumentBuilder();
-		Element mainRoot = parser.parse(DEFAULT_INDEX_DIR+"index.xml").getDocumentElement();
-
-		Attr prefixAttr = (Attr) mainRoot.getElementsByTagName("prefix").item(0)
-				.getAttributes().getNamedItem("value");
-		int prefixLength = Integer.parseInt(prefixAttr.getValue());
-
-		String hash = MD5(word);
-		NodeList subIndices = mainRoot.getElementsByTagName("subIndex");
-		return search(hash.substring(0, prefixLength), subIndices);
-	}
-	
-	/**
-	 * Appends a word, together with the URIs and positions recorded for it, to
-	 * the sub-index file index_PREFIX.xml and rewrites that file.
-	 * <p>
-	 * The "entries" counter of the sub-index is incremented, a "word" element
-	 * holding one "file" child per URI (id attribute plus a comma-separated
-	 * position list) is appended under "keywords", and any URI whose id is not
-	 * yet listed under "files" is added there as well.
-	 *
-	 * @param prefix key of the sub-index the word is added to
-	 * @param str the word to add; its URIs are read from urisByWord
-	 * @return false when the sub-index already holds MAX_ENTRIES entries or when
-	 *         an exception occurred (it is logged); true otherwise. A failure
-	 *         while serializing the file is logged but still yields true.
-	 */
-	private boolean addWord(String prefix, String str) throws Exception
-	{
-		try{
-			DocumentBuilderFactory docFactory = DocumentBuilderFactory.newInstance();
-			DocumentBuilder docBuilder = docFactory.newDocumentBuilder();
-			Document doc = docBuilder.parse(DEFAULT_INDEX_DIR+"index_"+prefix+".xml");
-			Element root = doc.getDocumentElement();
-
-			Element entry = (Element) root.getElementsByTagName("entries").item(0);
-			Attr no_entries = (Attr) entry.getAttributes().getNamedItem("value");
-
-			Element filesElement = (Element) root.getElementsByTagName("files").item(0);
-			// Live list: it also reflects "file" elements appended further down.
-			NodeList filesList = filesElement.getElementsByTagName("file");
-
-			int entryCount = Integer.parseInt(no_entries.getValue());
-			if(entryCount >= MAX_ENTRIES) {
-				// Sub-index is full; the caller is expected to split it.
-				return false;
-			}
-
-			//increment the number of entries
-			entry.setAttribute("value",(entryCount+1)+"");
-
-			//add the entry
-			Element wordElement = doc.createElement("word");
-			wordElement.setAttribute("v", str);
-
-			FreenetURI[] urisForWord = (FreenetURI[]) urisByWord.get(str);
-
-			/* URI by URI */
-			for (int j = 0; j < urisForWord.length; j++) {
-				FreenetURI uri = urisForWord[j];
-				Integer x = (Integer) urisToNumbers.get(uri);
-
-				if (x == null) {
-					Logger.error(this, "Eh?");
-					continue;
-				}
-
-				// uriElement goes under the word; fileElement goes under "files".
-				Element uriElement = doc.createElement("file");
-				Element fileElement = doc.createElement("file");
-				uriElement.setAttribute("id", x.toString());
-				fileElement.setAttribute("id", x.toString());
-				fileElement.setAttribute("key", uri.toString());
-
-				/* Position by position */
-				HashMap positionsForGivenWord = (HashMap)positionsByWordByURI.get(uri.toString());
-				Integer[] positions = (Integer[])positionsForGivenWord.get(str);
-
-				StringBuffer positionList = new StringBuffer();
-				for(int k=0; k < positions.length ; k++) {
-					if(k!=0)
-						positionList.append(',');
-					positionList.append(positions[k].toString());
-				}
-				uriElement.appendChild(doc.createTextNode(positionList.toString()));
-
-				// Look for an existing "file" entry with the same id.
-				int l;
-				for(l = 0;l<filesList.getLength();l++) {
-					Element file = (Element) filesList.item(l);
-					if(file.getAttribute("id").equals(x.toString()))
-						break;
-				}
-				wordElement.appendChild(uriElement);
-				// l ran off the end of the list: the id is not listed yet.
-				if(l>=filesList.getLength())
-					filesElement.appendChild(fileElement);
-			}
-			Element keywordsElement = (Element) root.getElementsByTagName("keywords").item(0);
-			keywordsElement.appendChild(wordElement);
-
-			DOMSource domSource = new DOMSource(doc);
-			TransformerFactory transformFactory = TransformerFactory.newInstance();
-			Transformer serializer = transformFactory.newTransformer();
-
-			File outputFile = new File(DEFAULT_INDEX_DIR+"index_"+prefix+".xml");
-			StreamResult resultStream = new StreamResult(outputFile);
-
-			serializer.setOutputProperty(OutputKeys.ENCODING, "UTF-8");
-			serializer.setOutputProperty(OutputKeys.INDENT,"yes");
-
-			/* final step */
-			try {
-				serializer.transform(domSource, resultStream);
-			} catch(javax.xml.transform.TransformerException e) {
-				// Previously swallowed silently. Only logged here: returning
-				// false would make the caller split the sub-index.
-				Logger.error(this, "Spider: Error while serializing sub-index "+prefix+": "+e.toString(), e);
-			}
-
-			return true;
-		}
-		catch(Exception e){Logger.error(this,"Word could not be added to the subindex"+ e.toString(), e);}
-		return false;
-	}
-	/**
-	 * Replaces one sub-index entry in index.xml by sixteen finer-grained ones.
-	 * <p>
-	 * The "prefix" value in index.xml is incremented when it does not exceed
-	 * the length of the given prefix, the "subIndex" element whose key equals
-	 * the prefix is removed from "keywords", and for each hex digit 0-f an empty
-	 * sub-index file is generated and a matching "subIndex" element appended.
-	 * index.xml is then rewritten. The words of the old sub-index file are not
-	 * moved by this method.
-	 *
-	 * @param prefix key of the sub-index being split
-	 */
-	private void split(String prefix) throws Exception
-	{
-		DocumentBuilderFactory docFactory = DocumentBuilderFactory.newInstance();
-		DocumentBuilder docBuilder = docFactory.newDocumentBuilder();
-		Document doc = docBuilder.parse(DEFAULT_INDEX_DIR+"index.xml");
-		Element root = doc.getDocumentElement();
-
-		// Lengthen the recorded prefix only if this split goes deeper than it.
-		Element prefixElt =(Element) root.getElementsByTagName("prefix").item(0);
-		int prefix_current = Integer.parseInt(prefixElt.getAttribute("value"));
-		if (prefix_current <= prefix.length()) {
-			prefixElt.setAttribute("value", (prefix_current+1)+"");
-		}
-
-		Element keywordElement = (Element) root.getElementsByTagName("keywords").item(0);
-
-		// Drop the entry of the sub-index that is being split.
-		NodeList subIndexElt = root.getElementsByTagName("subIndex");
-		for(int i =0;i<subIndexElt.getLength();i++)
-		{
-			Element subIndex = (Element) subIndexElt.item(i);
-			if((subIndex.getAttribute("key")).equals(prefix)) {
-				keywordElement.removeChild(subIndex);
-				break;
-			}
-		}
-
-		// Create the sixteen children: prefix + one hex digit each.
-		for(int i = 0;i<16;i++)
-		{
-			Element subIndex = doc.createElement("subIndex");
-			generateSubIndex(DEFAULT_INDEX_DIR+"index_"+prefix+Integer.toHexString(i)+".xml");
-			subIndex.setAttribute("key",prefix.concat(Integer.toHexString(i)));
-			keywordElement.appendChild(subIndex);
-		}
-
-		DOMSource domSource = new DOMSource(doc);
-		TransformerFactory transformFactory = TransformerFactory.newInstance();
-		Transformer serializer = transformFactory.newTransformer();
-		File outputFile = new File(DEFAULT_INDEX_DIR+"index.xml");
-		StreamResult resultStream = new StreamResult(outputFile);
-
-		serializer.setOutputProperty(OutputKeys.ENCODING, "UTF-8");
-		serializer.setOutputProperty(OutputKeys.INDENT,"yes");
-
-		/* final step */
-		try {
-			serializer.transform(domSource, resultStream);
-		} catch(javax.xml.transform.TransformerException e) {
-			// Previously swallowed silently; a failed write of index.xml
-			// must at least be visible in the log.
-			Logger.error(this, "Spider: Error while serializing index.xml after splitting "+prefix+": "+e.toString(), e);
-		}
-	}
-	
-	/**
-	 * Returns the key of the element in the list whose "key" attribute equals
-	 * the longest possible leading part of the given string.
-	 * <p>
-	 * The full string is tried first, then it is shortened one character at a
-	 * time down to the empty string.
-	 *
-	 * @param str the string (hash prefix) to match
-	 * @param list elements carrying a "key" attribute
-	 * @return the matching key
-	 * @throws IllegalStateException when no key matches, not even the empty
-	 *         string (this used to surface as a StringIndexOutOfBoundsException
-	 *         from the recursive shortening)
-	 */
-	public String search(String str,NodeList list) throws Exception
-	{
-		for(int length = str.length(); length >= 0; length--){
-			String candidate = str.substring(0, length);
-			for(int i = 0;i<list.getLength();i++){
-				Element subIndex = (Element) list.item(i);
-				String key = subIndex.getAttribute("key");
-				if(key.equals(candidate)) return key;
-			}
-		}
-		throw new IllegalStateException("No subIndex key matches any prefix of \""+str+"\"");
-	}
-
-	
-	/**
-	 * Serves the spider's web interface, dispatching on the "action" parameter.
-	 * <ul>
-	 * <li>missing or empty: replies with a 301 redirect to "?action=list";</li>
-	 * <li>"list": renders the running/queued/visited/failed URI lists, either
-	 * all four truncated to maxShownURIs, or the single list selected by the
-	 * "listName" parameter in full;</li>
-	 * <li>"add": removes the URI given in "key" from the failed and visited
-	 * sets, queues it, starts requests and redirects back to the list; an
-	 * invalid URI yields an error page instead.</li>
-	 * </ul>
-	 * No reply is sent for any other action value.
-	 *
-	 * @param request the HTTP request whose parameters are read
-	 * @param context the context used to build the page and send the reply
-	 */
-	public void handleGet(HTTPRequest request, ToadletContext context) throws IOException, ToadletContextClosedException {
-		String action = request.getParam("action");
-		PageMaker pageMaker = context.getPageMaker();
-		if ((action == null) || (action.length() == 0)) {
-			MultiValueTable responseHeaders = new MultiValueTable();
-			responseHeaders.put("Location", "?action=list");
-			context.sendReplyHeaders(301, "Redirect", responseHeaders, "text/html; charset=utf-8", 0);
-			return;
-		} else if ("list".equals(action)) {
-
-			String listName = request.getParam("listName", null);
-			HTMLNode pageNode = pageMaker.getPageNode("The XML Spider", context);
-			HTMLNode contentNode = pageMaker.getContentNode(pageNode);
-			/* create copies for multi-threaded use */
-			// NOTE(review): the copies are taken without holding a lock -
-			// confirm whether concurrent modification is possible here.
-			if (listName == null) {
-				Map runningFetches = new HashMap(runningFetchesByURI);
-				List queued = new ArrayList(queuedURIList);
-				Set visited = new HashSet(visitedURIs);
-				Set failed = new HashSet(failedURIs);
-				contentNode.addChild(createNavbar(pageMaker, runningFetches.size(), queued.size(), visited.size(), failed.size()));
-				contentNode.addChild(createAddBox(pageMaker, context));
-				// Heading typo fixed ("Running FetcheIIIs"); now matches the single-list view.
-				contentNode.addChild(createList(pageMaker, "Running Fetches", "running", runningFetches.keySet(), maxShownURIs));
-				contentNode.addChild(createList(pageMaker, "Queued URIs", "queued", queued, maxShownURIs));
-				contentNode.addChild(createList(pageMaker, "Visited URIs", "visited", visited, maxShownURIs));
-				contentNode.addChild(createList(pageMaker, "Failed URIs", "failed", failed, maxShownURIs));
-			} else {
-				// Single list, shown in full (-1 disables truncation).
-				contentNode.addChild(createBackBox(pageMaker));
-				if ("failed".equals(listName)) {
-					Set failed = new HashSet(failedURIs);
-					contentNode.addChild(createList(pageMaker, "Failed URIs", "failed", failed, -1));
-				} else if ("visited".equals(listName)) {
-					Set visited = new HashSet(visitedURIs);
-					contentNode.addChild(createList(pageMaker, "Visited URIs", "visited", visited, -1));
-				} else if ("queued".equals(listName)) {
-					List queued = new ArrayList(queuedURIList);
-					contentNode.addChild(createList(pageMaker, "Queued URIs", "queued", queued, -1));
-				} else if ("running".equals(listName)) {
-					Map runningFetches = new HashMap(runningFetchesByURI);
-					contentNode.addChild(createList(pageMaker, "Running Fetches", "running", runningFetches.keySet(), -1));
-				}
-			}
-			MultiValueTable responseHeaders = new MultiValueTable();
-			byte[] responseBytes = pageNode.generate().getBytes("utf-8");
-			context.sendReplyHeaders(200, "OK", responseHeaders, "text/html; charset=utf-8", responseBytes.length);
-			context.writeData(responseBytes);
-		} else if ("add".equals(action)) {
-			String uriParam = request.getParam("key");
-			try {
-				FreenetURI uri = new FreenetURI(uriParam);
-				// Forget earlier outcomes so the URI is fetched again.
-				synchronized (this) {
-					failedURIs.remove(uri);
-					visitedURIs.remove(uri);
-				}
-				queueURI(uri);
-				startSomeRequests();
-			} catch (MalformedURLException mue1) {
-				sendSimpleResponse(context, "URL invalid", "The given URI is not valid.");
-				return;
-			}
-			MultiValueTable responseHeaders = new MultiValueTable();
-			responseHeaders.put("Location", "?action=list");
-			context.sendReplyHeaders(301, "Redirect", responseHeaders, "text/html; charset=utf-8", 0);
-			return;
-		}
-	}
-
-	/**
-	 * Handles a POST request. This implementation does nothing: the body is
-	 * empty and no reply is sent.
-	 *
-	 * @see freenet.oldplugins.plugin.HttpPlugin#handlePost(freenet.clients.http.HTTPRequestImpl, freenet.clients.http.ToadletContext)
-	 */
-	public void handlePost(HTTPRequest request, ToadletContext context) throws IOException {
-	}
-	
-	/**
-	 * Sends a complete "200 OK" HTML page consisting of a single infobox.
-	 *
-	 * @param context the context used to build the page and send the reply
-	 * @param title used as page title and as infobox header
-	 * @param message text placed inside the infobox
-	 */
-	private void sendSimpleResponse(ToadletContext context, String title, String message) throws ToadletContextClosedException, IOException {
-		PageMaker maker = context.getPageMaker();
-		HTMLNode page = maker.getPageNode(title, context);
-
-		// NOTE(review): "infobox-alter" may be a misspelling of an alert style - confirm.
-		HTMLNode box = maker.getContentNode(page).addChild(maker.getInfobox("infobox-alter", title));
-		maker.getContentNode(box).addChild("#", message);
-
-		byte[] payload = page.generate().getBytes("utf-8");
-		context.sendReplyHeaders(200, "OK", new MultiValueTable(), "text/html; charset=utf-8", payload.length);
-		context.writeData(payload);
-	}
-	
-	/**
-	 * Builds an untitled infobox reading "Return to the list of all URIs.",
-	 * where "list of all URIs" links to "?action=list".
-	 *
-	 * @param pageMaker factory for the infobox
-	 * @return the infobox node
-	 */
-	private HTMLNode createBackBox(PageMaker pageMaker) {
-		HTMLNode box = pageMaker.getInfobox((String) null);
-		HTMLNode sentence = pageMaker.getContentNode(box);
-		sentence.addChild("#", "Return to the ");
-		sentence.addChild("a", "href", "?action=list", "list of all URIs");
-		sentence.addChild("#", ".");
-		return box;
-	}
-	
-	/**
-	 * Builds the "Add a URI" infobox: a GET form with a hidden action=add
-	 * field, a text field named "key" and a submit button.
-	 *
-	 * @param pageMaker factory for the infobox
-	 * @param ctx not used by this method
-	 * @return the infobox node
-	 */
-	private HTMLNode createAddBox(PageMaker pageMaker, ToadletContext ctx) {
-		HTMLNode box = pageMaker.getInfobox("Add a URI");
-		HTMLNode boxContent = pageMaker.getContentNode(box);
-
-		String[] formAttributes = new String[] { "action", "method" };
-		String[] formValues = new String[] { "", "get" };
-		HTMLNode form = boxContent.addChild("form", formAttributes, formValues);
-
-		// Hidden field carrying the action name.
-		form.addChild("input", new String[] { "type", "name", "value" }, new String[] { "hidden", "action", "add" });
-		// Text field for the URI itself.
-		form.addChild("input", new String[] { "type", "size", "name", "value" }, new String[] { "text", "40", "key", "" });
-		// Submit button.
-		form.addChild("input", new String[] { "type", "value" }, new String[] { "submit", "Add URI" });
-		return box;
-	}
-
-	/**
-	 * Builds the "Page Navigation" infobox: a list of four anchor links
-	 * (#running, #queued, #visited, #failed), each labelled with its count.
-	 *
-	 * @param pageMaker factory for the infobox
-	 * @param running number shown on the "Running" link
-	 * @param queued number shown on the "Queued" link
-	 * @param visited number shown on the "Visited" link
-	 * @param failed number shown on the "Failed" link
-	 * @return the infobox node
-	 */
-	private HTMLNode createNavbar(PageMaker pageMaker, int running, int queued, int visited, int failed) {
-		String[] anchors = new String[] { "#running", "#queued", "#visited", "#failed" };
-		String[] labels = new String[] { "Running (", "Queued (", "Visited (", "Failed (" };
-		int[] counts = new int[] { running, queued, visited, failed };
-
-		HTMLNode navbar = pageMaker.getInfobox("navbar", "Page Navigation");
-		HTMLNode list = pageMaker.getContentNode(navbar).addChild("ul");
-		for (int entry = 0; entry < anchors.length; entry++) {
-			list.addChild("li").addChild("a", "href", anchors[entry], labels[entry] + counts[entry] + ')');
-		}
-		return navbar;
-	}
-
-	/**
-	 * Builds a named, anchored infobox listing the URIs of a collection, one
-	 * per line.
-	 * <p>
-	 * At most maxCount URIs are listed. When more remain, a "Show all" link to
-	 * the full list is appended instead. The previous check
-	 * (itemCount++ == maxCount) listed maxCount + 1 URIs before truncating.
-	 *
-	 * @param pageMaker factory for the infobox
-	 * @param listName title of the infobox
-	 * @param anchorName name of the anchor and of the list in the "Show all" link
-	 * @param collection the FreenetURI objects to list
-	 * @param maxCount maximum number of URIs to list; a negative value lists all
-	 * @return a div node containing the anchor and the infobox
-	 */
-	private HTMLNode createList(PageMaker pageMaker, String listName, String anchorName, Collection collection, int maxCount) {
-		HTMLNode listNode = new HTMLNode("div");
-		listNode.addChild("a", "name", anchorName);
-		HTMLNode listBox = pageMaker.getInfobox(listName);
-		HTMLNode listContent = pageMaker.getContentNode(listBox);
-		listNode.addChild(listBox);
-		Iterator collectionItems = collection.iterator();
-		int itemCount = 0;
-		while (collectionItems.hasNext()) {
-			// Limit reached and entries remain: link to the full list.
-			if ((maxCount >= 0) && (itemCount >= maxCount)) {
-				listContent.addChild("br");
-				listContent.addChild("a", "href", "?action=list&listName=" + anchorName, "Show all\u2026");
-				break;
-			}
-			FreenetURI uri = (FreenetURI) collectionItems.next();
-			listContent.addChild("#", uri.toString());
-			listContent.addChild("br");
-			itemCount++;
-		}
-		return listNode;
-	}
-
-	/**
-	 * Returns the value of the pluginName field.
-	 *
-	 * @see freenet.oldplugins.plugin.Plugin#getPluginName()
-	 */
-	public String getPluginName() {
-		return this.pluginName;
-	}
-
-	/**
-	 * Takes the client core from the plugin manager and configures the fetch
-	 * context used by the spider: 10 retries for splitfile blocks and for
-	 * non-splitfiles, a 2 MiB limit on temporary and output length, and
-	 * text/html, text/plain and application/xhtml+xml as allowed MIME types.
-	 * Also records the current time in tProducedIndex and sets indexing to true.
-	 *
-	 * @see freenet.oldplugins.plugin.Plugin#setPluginManager(freenet.oldplugins.plugin.PluginManager)
-	 */
-	public void setPluginManager(PluginManager pluginManager) {
-
-		this.core = pluginManager.getClientCore();
-		this.ctx = core.makeClient((short) 0).getFetchContext();
-		ctx.maxSplitfileBlockRetries = 10;
-		ctx.maxNonSplitfileRetries = 10;
-		ctx.maxTempLength = 2 * 1024 * 1024;
-		ctx.maxOutputLength = 2 * 1024 * 1024;
-		// Plain literals: wrapping them in new String(...) only made copies.
-		allowedMIMETypes = new HashSet();
-		allowedMIMETypes.add("text/html");
-		allowedMIMETypes.add("text/plain");
-		allowedMIMETypes.add("application/xhtml+xml");
-		// The fetch context gets its own copy of the set.
-		ctx.allowedMIMETypes = new HashSet(allowedMIMETypes);
-		tProducedIndex = System.currentTimeMillis();
-		indexing = true;
-	}
-
-
-	/**
-	 * Clears the stopped flag and launches a daemon thread named
-	 * "Spider Plugin Starter" that sleeps for 30 seconds and then calls
-	 * startSomeRequests(). An interruption of the sleep is ignored, so the
-	 * requests are started in that case too.
-	 *
-	 * @see freenet.oldplugins.plugin.Plugin#startPlugin()
-	 */
-	public void startPlugin() {
-		stopped = false;
-
-		Runnable delayedStart = new Runnable() {
-			public void run() {
-				try{
-					Thread.sleep(30 * 1000); // Let the node start up
-				} catch (InterruptedException e){}
-				startSomeRequests();
-			}
-		};
-		Thread starter = new Thread(delayedStart, "Spider Plugin Starter");
-		starter.setDaemon(true);
-		starter.start();
-	}
-
-	/**
-	 * Stops the spider: while holding this object's monitor, sets the stopped
-	 * flag and empties the list of queued URIs.
-	 *
-	 * @see freenet.oldplugins.plugin.Plugin#stopPlugin()
-	 */
-	public void stopPlugin() {
-		synchronized (this) {
-			stopped = true;
-			queuedURIList.clear();
-		}
-	}
-
-	/** Callback that is deliberately left without any action. */
-	public void onMajorProgress() {
-		// Ignore
-	}
-
-	/**
-	 * Callback that is deliberately left without any action.
-	 *
-	 * @param state not used
-	 */
-	public void onFetchable(BaseClientPutter state) {
-		// Ignore
-	}
-	/**
-	 * Renders a byte array as a lower-case hexadecimal string, two digits per
-	 * byte with the high nibble first.
-	 *
-	 * @param data the bytes to render
-	 * @return the hexadecimal representation; empty for an empty array
-	 */
-	private static String convertToHex(byte[] data) {
-		final String hexDigits = "0123456789abcdef";
-		StringBuffer hex = new StringBuffer();
-		for (int pos = 0; pos < data.length; pos++) {
-			// Mask to 0..255 so negative bytes do not sign-extend.
-			int unsigned = data[pos] & 0xFF;
-			hex.append(hexDigits.charAt(unsigned >>> 4));
-			hex.append(hexDigits.charAt(unsigned & 0x0F));
-		}
-		return hex.toString();
-	}
-	/**
-	 * Returns the MD5 digest of a string as 32 lower-case hexadecimal digits.
-	 * <p>
-	 * The text is encoded as ISO-8859-1 before hashing. The encoding is kept
-	 * unchanged because the digest is used to pick the sub-index for a word.
-	 *
-	 * @param text the string to hash
-	 * @return the hexadecimal MD5 digest of the encoded text
-	 * @throws NoSuchAlgorithmException when no MD5 implementation is available
-	 * @throws UnsupportedEncodingException when ISO-8859-1 is not supported
-	 */
-	public static String MD5(String text) throws NoSuchAlgorithmException, UnsupportedEncodingException  {
-		MessageDigest md = MessageDigest.getInstance("MD5");
-		// Hash the whole encoded array rather than text.length() bytes of it,
-		// so the hashed length no longer depends on chars and bytes matching.
-		byte[] encoded = text.getBytes("iso-8859-1");
-		return convertToHex(md.digest(encoded));
-	}
-	
-	/**
-	 * Writes an empty sub-index file.
-	 * <p>
-	 * The document has the root element "sub_index" with four children, in
-	 * this order: "entries" (value="0"), "header" (title, owner and, when
-	 * indexOwnerEmail is set, email), "files" and "keywords". It is serialized
-	 * as indented UTF-8. Failures to set up the XML builder or the serializer,
-	 * and failures while writing, are logged and end the method.
-	 *
-	 * @param filename path of the file to write
-	 */
-	public void generateSubIndex(String filename){
-		StreamResult target = new StreamResult(new File(filename));
-
-		/* Initialize xml builder */
-		DocumentBuilder builder;
-		try {
-			builder = DocumentBuilderFactory.newInstance().newDocumentBuilder();
-		} catch(javax.xml.parsers.ParserConfigurationException e) {
-			/* Will (should ?) never happen */
-			Logger.error(this, "Spider: Error while initializing XML generator: "+e.toString());
-			return;
-		}
-
-		/* Starting to generate index */
-		DOMImplementation domImpl = builder.getDOMImplementation();
-		Document subIndexDoc = domImpl.createDocument(null, "sub_index", null);
-		Element subIndexRoot = subIndexDoc.getDocumentElement();
-
-		/* Header: title, owner, optional owner email */
-		Element header = subIndexDoc.createElement("header");
-
-		Element title = subIndexDoc.createElement("title");
-		title.appendChild(subIndexDoc.createTextNode(indexTitle));
-		header.appendChild(title);
-
-		Element owner = subIndexDoc.createElement("owner");
-		owner.appendChild(subIndexDoc.createTextNode(indexOwner));
-		header.appendChild(owner);
-
-		if(indexOwnerEmail != null) {
-			Element email = subIndexDoc.createElement("email");
-			email.appendChild(subIndexDoc.createTextNode(indexOwnerEmail));
-			header.appendChild(email);
-		}
-
-		/* Empty containers for the files and the word index */
-		Element files = subIndexDoc.createElement("files");
-		Element entries = subIndexDoc.createElement("entries");
-		entries.setNodeValue("0");
-		entries.setAttribute("value", "0");
-		Element keywords = subIndexDoc.createElement("keywords");
-
-		subIndexRoot.appendChild(entries);
-		subIndexRoot.appendChild(header);
-		subIndexRoot.appendChild(files);
-		subIndexRoot.appendChild(keywords);
-
-		/* Serialization */
-		DOMSource source = new DOMSource(subIndexDoc);
-		Transformer writer;
-		try {
-			writer = TransformerFactory.newInstance().newTransformer();
-		} catch(javax.xml.transform.TransformerConfigurationException e) {
-			Logger.error(this, "Spider: Error while serializing XML (transformFactory.newTransformer()): "+e.toString());
-			return;
-		}
-
-		writer.setOutputProperty(OutputKeys.ENCODING, "UTF-8");
-		writer.setOutputProperty(OutputKeys.INDENT,"yes");
-
-		/* final step */
-		try {
-			writer.transform(source, target);
-		} catch(javax.xml.transform.TransformerException e) {
-			Logger.error(this, "Spider: Error while serializing XML (transform()): "+e.toString());
-			return;
-		}
-
-		if(Logger.shouldLog(Logger.MINOR, this))
-			Logger.minor(this, "Spider: indexes regenerated.");
-	}
-	
-/**
- * Stops the spider: while holding this object's monitor, sets the stopped
- * flag and empties the list of queued URIs.
- */
-public void terminate(){
-	synchronized (this) {
-		stopped = true;
-		queuedURIList.clear();
-	}
-}
-	
-/**
- * Plugin entry point: stores the respirator, takes the client core from its
- * node and configures the fetch context (10 retries for splitfile blocks and
- * for non-splitfiles, a 2 MiB limit on temporary and output length, and
- * text/html, text/plain and application/xhtml+xml as allowed MIME types).
- * It then records the current time in tProducedIndex, sets indexing to true,
- * clears the stopped flag and launches a daemon thread that sleeps for
- * 30 seconds before calling startSomeRequests().
- *
- * @param pr the respirator giving access to the node
- */
-public void runPlugin(PluginRespirator pr){
-	this.pr = pr;
-	this.core = pr.getNode().clientCore;
-	this.ctx = core.makeClient((short) 0).getFetchContext();
-	ctx.maxSplitfileBlockRetries = 10;
-	ctx.maxNonSplitfileRetries = 10;
-	ctx.maxTempLength = 2 * 1024 * 1024;
-	ctx.maxOutputLength = 2 * 1024 * 1024;
-	// Plain literals: wrapping them in new String(...) only made copies.
-	allowedMIMETypes = new HashSet();
-	allowedMIMETypes.add("text/html");
-	allowedMIMETypes.add("text/plain");
-	allowedMIMETypes.add("application/xhtml+xml");
-	// The fetch context gets its own copy of the set.
-	ctx.allowedMIMETypes = new HashSet(allowedMIMETypes);
-	tProducedIndex = System.currentTimeMillis();
-	indexing = true;
-	stopped = false;
-
-	Thread starterThread = new Thread("Spider Plugin Starter") {
-		public void run() {
-			try{
-				Thread.sleep(30 * 1000); // Let the node start up
-			} catch (InterruptedException e){
-				// Ignored on purpose: the requests are started regardless.
-			}
-			startSomeRequests();
-		}
-	};
-	starterThread.setDaemon(true);
-	starterThread.start();
-}
-
-
-/**
- * Called with a newly found edition of a USK: removes the key's URI from the
- * running fetches and queues the URI with the suggested edition set to the
- * new edition number.
- *
- * @param l the edition number that was found
- * @param key the USK the edition belongs to
- */
-public void onFoundEdition(long l, USK key){
-	FreenetURI staleUri = key.getURI();
-	// Removing an absent key is a no-op, so no containsKey() check is needed.
-	runningFetchesByURI.remove(staleUri);
-	FreenetURI editionUri = key.getURI().setSuggestedEdition(l);
-	queueURI(editionUri);
-}
-	
-	
-}




More information about the cvs mailing list