[freenet-cvs] r8275 - in trunk/freenet/src/freenet: clients/http clients/http/filter node support/io

toad at freenetproject.org toad at freenetproject.org
Sat Mar 18 15:19:05 UTC 2006


Author: toad
Date: 2006-03-18 15:18:54 +0000 (Sat, 18 Mar 2006)
New Revision: 8275

Added:
   trunk/freenet/src/freenet/clients/http/filter/CSSParser.java
   trunk/freenet/src/freenet/clients/http/filter/CSSReadFilter.java
   trunk/freenet/src/freenet/clients/http/filter/CSSTokenizerFilter.java
   trunk/freenet/src/freenet/clients/http/filter/CSSTokenizerFilter.jflex
   trunk/freenet/src/freenet/clients/http/filter/DataFilterException.java
   trunk/freenet/src/freenet/clients/http/filter/GenericReadFilterCallback.java
   trunk/freenet/src/freenet/clients/http/filter/HTMLFilter.java
   trunk/freenet/src/freenet/clients/http/filter/KnownUnsafeContentTypeException.java
   trunk/freenet/src/freenet/clients/http/filter/NullFilterCallback.java
   trunk/freenet/src/freenet/clients/http/filter/UnknownContentTypeException.java
   trunk/freenet/src/freenet/clients/http/filter/UnsafeContentTypeException.java
   trunk/freenet/src/freenet/clients/http/filter/Yytoken.java
   trunk/freenet/src/freenet/support/io/NullWriter.java
Modified:
   trunk/freenet/src/freenet/clients/http/FproxyToadlet.java
   trunk/freenet/src/freenet/clients/http/ToadletContext.java
   trunk/freenet/src/freenet/clients/http/ToadletContextImpl.java
   trunk/freenet/src/freenet/clients/http/filter/CharsetExtractor.java
   trunk/freenet/src/freenet/clients/http/filter/ContentDataFilter.java
   trunk/freenet/src/freenet/clients/http/filter/ContentFilter.java
   trunk/freenet/src/freenet/clients/http/filter/FilterCallback.java
   trunk/freenet/src/freenet/clients/http/filter/MIMEType.java
   trunk/freenet/src/freenet/node/TextModeClientInterface.java
   trunk/freenet/src/freenet/node/Version.java
Log:
544:
Anonymity filter.


Modified: trunk/freenet/src/freenet/clients/http/FproxyToadlet.java
===================================================================
--- trunk/freenet/src/freenet/clients/http/FproxyToadlet.java	2006-03-18 14:52:17 UTC (rev 8274)
+++ trunk/freenet/src/freenet/clients/http/FproxyToadlet.java	2006-03-18 15:18:54 UTC (rev 8275)
@@ -6,10 +6,15 @@
 import java.net.MalformedURLException;
 import java.net.URI;
 import java.net.URISyntaxException;
+import java.security.MessageDigest;
+import java.security.NoSuchAlgorithmException;
 
 import freenet.client.FetchException;
 import freenet.client.FetchResult;
 import freenet.client.HighLevelSimpleClient;
+import freenet.clients.http.filter.ContentFilter;
+import freenet.clients.http.filter.MIMEType;
+import freenet.clients.http.filter.UnsafeContentTypeException;
 import freenet.config.Config;
 import freenet.config.InvalidConfigValueException;
 import freenet.config.SubConfig;
@@ -19,15 +24,21 @@
 import freenet.pluginmanager.HTTPRequest;
 import freenet.pluginmanager.PproxyToadlet;
 import freenet.support.Bucket;
-import freenet.support.BucketTools;
 import freenet.support.HTMLEncoder;
+import freenet.support.HexUtil;
 import freenet.support.Logger;
 import freenet.support.MultiValueTable;
 
 public class FproxyToadlet extends Toadlet {
 	
-	public FproxyToadlet(HighLevelSimpleClient client) {
+	final byte[] random;
+	
+	// ?force= links stay valid for 1 to 2 hours: the current and the previous grain interval are accepted.
+	long FORCE_GRAIN_INTERVAL = 60*60*1000;
+	
+	public FproxyToadlet(HighLevelSimpleClient client, byte[] random) {
 		super(client);
+		this.random = random;
 	}
 	
 	public String supportedMethods() {
@@ -54,10 +65,9 @@
 		//String ks = uri.toString();
 		String ks = uri.getPath();
 		
-		HTTPRequest request = new HTTPRequest(uri);
+		HTTPRequest httprequest = new HTTPRequest(uri);
 		
 		if (ks.equals("/")) {
-			HTTPRequest httprequest = new HTTPRequest(uri);
 			if (httprequest.isParameterSet("key")) {
 				MultiValueTable headers = new MultiValueTable();
 				
@@ -88,7 +98,53 @@
 		try {
 			Logger.minor(this, "Fproxy fetching "+key);
 			FetchResult result = fetch(key);
-			writeReply(ctx, 200, result.getMimeType(), "OK", result.asBucket());
+			
+			// Now, is it safe?
+			
+			Bucket data = result.asBucket();
+			
+			String typeName = result.getMimeType();
+			
+			String reqParam = httprequest.getParam("type", null);
+			
+			if(reqParam != null)
+				typeName = reqParam;
+			
+			Logger.minor(this, "Type: "+typeName+" ("+result.getMimeType()+" "+reqParam+")");
+			
+			long now = System.currentTimeMillis();
+			
+			String forceString = httprequest.getParam("force");
+			boolean force = false;
+			if(forceString != null) {
+				if(forceString.equals(getForceValue(key, now)) || 
+						forceString.equals(getForceValue(key, now-FORCE_GRAIN_INTERVAL)))
+					force = true;
+			}
+			
+			try {
+				if(!force)
+					data = ContentFilter.filter(data, ctx.getBucketFactory(), typeName);
+				
+				// Send the data (filtered above unless a valid ?force= value was supplied)
+				writeReply(ctx, 200, typeName, "OK", data);
+			} catch (UnsafeContentTypeException e) {
+				StringBuffer buf = new StringBuffer();
+				ctx.getPageMaker().makeHead(buf, "Potentially Dangerous Content");
+				buf.append("<h1>");
+				buf.append(e.getHTMLEncodedTitle());
+				buf.append("</h1>\n");
+				buf.append(e.getExplanation());
+				buf.append("<p>Your options are:</p><ul>\n");
+				buf.append("<li><a href=\"/"+key.toString(false)+"?type=text/plain\">Click here</a> to open the file as plain text (this should not be dangerous, but it may be garbled).</li>\n");
+				// FIXME: is this safe? See bug #131
+				buf.append("<li><a href=\"/"+key.toString(false)+"?type=application/x-msdownload\">Click here</a> to force your browser to download the file to disk.</li>\n");
+				buf.append("<li><a href=\"/"+key.toString(false)+"?force="+getForceValue(key, now)+"\">Click here</a> to open the file as "+HTMLEncoder.encode(typeName)+".</li>\n");
+				buf.append("<li><a href=\"/\">Click here</a> to go to the FProxy home page.</li>\n");
+				buf.append("</ul>");
+				ctx.getPageMaker().makeTail(buf);
+				writeReply(ctx, 200, "text/html", "OK", buf.toString());
+			}
 		} catch (FetchException e) {
 			String msg = e.getMessage();
 			String extra = "";
@@ -108,6 +164,18 @@
 		}
 	}
 
+	private String getForceValue(FreenetURI key, long time) {
+		try {
+			MessageDigest md5 = MessageDigest.getInstance("SHA-256");
+			md5.update(random);
+			md5.update(key.toString(false).getBytes());
+			md5.update(Long.toString(time / FORCE_GRAIN_INTERVAL).getBytes());
+			return HexUtil.bytesToHex(md5.digest());
+		} catch (NoSuchAlgorithmException e) {
+			throw new Error(e);
+		}
+	}
+
 	public static void maybeCreateFproxyEtc(Node node, Config config) throws IOException, InvalidConfigValueException {
 		
 		SubConfig fproxyConfig = new SubConfig("fproxy", config);
@@ -118,7 +186,9 @@
 			HighLevelSimpleClient client = node.makeClient(RequestStarter.INTERACTIVE_PRIORITY_CLASS);
 			
 			node.setToadletContainer(server);
-			FproxyToadlet fproxy = new FproxyToadlet(client);
+			byte[] random = new byte[32];
+			node.random.nextBytes(random);
+			FproxyToadlet fproxy = new FproxyToadlet(client, random);
 			node.setFproxy(fproxy);
 			server.register(fproxy, "/", false);
 			
@@ -141,7 +211,7 @@
 			server.register(darknetToadlet, "/darknet/", true);
 
 		} catch (IOException ioe) {
-			Logger.error(node,"Failed to start fproxy on "+fproxyConfig.getString("bindTo")+":"+fproxyConfig.getInt("port"));
+			Logger.error(node,"Failed to start fproxy: "+ioe, ioe);
 		}
 		
 		fproxyConfig.finishedInitialization();

Modified: trunk/freenet/src/freenet/clients/http/ToadletContext.java
===================================================================
--- trunk/freenet/src/freenet/clients/http/ToadletContext.java	2006-03-18 14:52:17 UTC (rev 8274)
+++ trunk/freenet/src/freenet/clients/http/ToadletContext.java	2006-03-18 15:18:54 UTC (rev 8275)
@@ -3,6 +3,7 @@
 import java.io.IOException;
 
 import freenet.support.Bucket;
+import freenet.support.BucketFactory;
 import freenet.support.MultiValueTable;
 
 /**
@@ -35,5 +36,7 @@
 	 * Get the page maker object.
 	 */
 	PageMaker getPageMaker();
+
+	BucketFactory getBucketFactory();
 }
 

Modified: trunk/freenet/src/freenet/clients/http/ToadletContextImpl.java
===================================================================
--- trunk/freenet/src/freenet/clients/http/ToadletContextImpl.java	2006-03-18 14:52:17 UTC (rev 8274)
+++ trunk/freenet/src/freenet/clients/http/ToadletContextImpl.java	2006-03-18 15:18:54 UTC (rev 8275)
@@ -32,18 +32,19 @@
 	private final MultiValueTable headers;
 	private final OutputStream sockOutputStream;
 	private final PageMaker pagemaker;
+	private final BucketFactory bf;
 	
 	/** Is the context closed? If so, don't allow any more writes. This is because there
 	 * may be later requests.
 	 */
 	private boolean closed;
 	
-	public ToadletContextImpl(Socket sock, MultiValueTable headers, String CSSName) throws IOException {
+	public ToadletContextImpl(Socket sock, MultiValueTable headers, String CSSName, BucketFactory bf) throws IOException {
 		this.sock = sock;
 		this.headers = headers;
 		this.closed = false;
 		sockOutputStream = sock.getOutputStream();
-		
+		this.bf = bf;
 		pagemaker = new PageMaker(CSSName);
 	}
 
@@ -185,7 +186,7 @@
 				
 				boolean shouldDisconnect = shouldDisconnectAfterHandled(split[2].equals("HTTP/1.0"), headers);
 				
-				ToadletContextImpl ctx = new ToadletContextImpl(sock, headers, container.getCSSName());
+				ToadletContextImpl ctx = new ToadletContextImpl(sock, headers, container.getCSSName(), bf);
 				
 				/*
 				 * if we're handling a POST, copy the data into a bucket now,
@@ -315,4 +316,8 @@
 		if(closed) throw new ToadletContextClosedException();
 		BucketTools.copyTo(data, sockOutputStream, Long.MAX_VALUE);
 	}
+
+	public BucketFactory getBucketFactory() {
+		return bf;
+	}
 }

Added: trunk/freenet/src/freenet/clients/http/filter/CSSParser.java
===================================================================
--- trunk/freenet/src/freenet/clients/http/filter/CSSParser.java	2006-03-18 14:52:17 UTC (rev 8274)
+++ trunk/freenet/src/freenet/clients/http/filter/CSSParser.java	2006-03-18 15:18:54 UTC (rev 8275)
@@ -0,0 +1,59 @@
+package freenet.clients.http.filter;
+
+import java.io.Reader;
+import java.io.Writer;
+
+import freenet.support.Logger;
+
+/**
+ * WARNING: this is not as thorough as the HTML filter - we do not
+ * enumerate all possible attributes etc. New versions of the spec could
+ * conceivably lead to new risks. How this could happen: a) another way to
+ * include URLs, apart from @import and url() (we are safe from new @
+ * directives though); b) a way to specify the MIME type of includes, IF
+ * those includes could be a risky type (HTML, CSS, etc). This is still FAR
+ * more rigorous than the old filter though.
+ * <p>
+ * If you want extra paranoia, turn on paranoidStringCheck, which will
+ * throw an exception when it encounters strings with colons in; then the
+ * only risk is something that includes, and specifies the type of, HTML,
+ * XML or XSL.
+ * </p>
+ */
+class CSSParser extends CSSTokenizerFilter {
+
+	final FilterCallback cb;
+	
+	CSSParser(
+		Reader r,
+		Writer w,
+		boolean paranoidStringCheck,
+		FilterCallback cb) {
+		super(r, w, paranoidStringCheck);
+		this.cb = cb;
+		this.deleteErrors = super.deleteErrors;
+	}
+
+	void throwError(String s) throws DataFilterException {
+		HTMLFilter.throwFilterException(s);
+	}
+
+	String processImportURL(String s) {
+		return "\""
+			+ HTMLFilter.sanitizeURI(HTMLFilter.stripQuotes(s), "text/css", null, cb)
+			+ "\"";
+	}
+
+	String processURL(String s) {
+		return HTMLFilter.sanitizeURI(HTMLFilter.stripQuotes(s), null, null, cb);
+	}
+
+	void log(String s) {
+		if (Logger.shouldLog(Logger.DEBUG, this))
+			Logger.debug(this, s);
+	}
+
+	void logError(String s) {
+		Logger.error(this, s);
+	}
+}
\ No newline at end of file

Added: trunk/freenet/src/freenet/clients/http/filter/CSSReadFilter.java
===================================================================
--- trunk/freenet/src/freenet/clients/http/filter/CSSReadFilter.java	2006-03-18 14:52:17 UTC (rev 8274)
+++ trunk/freenet/src/freenet/clients/http/filter/CSSReadFilter.java	2006-03-18 15:18:54 UTC (rev 8275)
@@ -0,0 +1,83 @@
+package freenet.clients.http.filter;
+
+import java.io.BufferedReader;
+import java.io.BufferedWriter;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.InputStreamReader;
+import java.io.OutputStream;
+import java.io.OutputStreamWriter;
+import java.io.Reader;
+import java.io.UnsupportedEncodingException;
+import java.io.Writer;
+import java.util.HashMap;
+
+import freenet.support.Bucket;
+import freenet.support.BucketFactory;
+import freenet.support.HTMLEncoder;
+import freenet.support.Logger;
+import freenet.support.io.NullWriter;
+
+public class CSSReadFilter implements ContentDataFilter, CharsetExtractor {
+
+	public Bucket readFilter(Bucket bucket, BucketFactory bf, String charset,
+			HashMap otherParams, FilterCallback cb) throws DataFilterException,
+			IOException {
+		if (Logger.shouldLog(Logger.DEBUG, this))
+			Logger.debug(
+				this,
+				"running "
+					+ this
+					+ " on "
+					+ bucket
+					+ ","
+					+ charset);
+		InputStream strm = bucket.getInputStream();
+		Bucket temp = bf.makeBucket(bucket.size());
+		OutputStream os = temp.getOutputStream();
+		Reader r;
+		Writer w;
+		try {
+			r = new BufferedReader(new InputStreamReader(strm, charset), 32768);
+			w = new BufferedWriter(new OutputStreamWriter(os, charset), 32768);
+		} catch (UnsupportedEncodingException e) {
+			os.close();
+			strm.close();
+			throw new DataFilterException("Warning: Unknown character set ("+charset+")", "Warning: Unknown character set ("+HTMLEncoder.encode(charset)+")",
+					"<p><b>Unknown character set</b> The page you are about to display has an unknown character set. "+
+					"This means that we are not able to filter the page, and it may compromize your anonymity.");
+		}
+		CSSParser parser = new CSSParser(r, w, false, cb);
+		parser.parse();
+		r.close();
+		w.close();
+		return temp;
+	}
+
+	public Bucket writeFilter(Bucket data, BucketFactory bf, String charset,
+			HashMap otherParams, FilterCallback cb) throws DataFilterException,
+			IOException {
+		throw new UnsupportedOperationException();
+	}
+
+	public String getCharset(Bucket bucket, String parseCharset) throws DataFilterException, IOException {
+		InputStream strm = bucket.getInputStream();
+		Writer w = new NullWriter();
+		Reader r;
+		try {
+			r = new BufferedReader(new InputStreamReader(strm, parseCharset), 32768);
+		} catch (UnsupportedEncodingException e) {
+			throw new Error(e);
+		}
+		CSSParser parser = new CSSParser(r, w, false, new NullFilterCallback());
+		try {
+			parser.parse();
+		} catch (Throwable t) {
+			// Ignore ALL errors!
+			Logger.minor(this, "Caught "+t+" trying to detect MIME type with "+parseCharset);
+		}
+		r.close();
+		return parser.detectedCharset;
+	}
+
+}

Added: trunk/freenet/src/freenet/clients/http/filter/CSSTokenizerFilter.java
===================================================================
--- trunk/freenet/src/freenet/clients/http/filter/CSSTokenizerFilter.java	2006-03-18 14:52:17 UTC (rev 8274)
+++ trunk/freenet/src/freenet/clients/http/filter/CSSTokenizerFilter.java	2006-03-18 15:18:54 UTC (rev 8275)
@@ -0,0 +1,1499 @@
+/* The following code was generated by JFlex 1.3.5 on 18/03/06 13:46 */
+
+package freenet.clients.http.filter;
+import java.io.*;
+import java.util.*;
+/* This class tokenizes a CSS2 Reader stream, writes it out to the output Writer, and filters any URLs found */
+// WARNING: this is not as thorough as the HTML parser - new versions of the standard could lead to anonymity risks. See comments in SaferFilter.java
+// Mostly from http://www.w3.org/TR/REC-CSS2/grammar.html
+
+
+/**
+ * This class is a scanner generated by 
+ * <a href="http://www.jflex.de/">JFlex</a> 1.3.5
+ * on 18/03/06 13:46 from the specification file
+ * <tt>file:/usr/src/cvs/eclipse-workspace/Freenet 0.7/src/freenet/clients/http/filter/CSSTokenizerFilter.jflex</tt>
+ */
+class CSSTokenizerFilter {
+
+  /** This character denotes the end of file */
+  final public static int YYEOF = -1;
+
+  /** initial size of the lookahead buffer */
+  final private static int YY_BUFFERSIZE = 16384;
+
+  /** lexical states */
+  final public static int YYINITIAL = 0;
+
+  /** 
+   * Translates characters to character classes
+   */
+  final private static String yycmap_packed = 
+    "\11\0\1\3\1\3\1\0\1\3\1\3\22\0\1\5\1\43\1\11"+
+    "\1\25\1\4\1\63\1\4\1\12\1\23\1\14\1\41\1\56\1\26"+
+    "\1\10\1\13\1\40\12\1\1\64\1\35\1\42\1\46\1\44\1\24"+
+    "\1\31\1\50\1\6\1\54\1\52\1\51\1\53\1\22\1\60\1\20"+
+    "\1\7\1\62\1\30\1\32\1\21\1\34\1\33\1\7\1\17\1\15"+
+    "\1\16\1\27\1\7\1\55\1\57\1\7\1\61\1\4\1\2\4\4"+
+    "\1\50\1\6\1\54\1\52\1\51\1\53\1\22\1\60\1\20\1\7"+
+    "\1\62\1\30\1\32\1\21\1\34\1\33\1\7\1\17\1\15\1\16"+
+    "\1\27\1\7\1\55\1\57\1\7\1\61\1\36\1\47\1\37\1\45"+
+    "\uff81\0";
+
+  /** 
+   * Translates characters to character classes
+   */
+  final private static char [] yycmap = yy_unpack_cmap(yycmap_packed);
+
+  /** 
+   * Translates a state to a row index in the transition table
+   */
+  final private static int yy_rowMap [] = { 
+        0,    53,   106,   159,   212,   265,   318,   371,   424,   477, 
+       53,   530,    53,   583,   636,    53,   689,   742,   795,   848, 
+      901,   954,  1007,   159,  1060,   477,    53,  1113,    53,    53, 
+     1166,  1219,   371,  1272,    53,   424,  1325,  1378,  1431,  1484, 
+     1537,  1590,  1643,  1696,  1749,  1802,  1855,  1908,  1961,  2014, 
+     2067,  2120,  2173,    53,    53,  2226,  2279,  2332,    53,   371, 
+      424,  2385,  2438,  2491,  2544,  2597,  2650,  2703,  2756,  2809, 
+     2862,  2915,  2968,  3021,  3074,  3127,  3180,  3233,    53,  3286, 
+     3339,  3392,  3445,  3498,  3551,  3604,  3657,  3710,  3763,  3816, 
+     3869,  3922,  3975,  4028,  4081,  4134,  4187,  4240,  1696,  4293, 
+     4346,  4399,  4452,  4505,  4558,    53,  4611,    53,  4664,  4717, 
+     4770,  4823,  4876,  4929,  4982,  4081,  5035,  5088,    53,  5141, 
+     5194,  5247,  5300,  5353,  5406,  5459,  1696,  5512,  5565,  2067, 
+     5618,  5671,  5724,  5777,  5830,  4929,  5883,  5936,  5989,  6042, 
+     6095,  6148,    53,  6201,  6254,  6307,  6360,  6413,  1696,  6466, 
+     6519,  6572,  6625,  6678,  1537,  6731,  6784,  5035,  5989,  6837, 
+     6890,  6943,  5088,  6095,  6996,  5141,  7049,  7102,  7155,  7208, 
+     7261,  7314,  7367,  7420,  7473,  7526,  5989,  7579,  6095,  7632, 
+     7685,    53,  7738,  7791,  7844,  7897,  7950,  8003,  8056,  8109, 
+     8162,  8162,  8215,  8268,  8321,  8374,  8427,  8480,  8533,  8586, 
+     7897,  7897,  8639,  8692,  8745,  8798,  8851,  8798,  8162,  8904, 
+     8957,  9010,  9063,  9116,  9169,    53,  9222,  9275,  9328,  9381, 
+     9434,    53,  9487,  9540,  9593,  9646,  9699,  1696,  9752,  9805, 
+     9858,  9911,  9911,  9964, 10017, 10070, 10123, 10176,  8056, 10229, 
+    10282, 10335, 10388, 10441,  8798, 10494, 10547, 10600,  9328, 10653, 
+       53, 10706, 10759, 10812, 10865, 10918, 10971, 11024, 11077, 11130, 
+    11183, 11236, 11289, 11342, 11289, 11395, 11448, 11501,  9063, 11554, 
+    11607, 11660, 11713, 11766, 11819, 11872, 11925, 11978, 12031, 12084, 
+    12137, 12190,  9911,    53, 12243, 12296, 12349, 12402, 12455, 12508, 
+    12561, 12614, 12667, 12720, 12773, 12826, 12879, 12932, 12985, 13038, 
+    13091, 13144, 13197, 13250, 13303
+  };
+
+  /** 
+   * The packed transition table of the DFA (part 0)
+   */
+  final private static String yy_packed0 = 
+    "\1\2\1\3\1\4\1\5\1\2\1\5\2\6\1\7"+
+    "\1\10\1\11\1\12\1\13\6\6\2\2\1\14\1\15"+
+    "\1\16\1\6\1\17\3\6\1\20\1\21\1\22\1\23"+
+    "\1\15\1\24\1\25\1\15\1\26\1\2\1\27\6\6"+
+    "\1\15\4\6\1\2\1\15\66\0\1\3\1\30\1\31"+
+    "\1\0\1\31\3\6\2\0\1\32\1\0\6\6\1\33"+
+    "\2\0\1\34\2\6\1\0\3\6\1\35\12\0\6\6"+
+    "\1\0\4\6\1\36\2\0\1\37\1\6\1\0\2\6"+
+    "\1\37\41\6\5\37\10\6\3\0\1\5\1\0\1\5"+
+    "\30\0\1\21\1\22\26\0\1\6\1\30\1\31\1\0"+
+    "\1\31\3\6\4\0\6\6\1\33\2\0\1\34\2\6"+
+    "\1\0\3\6\1\35\12\0\6\6\1\0\4\6\12\0"+
+    "\1\40\54\0\2\41\1\42\6\41\1\43\53\41\2\44"+
+    "\1\45\7\44\1\43\52\44\1\0\1\46\64\0\1\47"+
+    "\1\50\3\0\1\47\2\51\4\0\6\51\4\0\2\51"+
+    "\1\0\3\51\13\0\5\47\1\51\1\0\4\51\3\0"+
+    "\1\6\1\30\1\31\1\0\1\31\3\6\4\0\2\6"+
+    "\1\52\3\6\1\33\2\0\1\34\2\6\1\0\3\6"+
+    "\1\35\12\0\6\6\1\53\4\6\3\0\1\54\1\55"+
+    "\3\0\2\54\5\0\3\54\1\56\2\54\4\0\2\54"+
+    "\1\0\1\57\1\60\1\54\13\0\3\54\1\61\1\62"+
+    "\1\54\1\0\4\54\5\0\1\21\1\0\1\21\62\0"+
+    "\1\22\1\0\1\22\120\0\1\63\66\0\1\64\57\0"+
+    "\1\65\74\0\1\66\64\0\1\67\21\0\1\31\1\0"+
+    "\1\31\20\0\1\34\6\0\1\35\30\0\1\70\1\71"+
+    "\1\34\1\0\1\34\2\70\5\0\6\70\4\0\2\70"+
+    "\1\0\3\70\13\0\6\70\1\0\4\70\3\0\1\72"+
+    "\1\30\1\6\1\0\1\6\1\72\2\6\4\0\6\6"+
+    "\1\33\2\0\1\34\2\6\1\0\3\6\1\35\12\0"+
+    "\5\72\1\6\1\0\4\6\46\0\1\73\20\0\2\41"+
+    "\1\42\6\41\1\74\53\41\2\44\1\45\7\44\1\75"+
+    "\52\44\1\0\1\46\13\0\1\36\1\0\1\76\1\77"+
+    "\1\0\1\100\7\0\1\101\1\102\15\0\1\103\1\104"+
+    "\1\0\1\105\3\0\1\106\1\0\1\107\1\36\2\0"+
+    "\1\110\1\50\3\0\1\110\2\51\4\0\6\51\4\0"+
+    "\2\51\1\0\3\51\13\0\5\110\1\51\1\0\4\51"+
+    "\3\0\1\111\1\51\1\0\2\51\1\111\41\51\5\111"+
+    "\10\51\1\0\1\51\1\50\3\0\3\51\4\0\6\51"+
+    "\4\0\2\51\1\0\3\51\13\0\6\51\1\0\4\51"+
+    "\3\0\1\6\1\30\1\31\1\0\1\31\3\6\4\0"+
+    "\6\6\1\33\2\0\1\34\1\6\1\112\1\0\3\6"+
+    "\1\35\12\0\6\6\1\0\4\6\3\0\1\113\4\0"+
+    "\1\113\15\0\1\114\23\0\5\113\10\0\1\115\1\54"+
+    "\1\116\3\115\3\54\1\0\3\115\6\54\4\115\2\54"+
+    "\1\115\3\54\1\117\1\115\1\117\10\115\6\54\1\115"+
+    "\4\54\2\115\1\0\1\120\1\54\1\0\2\54\1\120"+
+    "\41\54\5\120\10\54\1\115\1\54\1\116\3\115\3\54"+
+    "\1\0\3\115\6\54\4\115\2\54\1\115\1\121\2\54"+
+    "\1\117\1\115\1\117\10\115\6\54\1\115\4\54\3\115"+
+    "\1\54\1\116\3\115\3\54\1\0\3\115\6\54\4\115"+
+    "\2\54\1\115\3\54\1\117\1\115\1\117\10\115\1\54"+
+    "\1\122\4\54\1\115\4\54\3\115\1\54\1\116\3\115"+
+    "\3\54\1\0\3\115\6\54\4\115\2\54\1\115\3\54"+
+    "\1\117\1\115\1\117\10\115\1\123\5\54\1\115\4\54"+
+    "\3\115\1\54\1\116\3\115\3\54\1\0\3\115\6\54"+
+    "\4\115\2\54\1\115\2\54\1\124\1\117\1\115\1\117"+
+    "\10\115\6\54\1\115\4\54\3\115\1\54\1\116\3\115"+
+    "\3\54\1\0\3\115\6\54\4\115\2\54\1\115\3\54"+
+    "\1\117\1\115\1\117\10\115\6\54\1\115\1\54\1\125"+
+    "\2\54\2\115\41\63\1\126\23\63\10\0\1\127\131\0"+
+    "\1\130\10\0\1\70\1\71\1\31\1\0\1\31\3\70"+
+    "\4\0\6\70\3\0\1\34\2\70\1\0\3\70\1\35"+
+    "\12\0\6\70\1\0\4\70\3\0\1\131\1\70\1\0"+
+    "\2\70\1\131\41\70\5\131\10\70\1\0\1\132\1\30"+
+    "\1\6\1\0\1\6\1\132\2\6\4\0\6\6\1\33"+
+    "\2\0\1\34\2\6\1\0\3\6\1\35\12\0\5\132"+
+    "\1\6\1\0\4\6\52\0\1\133\35\0\1\36\62\0"+
+    "\1\76\62\0\1\36\14\0\1\36\106\0\1\36\2\0"+
+    "\1\36\37\0\1\36\24\0\1\36\56\0\1\134\45\0"+
+    "\1\36\113\0\1\36\63\0\1\106\5\0\1\135\1\50"+
+    "\3\0\1\135\2\51\4\0\6\51\4\0\2\51\1\0"+
+    "\3\51\13\0\5\135\1\51\1\0\4\51\3\0\1\136"+
+    "\1\50\1\51\1\0\1\51\1\136\2\51\4\0\6\51"+
+    "\4\0\2\51\1\0\3\51\13\0\5\136\1\51\1\0"+
+    "\4\51\3\0\1\6\1\30\1\31\1\0\1\31\3\6"+
+    "\4\0\6\6\1\137\2\0\1\34\2\6\1\0\3\6"+
+    "\1\35\12\0\6\6\1\0\4\6\3\0\1\140\4\0"+
+    "\1\140\1\0\1\141\13\0\1\142\23\0\5\140\34\0"+
+    "\1\142\40\0\11\115\1\0\23\115\1\117\1\115\1\117"+
+    "\26\115\1\120\1\54\1\115\2\54\1\120\26\54\1\143"+
+    "\1\54\1\143\10\54\5\120\10\54\1\115\1\144\1\116"+
+    "\1\54\1\115\1\54\1\144\2\54\1\0\3\115\6\54"+
+    "\4\115\2\54\1\115\3\54\1\117\1\115\1\117\10\115"+
+    "\5\144\1\54\1\115\4\54\3\115\1\54\1\116\3\115"+
+    "\3\54\1\0\3\115\6\54\4\115\2\54\1\115\1\54"+
+    "\1\145\1\54\1\117\1\115\1\117\10\115\6\54\1\115"+
+    "\4\54\3\115\1\54\1\116\3\115\3\54\1\0\3\115"+
+    "\6\54\4\115\2\54\1\115\3\54\1\117\1\115\1\117"+
+    "\10\115\2\54\1\146\3\54\1\115\4\54\3\115\1\54"+
+    "\1\116\3\115\3\54\1\0\3\115\5\54\1\147\4\115"+
+    "\2\54\1\115\3\54\1\117\1\115\1\117\10\115\6\54"+
+    "\1\115\4\54\3\115\1\54\1\116\3\115\3\54\1\0"+
+    "\3\115\4\54\1\150\1\54\4\115\2\54\1\115\3\54"+
+    "\1\117\1\115\1\117\10\115\6\54\1\115\4\54\3\115"+
+    "\1\54\1\116\3\115\3\54\1\0\3\115\6\54\4\115"+
+    "\2\54\1\115\3\54\1\117\1\115\1\117\10\115\1\151"+
+    "\5\54\1\115\4\54\2\115\40\63\1\152\1\153\23\63"+
+    "\10\0\1\154\113\0\1\155\26\0\1\156\1\71\1\70"+
+    "\1\0\1\70\1\156\2\70\4\0\6\70\3\0\1\34"+
+    "\2\70\1\0\3\70\1\35\12\0\5\156\1\70\1\0"+
+    "\4\70\3\0\1\157\1\30\1\6\1\0\1\6\1\157"+
+    "\2\6\4\0\6\6\1\33\2\0\1\34\2\6\1\0"+
+    "\3\6\1\35\12\0\5\157\1\6\1\0\4\6\54\0"+
+    "\1\36\34\0\1\36\43\0\1\160\1\50\3\0\1\160"+
+    "\2\51\4\0\6\51\4\0\2\51\1\0\3\51\13\0"+
+    "\5\160\1\51\1\0\4\51\3\0\1\161\1\50\1\51"+
+    "\1\0\1\51\1\161\2\51\4\0\6\51\4\0\2\51"+
+    "\1\0\3\51\13\0\5\161\1\51\1\0\4\51\2\0"+
+    "\2\162\1\163\1\164\1\162\1\164\3\162\1\165\1\166"+
+    "\1\162\1\167\6\162\1\170\41\162\1\0\1\171\4\0"+
+    "\1\171\1\0\1\141\13\0\1\172\23\0\5\171\11\0"+
+    "\1\173\4\0\1\173\41\0\5\173\34\0\1\172\40\0"+
+    "\1\115\1\174\1\116\1\54\1\115\1\54\1\174\2\54"+
+    "\1\0\3\115\6\54\4\115\2\54\1\115\3\54\1\117"+
+    "\1\115\1\117\10\115\5\174\1\54\1\115\4\54\3\115"+
+    "\1\54\1\116\3\115\3\54\1\0\3\115\6\54\4\115"+
+    "\2\54\1\115\2\54\1\175\1\117\1\115\1\117\10\115"+
+    "\6\54\1\115\4\54\3\115\1\54\1\116\3\115\3\54"+
+    "\1\0\3\115\3\54\1\176\2\54\4\115\2\54\1\115"+
+    "\3\54\1\117\1\115\1\117\10\115\6\54\1\115\4\54"+
+    "\3\115\1\54\1\116\3\115\3\54\1\0\3\115\6\54"+
+    "\4\115\2\54\1\115\3\54\1\117\1\115\1\117\10\115"+
+    "\1\54\1\177\4\54\1\115\4\54\3\115\1\54\1\116"+
+    "\3\115\3\54\1\0\3\115\1\54\1\200\4\54\4\115"+
+    "\2\54\1\115\3\54\1\117\1\115\1\117\10\115\6\54"+
+    "\1\115\4\54\3\115\1\54\1\116\3\115\3\54\1\0"+
+    "\3\115\2\54\1\201\3\54\4\115\2\54\1\115\3\54"+
+    "\1\117\1\115\1\117\10\115\6\54\1\115\4\54\2\115"+
+    "\40\63\1\202\1\153\23\63\20\0\1\203\45\0\1\204"+
+    "\1\71\1\70\1\0\1\70\1\204\2\70\4\0\6\70"+
+    "\3\0\1\34\2\70\1\0\3\70\1\35\12\0\5\204"+
+    "\1\70\1\0\4\70\3\0\1\205\1\30\1\6\1\0"+
+    "\1\6\1\205\2\6\4\0\6\6\1\33\2\0\1\34"+
+    "\2\6\1\0\3\6\1\35\12\0\5\205\1\6\1\0"+
+    "\4\6\3\0\1\206\1\50\3\0\1\206\2\51\4\0"+
+    "\6\51\4\0\2\51\1\0\3\51\13\0\5\206\1\51"+
+    "\1\0\4\51\3\0\1\207\1\50\1\51\1\0\1\51"+
+    "\1\207\2\51\4\0\6\51\4\0\2\51\1\0\3\51"+
+    "\13\0\5\207\1\51\1\0\4\51\2\0\2\162\1\163"+
+    "\6\162\2\170\1\162\1\167\6\162\1\170\43\162\1\163"+
+    "\11\162\1\210\50\162\2\165\1\211\6\165\1\212\2\165"+
+    "\1\213\50\165\2\166\1\214\7\166\1\212\1\166\1\215"+
+    "\50\166\2\170\1\216\11\170\1\217\50\170\1\0\1\220"+
+    "\4\0\1\220\1\0\1\141\13\0\1\221\23\0\5\220"+
+    "\34\0\1\221\41\0\1\222\4\0\1\222\41\0\5\222"+
+    "\10\0\1\115\1\223\1\116\1\54\1\115\1\54\1\223"+
+    "\2\54\1\0\3\115\6\54\4\115\2\54\1\115\3\54"+
+    "\1\117\1\115\1\117\10\115\5\223\1\54\1\115\4\54"+
+    "\3\115\1\54\1\116\3\115\3\54\1\0\3\115\2\54"+
+    "\1\224\3\54\4\115\2\54\1\115\3\54\1\117\1\115"+
+    "\1\117\10\115\6\54\1\115\4\54\3\115\1\54\1\116"+
+    "\3\115\3\54\1\0\3\115\6\54\4\115\2\54\1\115"+
+    "\3\54\1\117\1\115\1\117\10\115\1\225\5\54\1\115"+
+    "\4\54\3\115\1\54\1\116\3\115\2\54\1\226\1\0"+
+    "\3\115\6\54\4\115\2\54\1\115\3\54\1\117\1\115"+
+    "\1\117\10\115\6\54\1\115\4\54\3\115\1\54\1\116"+
+    "\3\115\3\54\1\0\3\115\1\227\5\54\4\115\2\54"+
+    "\1\115\3\54\1\117\1\115\1\117\10\115\6\54\1\115"+
+    "\4\54\2\115\32\0\1\230\33\0\1\231\1\71\1\70"+
+    "\1\0\1\70\1\231\2\70\4\0\6\70\3\0\1\34"+
+    "\2\70\1\0\3\70\1\35\12\0\5\231\1\70\1\0"+
+    "\4\70\3\0\1\232\1\30\1\6\1\0\1\6\1\232"+
+    "\2\6\4\0\6\6\1\33\2\0\1\34\2\6\1\0"+
+    "\3\6\1\35\12\0\5\232\1\6\1\0\4\6\3\0"+
+    "\1\233\1\50\3\0\1\233\2\51\4\0\6\51\4\0"+
+    "\2\51\1\0\3\51\13\0\5\233\1\51\1\0\4\51"+
+    "\3\0\1\234\1\50\1\51\1\0\1\51\1\234\2\51"+
+    "\4\0\6\51\4\0\2\51\1\0\3\51\13\0\5\234"+
+    "\1\51\1\0\4\51\2\0\2\165\1\211\6\165\1\235"+
+    "\2\165\1\236\50\165\2\170\1\216\1\212\1\170\1\212"+
+    "\6\170\1\167\50\170\2\237\1\240\6\237\1\241\53\237"+
+    "\2\166\1\214\7\166\1\242\1\166\1\243\50\166\2\244"+
+    "\1\245\7\244\1\241\52\244\2\170\1\216\11\170\1\246"+
+    "\50\170\1\0\1\247\4\0\1\247\1\0\1\141\13\0"+
+    "\1\250\23\0\5\247\34\0\1\250\41\0\1\251\4\0"+
+    "\1\251\41\0\5\251\10\0\1\115\1\252\1\116\1\54"+
+    "\1\115\1\54\1\252\2\54\1\0\3\115\6\54\4\115"+
+    "\2\54\1\115\3\54\1\117\1\115\1\117\10\115\5\252"+
+    "\1\54\1\115\4\54\3\115\1\54\1\116\3\115\3\54"+
+    "\1\0\3\115\1\54\1\253\4\54\4\115\2\54\1\115"+
+    "\3\54\1\117\1\115\1\117\10\115\6\54\1\115\4\54"+
+    "\3\115\1\54\1\116\3\115\3\54\1\0\3\115\6\54"+
+    "\4\115\2\54\1\115\3\54\1\117\1\115\1\117\10\115"+
+    "\3\54\1\254\2\54\1\115\4\54\3\115\1\54\1\116"+
+    "\3\115\3\54\1\0\3\115\6\54\4\115\2\54\1\115"+
+    "\3\54\1\117\1\115\1\117\10\115\1\54\1\255\4\54"+
+    "\1\115\4\54\2\115\33\0\1\256\32\0\1\257\1\71"+
+    "\1\70\1\0\1\70\1\257\2\70\4\0\6\70\3\0"+
+    "\1\34\2\70\1\0\3\70\1\35\12\0\5\257\1\70"+
+    "\1\0\4\70\3\0\1\6\1\30\1\6\1\0\4\6"+
+    "\4\0\6\6\1\33\2\0\1\34\2\6\1\0\3\6"+
+    "\1\35\12\0\6\6\1\0\4\6\3\0\1\260\1\50"+
+    "\1\51\1\0\1\51\1\260\2\51\4\0\6\51\4\0"+
+    "\2\51\1\0\3\51\13\0\5\260\1\51\1\0\4\51"+
+    "\2\0\2\165\1\211\1\235\1\165\1\235\3\165\1\212"+
+    "\2\165\1\261\50\165\2\237\1\240\6\237\1\262\53\237"+
+    "\3\0\1\241\1\0\1\241\6\0\1\167\50\0\2\166"+
+    "\1\214\1\242\1\166\1\242\4\166\1\212\1\166\1\263"+
+    "\50\166\2\244\1\245\7\244\1\264\52\244\1\0\1\265"+
+    "\4\0\1\265\1\0\1\141\13\0\1\266\23\0\5\265"+
+    "\34\0\1\266\41\0\1\267\4\0\1\267\41\0\5\267"+
+    "\10\0\1\115\1\270\1\116\1\54\1\115\1\54\1\270"+
+    "\2\54\1\0\3\115\6\54\4\115\2\54\1\115\3\54"+
+    "\1\117\1\115\1\117\10\115\5\270\1\54\1\115\4\54"+
+    "\2\115\1\271\1\272\1\273\1\274\1\271\1\274\3\272"+
+    "\1\275\1\276\1\271\1\115\6\272\1\115\3\271\2\272"+
+    "\1\271\3\272\1\277\1\271\1\300\10\271\6\272\1\271"+
+    "\4\272\2\271\1\115\1\54\1\116\3\115\3\54\1\0"+
+    "\3\115\6\54\4\115\2\54\1\115\3\54\1\117\1\115"+
+    "\1\117\10\115\1\301\5\54\1\115\4\54\3\115\1\54"+
+    "\1\116\3\115\3\54\1\0\3\115\1\54\1\302\4\54"+
+    "\4\115\2\54\1\115\3\54\1\117\1\115\1\117\10\115"+
+    "\6\54\1\115\4\54\2\115\34\0\1\303\31\0\1\304"+
+    "\1\71\1\70\1\0\1\70\1\304\2\70\4\0\6\70"+
+    "\3\0\1\34\2\70\1\0\3\70\1\35\12\0\5\304"+
+    "\1\70\1\0\4\70\3\0\1\51\1\50\1\51\1\0"+
+    "\4\51\4\0\6\51\4\0\2\51\1\0\3\51\13\0"+
+    "\6\51\1\0\4\51\2\0\2\237\1\240\1\262\1\237"+
+    "\1\262\3\237\1\241\2\237\1\261\50\237\2\244\1\245"+
+    "\1\264\1\244\1\264\4\244\1\241\1\244\1\263\50\244"+
+    "\10\0\1\141\55\0\1\305\4\0\1\305\41\0\5\305"+
+    "\10\0\1\115\1\54\1\116\1\54\1\115\4\54\1\0"+
+    "\3\115\6\54\4\115\2\54\1\115\3\54\1\117\1\115"+
+    "\1\117\10\115\6\54\1\115\4\54\2\115\2\271\1\306"+
+    "\6\271\1\0\1\115\1\271\1\115\6\271\1\115\11\271"+
+    "\1\277\1\271\1\300\26\271\1\272\1\273\3\271\3\272"+
+    "\1\0\1\115\1\271\1\115\6\272\1\115\3\271\2\272"+
+    "\1\271\3\272\1\277\1\271\1\300\10\271\6\272\1\271"+
+    "\4\272\3\271\1\307\1\310\1\271\2\272\1\307\26\272"+
+    "\1\311\1\272\1\312\10\272\5\307\10\272\2\271\1\306"+
+    "\1\274\1\271\1\274\3\271\1\275\1\276\1\271\1\115"+
+    "\6\271\1\115\11\271\1\277\1\271\1\300\25\271\2\275"+
+    "\1\313\6\275\1\314\53\275\2\276\1\315\6\276\1\316"+
+    "\1\317\22\276\1\320\1\276\1\320\25\276\2\321\1\322"+
+    "\6\321\2\0\1\321\1\0\6\321\1\0\11\321\1\277"+
+    "\27\321\1\115\1\54\1\116\3\115\3\54\1\0\3\115"+
+    "\6\54\4\115\2\54\1\115\3\54\1\117\1\115\1\117"+
+    "\10\115\4\54\1\323\1\54\1\115\4\54\3\115\1\54"+
+    "\1\116\1\324\1\115\1\324\3\54\1\325\1\326\2\115"+
+    "\6\54\4\115\2\54\1\115\3\54\1\117\1\115\1\117"+
+    "\10\115\6\54\1\115\4\54\2\115\17\0\1\327\46\0"+
+    "\1\70\1\71\1\70\1\0\4\70\4\0\6\70\3\0"+
+    "\1\34\2\70\1\0\3\70\1\35\12\0\6\70\1\0"+
+    "\4\70\3\0\1\330\4\0\1\330\41\0\5\330\10\0"+
+    "\2\271\1\306\6\271\1\321\23\271\1\277\1\271\1\300"+
+    "\26\271\1\331\1\273\1\272\1\271\1\272\1\331\2\272"+
+    "\1\0\1\115\1\271\1\115\6\272\1\115\3\271\2\272"+
+    "\1\271\3\272\1\277\1\271\1\300\10\271\5\331\1\272"+
+    "\1\271\4\272\3\271\1\272\1\273\3\271\3\272\1\321"+
+    "\3\271\6\272\4\271\2\272\1\271\3\272\1\277\1\271"+
+    "\1\300\10\271\6\272\1\271\4\272\2\271\2\275\1\313"+
+    "\6\275\1\332\53\275\1\0\1\333\1\334\1\335\1\0"+
+    "\1\335\2\333\5\0\6\333\4\0\2\333\1\0\3\333"+
+    "\1\336\12\0\6\333\1\0\4\333\2\0\2\276\1\315"+
+    "\6\276\1\316\1\337\22\276\1\320\1\276\1\320\25\276"+
+    "\2\316\1\340\7\316\1\314\52\316\1\115\1\341\1\342"+
+    "\1\343\1\115\1\343\2\341\1\115\1\0\3\115\6\341"+
+    "\4\115\2\341\1\115\3\341\1\336\1\115\1\117\10\115"+
+    "\6\341\1\115\4\341\2\115\2\321\1\322\32\321\1\277"+
+    "\27\321\1\115\1\54\1\116\3\115\3\54\1\0\3\115"+
+    "\6\54\4\115\2\54\1\115\3\54\1\117\1\115\1\117"+
+    "\10\115\1\54\1\344\4\54\1\115\4\54\5\115\1\324"+
+    "\1\115\1\324\3\115\1\325\1\326\22\115\1\117\1\115"+
+    "\1\117\25\115\2\325\1\345\6\325\1\346\53\325\2\326"+
+    "\1\347\6\326\1\350\1\115\22\326\1\351\1\326\1\351"+
+    "\25\326\16\0\1\352\46\0\1\271\1\353\1\273\1\272"+
+    "\1\271\1\272\1\353\2\272\1\0\1\115\1\271\1\115"+
+    "\6\272\1\115\3\271\2\272\1\271\3\272\1\277\1\271"+
+    "\1\300\10\271\5\353\1\272\1\271\4\272\2\271\1\275"+
+    "\1\354\1\355\1\356\1\275\1\356\2\354\1\275\1\314"+
+    "\3\275\6\354\4\275\2\354\1\275\3\354\1\357\12\275"+
+    "\6\354\1\275\4\354\2\275\1\0\1\333\1\334\1\360"+
+    "\1\0\1\360\3\333\4\0\6\333\3\0\1\335\2\333"+
+    "\1\0\3\333\1\336\12\0\6\333\1\0\4\333\3\0"+
+    "\1\361\1\333\1\0\2\333\1\361\41\333\5\361\10\333"+
+    "\1\0\1\333\1\334\1\335\1\0\1\335\2\333\5\0"+
+    "\6\333\4\0\2\333\1\0\3\333\13\0\6\333\1\0"+
+    "\4\333\2\0\1\276\1\362\1\363\1\364\1\276\1\364"+
+    "\2\362\1\276\1\316\1\317\2\276\6\362\4\276\2\362"+
+    "\1\276\3\362\1\365\1\276\1\320\10\276\6\362\1\276"+
+    "\4\362\2\276\2\316\1\340\7\316\1\366\52\316\1\115"+
+    "\1\341\1\342\1\367\1\115\1\367\3\341\1\0\3\115"+
+    "\6\341\3\115\1\343\2\341\1\115\3\341\1\336\1\115"+
+    "\1\117\10\115\6\341\1\115\4\341\3\115\1\370\1\341"+
+    "\1\115\2\341\1\370\2\341\1\333\23\341\1\371\1\341"+
+    "\1\371\10\341\5\370\10\341\1\115\1\341\1\342\1\343"+
+    "\1\115\1\343\2\341\1\115\1\0\3\115\6\341\4\115"+
+    "\2\341\1\115\3\341\1\117\1\115\1\117\10\115\6\341"+
+    "\1\115\4\341\2\115\2\325\1\345\6\325\1\372\53\325"+
+    "\3\0\1\346\1\0\1\346\27\0\1\373\27\0\2\326"+
+    "\1\347\6\326\1\350\23\326\1\351\1\326\1\351\25\326"+
+    "\2\350\1\374\7\350\1\346\52\350\50\0\1\375\14\0"+
+    "\1\271\1\376\1\273\1\272\1\271\1\272\1\376\2\272"+
+    "\1\0\1\115\1\271\1\115\6\272\1\115\3\271\2\272"+
+    "\1\271\3\272\1\277\1\271\1\300\10\271\5\376\1\272"+
+    "\1\271\4\272\2\271\1\275\1\354\1\355\1\377\1\275"+
+    "\1\377\3\354\1\314\3\275\6\354\3\275\1\356\2\354"+
+    "\1\275\3\354\1\357\12\275\6\354\1\275\4\354\3\275"+
+    "\1\u0100\1\u0101\1\275\2\354\1\u0100\2\354\1\u0102\36\354"+
+    "\5\u0100\10\354\1\275\1\354\1\355\1\356\1\275\1\356"+
+    "\2\354\1\275\1\314\3\275\6\354\4\275\2\354\1\275"+
+    "\3\354\13\275\6\354\1\275\4\354\2\275\3\0\1\360"+
+    "\1\0\1\360\20\0\1\335\6\0\1\336\30\0\1\u0103"+
+    "\1\334\1\333\1\0\1\333\1\u0103\2\333\4\0\6\333"+
+    "\3\0\1\335\2\333\1\0\3\333\1\336\12\0\5\u0103"+
+    "\1\333\1\0\4\333\2\0\1\276\1\362\1\363\1\u0104"+
+    "\1\276\1\u0104\3\362\1\316\1\317\2\276\6\362\3\276"+
+    "\1\364\2\362\1\276\3\362\1\365\1\276\1\320\10\276"+
+    "\6\362\1\276\4\362\3\276\1\u0105\1\u0106\1\276\2\362"+
+    "\1\u0105\2\362\1\u0107\1\u0108\22\362\1\u0109\1\362\1\u0109"+
+    "\10\362\5\u0105\10\362\1\276\1\362\1\363\1\364\1\276"+
+    "\1\364\2\362\1\276\1\316\1\317\2\276\6\362\4\276"+
+    "\2\362\1\276\3\362\1\320\1\276\1\320\10\276\6\362"+
+    "\1\276\4\362\2\276\1\316\1\u0107\1\u010a\1\u010b\1\316"+
+    "\1\u010b\2\u0107\2\316\1\314\2\316\6\u0107\4\316\2\u0107"+
+    "\1\316\3\u0107\1\365\12\316\6\u0107\1\316\4\u0107\2\316"+
+    "\3\115\1\367\1\115\1\367\3\115\1\0\14\115\1\343"+
+    "\6\115\1\336\1\115\1\117\26\115\1\u010c\1\342\1\341"+
+    "\1\115\1\341\1\u010c\2\341\1\0\3\115\6\341\3\115"+
+    "\1\343\2\341\1\115\3\341\1\336\1\115\1\117\10\115"+
+    "\5\u010c\1\341\1\115\4\341\2\115\2\325\1\345\1\372"+
+    "\1\325\1\372\3\325\1\346\23\325\1\u010d\27\325\2\350"+
+    "\1\374\7\350\1\u010e\52\350\21\0\1\u010f\43\0\1\271"+
+    "\1\u0110\1\273\1\272\1\271\1\272\1\u0110\2\272\1\0"+
+    "\1\115\1\271\1\115\6\272\1\115\3\271\2\272\1\271"+
+    "\3\272\1\277\1\271\1\300\10\271\5\u0110\1\272\1\271"+
+    "\4\272\2\271\2\275\1\313\1\377\1\275\1\377\3\275"+
+    "\1\314\14\275\1\356\6\275\1\357\30\275\1\u0111\1\355"+
+    "\1\354\1\275\1\354\1\u0111\2\354\1\314\3\275\6\354"+
+    "\3\275\1\356\2\354\1\275\3\354\1\357\12\275\5\u0111"+
+    "\1\354\1\275\4\354\3\275\1\354\1\355\1\377\1\275"+
+    "\1\377\3\354\1\332\3\275\6\354\3\275\1\356\2\354"+
+    "\1\275\3\354\1\357\12\275\6\354\1\275\4\354\3\275"+
+    "\1\354\1\355\1\u0112\1\275\1\u0112\3\354\1\314\3\275"+
+    "\6\354\3\275\1\356\2\354\1\275\3\354\1\357\12\275"+
+    "\6\354\1\275\4\354\2\275\1\0\1\u0113\1\334\1\333"+
+    "\1\0\1\333\1\u0113\2\333\4\0\6\333\3\0\1\335"+
+    "\2\333\1\0\3\333\1\336\12\0\5\u0113\1\333\1\0"+
+    "\4\333\2\0\2\276\1\315\1\u0104\1\276\1\u0104\3\276"+
+    "\1\316\1\317\13\276\1\364\6\276\1\365\1\276\1\320"+
+    "\26\276\1\u0114\1\363\1\362\1\276\1\362\1\u0114\2\362"+
+    "\1\316\1\317\2\276\6\362\3\276\1\364\2\362\1\276"+
+    "\3\362\1\365\1\276\1\320\10\276\5\u0114\1\362\1\276"+
+    "\4\362\3\276\1\362\1\363\1\u0104\1\276\1\u0104\3\362"+
+    "\1\316\1\337\2\276\6\362\3\276\1\364\2\362\1\276"+
+    "\3\362\1\365\1\276\1\320\10\276\6\362\1\276\4\362"+
+    "\2\276\1\316\1\u0107\1\u010a\1\u0115\1\316\1\u0115\3\u0107"+
+    "\1\316\1\314\2\316\6\u0107\3\316\1\u010b\2\u0107\1\316"+
+    "\3\u0107\1\365\12\316\6\u0107\1\316\4\u0107\2\316\1\276"+
+    "\1\362\1\363\1\u0116\1\276\1\u0116\3\362\1\316\1\317"+
+    "\2\276\6\362\3\276\1\364\2\362\1\276\3\362\1\365"+
+    "\1\276\1\320\10\276\6\362\1\276\4\362\2\276\1\316"+
+    "\1\u0117\1\u0118\1\316\2\u0107\1\u0117\3\u0107\1\u0119\35\u0107"+
+    "\5\u0117\10\u0107\1\316\1\u0107\1\u010a\1\u010b\1\316\1\u010b"+
+    "\2\u0107\2\316\1\314\2\316\6\u0107\4\316\2\u0107\1\316"+
+    "\3\u0107\13\316\6\u0107\1\316\4\u0107\2\316\1\115\1\u011a"+
+    "\1\342\1\341\1\115\1\341\1\u011a\2\341\1\0\3\115"+
+    "\6\341\3\115\1\343\2\341\1\115\3\341\1\336\1\115"+
+    "\1\117\10\115\5\u011a\1\341\1\115\4\341\2\115\2\350"+
+    "\1\374\1\u010e\1\350\1\u010e\4\350\1\346\22\350\1\u011b"+
+    "\27\350\16\0\1\u011c\46\0\1\271\1\u011d\1\273\1\272"+
+    "\1\271\1\272\1\u011d\2\272\1\0\1\115\1\271\1\115"+
+    "\6\272\1\115\3\271\2\272\1\271\3\272\1\277\1\271"+
+    "\1\300\10\271\5\u011d\1\272\1\271\4\272\2\271\1\275"+
+    "\1\u011e\1\355\1\354\1\275\1\354\1\u011e\2\354\1\314"+
+    "\3\275\6\354\3\275\1\356\2\354\1\275\3\354\1\357"+
+    "\12\275\5\u011e\1\354\1\275\4\354\3\275\1\354\1\355"+
+    "\1\u0112\1\275\1\u0112\2\354\1\275\1\314\3\275\6\354"+
+    "\3\275\1\356\2\354\1\275\3\354\1\357\12\275\6\354"+
+    "\1\275\4\354\2\275\1\0\1\u011f\1\334\1\333\1\0"+
+    "\1\333\1\u011f\2\333\4\0\6\333\3\0\1\335\2\333"+
+    "\1\0\3\333\1\336\12\0\5\u011f\1\333\1\0\4\333"+
+    "\2\0\1\276\1\u0120\1\363\1\362\1\276\1\362\1\u0120"+
+    "\2\362\1\316\1\317\2\276\6\362\3\276\1\364\2\362"+
+    "\1\276\3\362\1\365\1\276\1\320\10\276\5\u0120\1\362"+
+    "\1\276\4\362\2\276\2\316\1\340\1\u0115\1\316\1\u0115"+
+    "\4\316\1\314\13\316\1\u010b\6\316\1\365\27\316\1\276"+
+    "\1\362\1\363\1\u0116\1\276\1\u0116\2\362\1\276\1\316"+
+    "\1\317\2\276\6\362\3\276\1\364\2\362\1\276\3\362"+
+    "\1\365\1\276\1\320\10\276\6\362\1\276\4\362\2\276"+
+    "\1\316\1\u0121\1\u010a\1\u0107\1\316\1\u0107\1\u0121\2\u0107"+
+    "\1\316\1\314\2\316\6\u0107\3\316\1\u010b\2\u0107\1\316"+
+    "\3\u0107\1\365\12\316\5\u0121\1\u0107\1\316\4\u0107\3\316"+
+    "\1\u0107\1\u010a\1\u0115\1\316\1\u0115\3\u0107\1\316\1\366"+
+    "\2\316\6\u0107\3\316\1\u010b\2\u0107\1\316\3\u0107\1\365"+
+    "\12\316\6\u0107\1\316\4\u0107\3\316\1\u0107\1\u010a\1\u0122"+
+    "\1\316\1\u0122\3\u0107\1\316\1\314\2\316\6\u0107\3\316"+
+    "\1\u010b\2\u0107\1\316\3\u0107\1\365\12\316\6\u0107\1\316"+
+    "\4\u0107\2\316\1\115\1\u0123\1\342\1\341\1\115\1\341"+
+    "\1\u0123\2\341\1\0\3\115\6\341\3\115\1\343\2\341"+
+    "\1\115\3\341\1\336\1\115\1\117\10\115\5\u0123\1\341"+
+    "\1\115\4\341\2\115\1\271\1\272\1\273\1\272\1\271"+
+    "\4\272\1\0\1\115\1\271\1\115\6\272\1\115\3\271"+
+    "\2\272\1\271\3\272\1\277\1\271\1\300\10\271\6\272"+
+    "\1\271\4\272\2\271\1\275\1\u0124\1\355\1\354\1\275"+
+    "\1\354\1\u0124\2\354\1\314\3\275\6\354\3\275\1\356"+
+    "\2\354\1\275\3\354\1\357\12\275\5\u0124\1\354\1\275"+
+    "\4\354\2\275\1\0\1\u0125\1\334\1\333\1\0\1\333"+
+    "\1\u0125\2\333\4\0\6\333\3\0\1\335\2\333\1\0"+
+    "\3\333\1\336\12\0\5\u0125\1\333\1\0\4\333\2\0"+
+    "\1\276\1\u0126\1\363\1\362\1\276\1\362\1\u0126\2\362"+
+    "\1\316\1\317\2\276\6\362\3\276\1\364\2\362\1\276"+
+    "\3\362\1\365\1\276\1\320\10\276\5\u0126\1\362\1\276"+
+    "\4\362\2\276\1\316\1\u0127\1\u010a\1\u0107\1\316\1\u0107"+
+    "\1\u0127\2\u0107\1\316\1\314\2\316\6\u0107\3\316\1\u010b"+
+    "\2\u0107\1\316\3\u0107\1\365\12\316\5\u0127\1\u0107\1\316"+
+    "\4\u0107\3\316\1\u0107\1\u010a\1\u0122\1\316\1\u0122\2\u0107"+
+    "\2\316\1\314\2\316\6\u0107\3\316\1\u010b\2\u0107\1\316"+
+    "\3\u0107\1\365\12\316\6\u0107\1\316\4\u0107\2\316\1\115"+
+    "\1\u0128\1\342\1\341\1\115\1\341\1\u0128\2\341\1\0"+
+    "\3\115\6\341\3\115\1\343\2\341\1\115\3\341\1\336"+
+    "\1\115\1\117\10\115\5\u0128\1\341\1\115\4\341\2\115"+
+    "\1\275\1\u0129\1\355\1\354\1\275\1\354\1\u0129\2\354"+
+    "\1\314\3\275\6\354\3\275\1\356\2\354\1\275\3\354"+
+    "\1\357\12\275\5\u0129\1\354\1\275\4\354\2\275\1\0"+
+    "\1\u012a\1\334\1\333\1\0\1\333\1\u012a\2\333\4\0"+
+    "\6\333\3\0\1\335\2\333\1\0\3\333\1\336\12\0"+
+    "\5\u012a\1\333\1\0\4\333\2\0\1\276\1\u012b\1\363"+
+    "\1\362\1\276\1\362\1\u012b\2\362\1\316\1\317\2\276"+
+    "\6\362\3\276\1\364\2\362\1\276\3\362\1\365\1\276"+
+    "\1\320\10\276\5\u012b\1\362\1\276\4\362\2\276\1\316"+
+    "\1\u012c\1\u010a\1\u0107\1\316\1\u0107\1\u012c\2\u0107\1\316"+
+    "\1\314\2\316\6\u0107\3\316\1\u010b\2\u0107\1\316\3\u0107"+
+    "\1\365\12\316\5\u012c\1\u0107\1\316\4\u0107\2\316\1\115"+
+    "\1\u012d\1\342\1\341\1\115\1\341\1\u012d\2\341\1\0"+
+    "\3\115\6\341\3\115\1\343\2\341\1\115\3\341\1\336"+
+    "\1\115\1\117\10\115\5\u012d\1\341\1\115\4\341\2\115"+
+    "\1\275\1\u012e\1\355\1\354\1\275\1\354\1\u012e\2\354"+
+    "\1\314\3\275\6\354\3\275\1\356\2\354\1\275\3\354"+
+    "\1\357\12\275\5\u012e\1\354\1\275\4\354\2\275\1\0"+
+    "\1\333\1\334\1\333\1\0\4\333\4\0\6\333\3\0"+
+    "\1\335\2\333\1\0\3\333\1\336\12\0\6\333\1\0"+
+    "\4\333\2\0\1\276\1\u012f\1\363\1\362\1\276\1\362"+
+    "\1\u012f\2\362\1\316\1\317\2\276\6\362\3\276\1\364"+
+    "\2\362\1\276\3\362\1\365\1\276\1\320\10\276\5\u012f"+
+    "\1\362\1\276\4\362\2\276\1\316\1\u0130\1\u010a\1\u0107"+
+    "\1\316\1\u0107\1\u0130\2\u0107\1\316\1\314\2\316\6\u0107"+
+    "\3\316\1\u010b\2\u0107\1\316\3\u0107\1\365\12\316\5\u0130"+
+    "\1\u0107\1\316\4\u0107\2\316\1\115\1\341\1\342\1\341"+
+    "\1\115\4\341\1\0\3\115\6\341\3\115\1\343\2\341"+
+    "\1\115\3\341\1\336\1\115\1\117\10\115\6\341\1\115"+
+    "\4\341\2\115\1\275\1\354\1\355\1\354\1\275\4\354"+
+    "\1\314\3\275\6\354\3\275\1\356\2\354\1\275\3\354"+
+    "\1\357\12\275\6\354\1\275\4\354\2\275\1\276\1\362"+
+    "\1\363\1\362\1\276\4\362\1\316\1\317\2\276\6\362"+
+    "\3\276\1\364\2\362\1\276\3\362\1\365\1\276\1\320"+
+    "\10\276\6\362\1\276\4\362\2\276\1\316\1\u0131\1\u010a"+
+    "\1\u0107\1\316\1\u0107\1\u0131\2\u0107\1\316\1\314\2\316"+
+    "\6\u0107\3\316\1\u010b\2\u0107\1\316\3\u0107\1\365\12\316"+
+    "\5\u0131\1\u0107\1\316\4\u0107\3\316\1\u0107\1\u010a\1\u0107"+
+    "\1\316\4\u0107\1\316\1\314\2\316\6\u0107\3\316\1\u010b"+
+    "\2\u0107\1\316\3\u0107\1\365\12\316\6\u0107\1\316\4\u0107"+
+    "\2\316";
+
+  /** 
+   * The transition table of the DFA
+   */
+  final private static int yytrans [] = yy_unpack();
+
+
+  /* error codes */
+  final private static int YY_UNKNOWN_ERROR = 0;
+  final private static int YY_ILLEGAL_STATE = 1;
+  final private static int YY_NO_MATCH = 2;
+  final private static int YY_PUSHBACK_2BIG = 3;
+
+  /* error messages for the codes above */
+  final private static String YY_ERROR_MSG[] = {
+    "Unkown internal scanner error",
+    "Internal error: unknown state",
+    "Error: could not match input",
+    "Error: pushback value was too large"
+  };
+
+  /**
+   * YY_ATTRIBUTE[aState] contains the attributes of state <code>aState</code>
+   */
+  private final static byte YY_ATTRIBUTE[] = {
+     0,  9,  1,  1,  1,  1,  1,  1,  1,  1,  9,  1,  9,  1,  1,  9, 
+     1,  1,  1,  1,  1,  1,  1,  0,  0,  0,  9,  0,  9,  9,  1,  0, 
+     0,  0,  9,  0,  0,  1,  1,  0,  1,  1,  0,  0,  0,  0,  0,  0, 
+     0,  0,  0,  0,  0,  9,  9,  0,  0,  1,  9,  1,  1,  0,  0,  0, 
+     0,  0,  0,  0,  0,  0,  0,  1,  1,  1,  1,  1,  0,  0,  9,  0, 
+     0,  0,  0,  0,  0,  0,  0,  0,  0,  1,  0,  0,  1,  1,  1,  1, 
+     0,  1,  1,  0,  0,  0,  0,  0,  0,  9,  0,  9,  0,  0,  1,  1, 
+     1,  0,  0,  0,  0,  0,  9,  0,  1,  1,  1,  0,  0,  0,  1,  0, 
+     0,  1,  0,  0,  1,  1,  1,  1,  0,  0,  1,  0,  1,  0,  9,  1, 
+     1,  1,  0,  0,  1,  0,  0,  0,  0,  1,  1,  1,  0,  1,  0,  0, 
+     0,  0,  1,  0,  0,  1,  1,  1,  1,  0,  0,  0,  0,  0,  0,  1, 
+     1,  0,  1,  0,  1,  9,  1,  0,  0,  0,  0,  0,  0,  0,  1,  1, 
+     0,  0,  0,  0,  1,  0,  0,  0,  1,  1,  0,  0,  0,  0,  0,  1, 
+     0,  0,  0,  0,  0,  0,  0,  9,  0,  0,  0,  0,  0,  9,  0,  0, 
+     0,  0,  0,  1,  0,  0,  0,  0,  1,  0,  0,  0,  0,  0,  1,  0, 
+     0,  0,  0,  0,  1,  0,  0,  0,  1,  0,  9,  0,  0,  0,  0,  0, 
+     0,  0,  0,  0,  0,  0,  0,  0,  1,  0,  0,  0,  1,  0,  0,  0, 
+     0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  1,  9,  0,  0,  0,  0, 
+     0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0
+  };
+
+  /** the input device */
+  private java.io.Reader yy_reader;
+
+  /** the current state of the DFA */
+  private int yy_state;
+
+  /** the current lexical state */
+  private int yy_lexical_state = YYINITIAL;
+
+  /** this buffer contains the current text to be matched and is
+      the source of the yytext() string */
+  private char yy_buffer[] = new char[YY_BUFFERSIZE];
+
+  /** the textposition at the last accepting state */
+  private int yy_markedPos;
+
+  /** the textposition at the last state to be included in yytext */
+  private int yy_pushbackPos;
+
+  /** the current text position in the buffer */
+  private int yy_currentPos;
+
+  /** startRead marks the beginning of the yytext() string in the buffer */
+  private int yy_startRead;
+
+  /** endRead marks the last character in the buffer, that has been read
+      from input */
+  private int yy_endRead;
+
+  /** number of newlines encountered up to the start of the matched text */
+  private int yyline;
+
+  /** the number of characters up to the start of the matched text */
+  private int yychar;
+
+  /**
+   * the number of characters from the last newline up to the start of the 
+   * matched text
+   */
+  private int yycolumn; 
+
+  /** 
+   * yy_atBOL == true <=> the scanner is currently at the beginning of a line
+   */
+  private boolean yy_atBOL = true;
+
+  /** yy_atEOF == true <=> the scanner is at the EOF */
+  private boolean yy_atEOF;
+
+  /* user code: */
+	// Put stuff to include in the class here
+	// NOTE(review): detectedCharset is not read or written in the code visible
+	// here; presumably filled in by a scanner action - confirm.
+	String detectedCharset;
+
+	// External flag
+	// paranoidStringCheck: set by the three-argument constructor and consulted
+	// by the string action, which tests matched strings for a ':' character.
+	boolean paranoidStringCheck = false;
+	// deleteErrors: when true an unknown @identifier is discarded (and logged if
+	// debug is on); when false the action calls throwError() instead.
+	boolean deleteErrors = true;
+	// debug: guards the log() calls in the scanner actions. Defaults to true, so
+	// every matched token is printed to System.err unless a caller turns it off.
+	boolean debug = true;
+	
+	// Internal flags
+	// postBadImportFlag: when set, the next mediums list is dropped instead of
+	// being written, and the flag is cleared again.
+	boolean postBadImportFlag = false; // both URLs and @import's
+	// NOTE(review): importFlag and urlFlag are not used in the code visible
+	// here - confirm whether they are still needed.
+	boolean importFlag = false;
+	boolean urlFlag = false;
+
+	// Writer
+	// Destination of the filtered CSS; assigned by the three-argument constructor.
+	Writer w = null; // Will NPE if not initialized properly
+
+	/**
+	 * Drives the scanner over the whole input by calling yylex() repeatedly
+	 * until it returns null. The returned tokens themselves are discarded.
+	 */
+	public void parse () throws IOException, DataFilterException {
+		Yytoken token;
+		do {
+			token = yylex();
+		} while (token != null);
+	}
+	
+	/**
+	 * Creates a filter that scans from r and writes its output to w.
+	 *
+	 * @param r                    input to scan
+	 * @param w                    writer the scanner actions write to
+	 * @param paranoidStringCheck  initial value of the paranoidStringCheck flag
+	 */
+	CSSTokenizerFilter(Reader r, Writer w, boolean paranoidStringCheck) {
+		this(r);
+		// Configure the user-code fields once the scanner itself is set up
+		this.paranoidStringCheck = paranoidStringCheck;
+		this.w = w;
+	}
+
+	/**
+	 * Error hook called by the scanner actions. This default implementation
+	 * only signals that it has not been overridden.
+	 *
+	 * @param s description of the problem found in the input
+	 */
+	void throwError(String s) throws IOException, DataFilterException {
+		IllegalStateException notOverridden =
+			new IllegalStateException("You MUST override throwError!");
+		throw notOverridden;
+	}
+
+	/**
+	 * URL hook; this default implementation only signals that it has not
+	 * been overridden.
+	 *
+	 * @param s the URL to process
+	 */
+	String processImportURL(String s) {
+		IllegalStateException notOverridden =
+			new IllegalStateException("You MUST override processImportURL!");
+		throw notOverridden;
+	}
+
+	/**
+	 * URL hook called by the scanner actions with a decoded URL; the actions
+	 * treat a null or empty result as "drop this URL". This default
+	 * implementation only signals that it has not been overridden.
+	 *
+	 * @param s the decoded URL
+	 */
+	String processURL(String s) {
+		IllegalStateException notOverridden =
+			new IllegalStateException("You MUST override processURL!");
+		throw notOverridden;
+	}
+	
+	/** Prints a prefixed debug message to System.err. */
+	void log(String s) {
+		String line = "CSSTokenizerFilter: "+s;
+		System.err.println(line);
+	}
+
+	/** Prints a prefixed error message to System.err. */
+	void logError(String s) {
+		String line = "CSSTokenizerFilter ERROR: "+s;
+		System.err.println(line);
+	}
+	
+	/**
+	 * Strips the first and last character of s when s is longer than one
+	 * character and ends with c. Only the trailing character is compared with
+	 * c; the leading one is removed unchecked.
+	 *
+	 * @param s the possibly quoted text
+	 * @param c the expected closing quote character
+	 * @return the text between the first and last character, or "" when s is
+	 *         too short or does not end with c
+	 */
+	static String unquote(String s, char c) {
+		int last = s.length() - 1;
+		if(last < 1) return "";
+		if(s.charAt(last) != c) return "";
+		return s.substring(1, last);
+	}
+	
+	// This is not very efficient. The parser below knows the quoting rules too.
+	
+	/** Returns true iff c is one of the ASCII characters 0-9, a-f or A-F. */
+	static boolean isHexDigit(char c) {
+		if(c >= '0' && c <= '9') return true;
+		if(c >= 'a' && c <= 'f') return true;
+		return c >= 'A' && c <= 'F';
+	}
+	
+	/**
+	 * Decodes a CSS string or url() token into its raw data and re-encodes it
+	 * on output.
+	 *
+	 * The constructor strips an optional "url(" prefix and an optional opening
+	 * quote, resolves backslash escapes, and keeps whatever follows the
+	 * terminator in suffix. toString() rebuilds the token from the (possibly
+	 * replaced) data, escaping every character that would otherwise change
+	 * how the token is parsed.
+	 */
+	class DecodedStringThingy {
+		char quote; // " " means not quoted
+		boolean url; // in a url() ?
+		String data;
+		public String suffix; // includes any whitespace
+
+		/**
+		 * @param s the token text: optionally starting with "url(", then an
+		 *          optional quote, the contents, and the terminator(s)
+		 */
+		public DecodedStringThingy(String s) {
+			if(s.startsWith("url(")) {
+				s = s.substring("url(".length());
+				url = true;
+			}
+			// An empty remainder is treated as an empty unquoted string
+			char q = s.length() > 0 ? s.charAt(0) : ' ';
+			if(q == '\'' || q == '\"') {
+				quote = q;
+				s = s.substring(1);
+			} else quote = ' ';
+			StringBuffer buffer = new StringBuffer();
+			// Digits of the hex escape being collected; non-empty means we are
+			// in the middle of one
+			StringBuffer hexEscape = new StringBuffer();
+			boolean justEscaping = false;
+			int x = 0;
+			// The last character is deliberately not scanned: it is the
+			// terminator (closing quote or ')')
+			while(x < s.length()-1) {
+				char c = s.charAt(x);
+				x++;
+				if(justEscaping) {
+					justEscaping = false;
+					if(isHexDigit(c)) {
+						hexEscape.append(c);
+					} else {
+						// Literal escaped character (including newline); it is
+						// re-escaped on output where necessary
+						buffer.append(c);
+					}
+					continue;
+				}
+				if(hexEscape.length() > 0) {
+					if(isHexDigit(c) && hexEscape.length() < 6) {
+						hexEscape.append(c);
+						continue;
+					}
+					appendHexEscape(buffer, hexEscape);
+					hexEscape.setLength(0);
+					// Ignore one whitespace char after an escape
+					if(Character.isWhitespace(c)) continue;
+					// Any other character ends the escape and must still be
+					// interpreted normally: it may be the closing quote or
+					// the start of another escape
+				}
+				if(quote != ' ' && c == quote) {
+					break;
+				} else if (c == '\\') {
+					justEscaping = true;
+				} else {
+					buffer.append(c);
+				}
+			}
+			// Do not lose a hex escape that runs up to the terminator
+			if(hexEscape.length() > 0)
+				appendHexEscape(buffer, hexEscape);
+			data = buffer.toString();
+			if(s.length() > (x+1))
+				suffix = s.substring(x+1);
+			else suffix = "";
+		}
+
+		/**
+		 * Appends the character denoted by the collected hex digits. Code
+		 * points above 0xFFFF are not supported: they are reported and
+		 * replaced by a comment written to the output.
+		 */
+		private void appendHexEscape(StringBuffer buffer, StringBuffer hexEscape) {
+			int d = Integer.parseInt(hexEscape.toString(), 16);
+			if(d > 0xFFFF) {
+				String error =
+				    "UCS-4 CHARACTERS OVER 0xFFFF NOT SUPPORTED!";
+				logError(error);
+				try {
+					w.write("/* "+error+"*/");
+				} catch (IOException e) {
+					// The constructor cannot throw IOException without
+					// changing its signature, so report it, don't hide it
+					logError("Could not write warning comment: "+e);
+				}
+			} else {
+				buffer.append((char)d);
+			}
+		}
+		
+		/** Rebuilds the token: url( + quote + escaped data + quote + ) + suffix. */
+		public String toString() {
+			StringBuffer out = new StringBuffer();
+			if(url) out.append("url(");
+			if(quote != ' ') out.append(quote);
+			out.append(unescapeData());
+			if(quote != ' ') out.append(quote);
+			if(url) out.append(")");
+			out.append(suffix);
+			return out.toString();
+		}
+		
+		/**
+		 * Returns data with a backslash in front of every character that needs
+		 * one. Despite its name, this method escapes rather than unescapes.
+		 */
+		public String unescapeData() {
+			StringBuffer sb = new StringBuffer();
+			for(int i=0;i<data.length();i++) {
+				char c = data.charAt(i);
+				if(needsEscape(c)) {
+					sb.append('\\');
+				}
+				sb.append(c);
+			}
+			return sb.toString();
+		}
+
+		/** Tells whether c must be backslash-escaped when written out. */
+		private boolean needsEscape(char c) {
+			// A raw backslash would escape whatever follows it, possibly the
+			// closing quote
+			if(c == '\\' || c == '\n' || c == quote) return true;
+			// In an unquoted url() parentheses and quotes are delimiters
+			return quote == ' ' &&
+				(c == '(' || c == ')' || c == '\'' || c == '\"');
+		}
+	}
+
+
+  /**
+   * Creates a new scanner
+   * There is also a java.io.InputStream version of this constructor.
+   *
+   * @param   in  the java.io.Reader to read input from.
+   */
+  CSSTokenizerFilter(java.io.Reader in) {
+    // All other scanner fields keep their declared initial values
+    yy_reader = in;
+  }
+
+  /**
+   * Creates a new scanner.
+   * There is also java.io.Reader version of this constructor.
+   *
+   * @param   in  the java.io.Inputstream to read input from.
+   */
+  CSSTokenizerFilter(java.io.InputStream in) {
+    // InputStreamReader(InputStream) decodes bytes with the platform default
+    // charset; callers that know the charset should pass a Reader instead.
+    this(new java.io.InputStreamReader(in));
+  }
+
+  /** 
+   * Unpacks the split, compressed DFA transition table.
+   *
+   * @return the unpacked transition table
+   */
+  private static int [] yy_unpack() {
+    // Only one packed chunk exists, so it is unpacked from offset 0 and the
+    // returned end offset is not needed.
+    int [] table = new int[13356];
+    yy_unpack(yy_packed0, 0, table);
+    return table;
+  }
+
+  /** 
+   * Unpacks the compressed DFA transition table.
+   *
+   * @param packed   the packed transition table
+   * @return         the index just past the last entry written
+   */
+  private static int yy_unpack(String packed, int offset, int [] trans) {
+    int out = offset;                 /* next free index in trans */
+    final int end = packed.length();
+    /* the packed string is a sequence of (count, value+1) character pairs */
+    for (int in = 0; in < end; ) {
+      int repeat = packed.charAt(in++);
+      int value = packed.charAt(in++) - 1;
+      /* every pair yields at least one entry, even if its count is 0 */
+      trans[out++] = value;
+      for (repeat--; repeat > 0; repeat--) {
+        trans[out++] = value;
+      }
+    }
+    return out;
+  }
+
+  /** 
+   * Unpacks the compressed character translation table.
+   *
+   * @param packed   the packed character translation table
+   * @return         the unpacked character translation table
+   */
+  private static char [] yy_unpack_cmap(String packed) {
+    char [] table = new char[0x10000];
+    int out = 0;  /* next free index in table */
+    /* the first 182 characters of packed are read as (count, value) pairs */
+    for (int in = 0; in < 182; ) {
+      int  repeat = packed.charAt(in++);
+      char value  = packed.charAt(in++);
+      /* every pair yields at least one entry, even if its count is 0 */
+      table[out++] = value;
+      for (repeat--; repeat > 0; repeat--) {
+        table[out++] = value;
+      }
+    }
+    return table;
+  }
+
+
+  /**
+   * Refills the input buffer.
+   *
+   * @return      <code>false</code>, iff there was new input.
+   * 
+   * @exception   IOException  if any I/O-Error occurs
+   */
+  private boolean yy_refill() throws java.io.IOException {
+
+    /* first: make room (if you can) */
+    if (yy_startRead > 0) {
+      System.arraycopy(yy_buffer, yy_startRead, 
+                       yy_buffer, 0, 
+                       yy_endRead-yy_startRead);
+
+      /* translate stored positions */
+      yy_endRead-= yy_startRead;
+      yy_currentPos-= yy_startRead;
+      yy_markedPos-= yy_startRead;
+      yy_pushbackPos-= yy_startRead;
+      yy_startRead = 0;
+    }
+
+    /* is the buffer big enough? */
+    if (yy_currentPos >= yy_buffer.length) {
+      /* if not: blow it up */
+      char newBuffer[] = new char[yy_currentPos*2];
+      System.arraycopy(yy_buffer, 0, newBuffer, 0, yy_buffer.length);
+      yy_buffer = newBuffer;
+    }
+
+    /* finally: fill the buffer with new input */
+    int numRead = yy_reader.read(yy_buffer, yy_endRead, 
+                                            yy_buffer.length-yy_endRead);
+
+    if (numRead > 0) {
+      yy_endRead+= numRead;
+      return false;
+    }
+
+    /* Unlikely but possible: the reader delivered nothing although it is not
+       at end of stream. Reporting that as "new input" would make the caller
+       consume a stale buffer slot, so force progress with a single-character
+       read, which either returns a character or -1 at end of stream. */
+    if (numRead == 0) {
+      int c = yy_reader.read();
+      if (c == -1) {
+        return true;
+      }
+      yy_buffer[yy_endRead++] = (char) c;
+      return false;
+    }
+
+    /* numRead < 0: end of stream */
+    return true;
+  }
+
+
+  /**
+   * Closes the input stream.
+   */
+  final public void yyclose() throws java.io.IOException {
+    /* invalidate the buffer and mark the input as exhausted */
+    yy_endRead = yy_startRead;
+    yy_atEOF = true;
+
+    /* nothing to close if no reader was ever attached */
+    if (yy_reader == null) {
+      return;
+    }
+    yy_reader.close();
+  }
+
+
+  /**
+   * Closes the current stream, and resets the
+   * scanner to read from a new input stream.
+   *
+   * All internal variables are reset, the old input stream 
+   * <b>cannot</b> be reused (internal buffer is discarded and lost).
+   * Lexical state is set to <tt>YY_INITIAL</tt>.
+   *
+   * @param reader   the new input stream 
+   */
+  final public void yyreset(java.io.Reader reader) throws java.io.IOException {
+    /* release the old input first */
+    yyclose();
+    yy_reader = reader;
+
+    /* scanner flags */
+    yy_atEOF  = false;
+    yy_atBOL  = true;
+
+    /* buffer positions */
+    yy_startRead = 0;
+    yy_endRead = 0;
+    yy_pushbackPos = 0;
+    yy_markedPos = 0;
+    yy_currentPos = 0;
+
+    /* position counters */
+    yycolumn = 0;
+    yychar = 0;
+    yyline = 0;
+
+    yy_lexical_state = YYINITIAL;
+  }
+
+
+  /**
+   * Returns the current lexical state.
+   */
+  final public int yystate() {
+    final int current = this.yy_lexical_state;
+    return current;
+  }
+
+
+  /**
+   * Enters a new lexical state
+   *
+   * @param newState the new lexical state
+   */
+  final public void yybegin(int newState) {
+    /* takes effect when the next match starts from the lexical state */
+    this.yy_lexical_state = newState;
+  }
+
+
+  /**
+   * Returns the text matched by the current regular expression.
+   */
+  final public String yytext() {
+    /* the match spans from yy_startRead up to, not including, yy_markedPos */
+    int length = yy_markedPos - yy_startRead;
+    return new String(yy_buffer, yy_startRead, length);
+  }
+
+
+  /**
+   * Returns the character at position <tt>pos</tt> from the 
+   * matched text. 
+   * 
+   * It is equivalent to yytext().charAt(pos), but faster
+   *
+   * @param pos the position of the character to fetch. 
+   *            A value from 0 to yylength()-1.
+   *
+   * @return the character at position pos
+   */
+  final public char yycharat(int pos) {
+    /* pos is relative to the start of the matched text */
+    int index = yy_startRead + pos;
+    return yy_buffer[index];
+  }
+
+
+  /**
+   * Returns the length of the matched text region.
+   */
+  final public int yylength() {
+    int matchEnd = yy_markedPos;
+    return matchEnd - yy_startRead;
+  }
+
+
+  /**
+   * Reports an error that occurred while scanning.
+   *
+   * In a wellformed scanner (no or only correct usage of 
+   * yypushback(int) and a match-all fallback rule) this method 
+   * will only be called with things that "Can't Possibly Happen".
+   * If this method is called, something is seriously wrong
+   * (e.g. a JFlex bug producing a faulty scanner etc.).
+   *
+   * Usual syntax/scanner level error handling should be done
+   * in error fallback rules.
+   *
+   * @param   errorCode  the code of the errormessage to display
+   */
+  private void yy_ScanError(int errorCode) {
+    /* codes outside the message table fall back to the generic message */
+    boolean known = errorCode >= 0 && errorCode < YY_ERROR_MSG.length;
+    String message = known ? YY_ERROR_MSG[errorCode]
+                           : YY_ERROR_MSG[YY_UNKNOWN_ERROR];
+
+    throw new Error(message);
+  }
+
+
+  /**
+   * Pushes the specified amount of characters back into the input stream.
+   *
+   * They will be read again by the next call of the scanning method
+   *
+   * @param number  the number of characters to be read again.
+   *                This number must not be greater than yylength()!
+   */
+  private void yypushback(int number)  {
+    /* cannot give back more than was matched; yy_ScanError always throws */
+    if ( yylength() < number ) {
+      yy_ScanError(YY_PUSHBACK_2BIG);
+    }
+
+    /* shrinking the marked position makes the next match start earlier */
+    yy_markedPos = yy_markedPos - number;
+  }
+
+
+  /**
+   * Resumes scanning until the next regular expression is matched,
+   * the end of input is encountered or an I/O-Error occurs.
+   *
+   * @return      the next token
+   * @exception   IOException  if any I/O-Error occurs
+ * @throws DataFilterException 
+   */
+  public Yytoken yylex() throws java.io.IOException, DataFilterException {
+    int yy_input;
+    int yy_action;
+
+    // cached fields:
+    int yy_currentPos_l;
+    int yy_startRead_l;
+    int yy_markedPos_l;
+    int yy_endRead_l = yy_endRead;
+    char [] yy_buffer_l = yy_buffer;
+    char [] yycmap_l = yycmap;
+
+    int [] yytrans_l = yytrans;
+    int [] yy_rowMap_l = yy_rowMap;
+    byte [] yy_attr_l = YY_ATTRIBUTE;
+
+    while (true) {
+      yy_markedPos_l = yy_markedPos;
+
+      yy_action = -1;
+
+      yy_startRead_l = yy_currentPos_l = yy_currentPos = 
+                       yy_startRead = yy_markedPos_l;
+
+      yy_state = yy_lexical_state;
+
+
+      yy_forAction: {
+        while (true) {
+
+          if (yy_currentPos_l < yy_endRead_l)
+            yy_input = yy_buffer_l[yy_currentPos_l++];
+          else if (yy_atEOF) {
+            yy_input = YYEOF;
+            break yy_forAction;
+          }
+          else {
+            // store back cached positions
+            yy_currentPos  = yy_currentPos_l;
+            yy_markedPos   = yy_markedPos_l;
+            boolean eof = yy_refill();
+            // get translated positions and possibly new buffer
+            yy_currentPos_l  = yy_currentPos;
+            yy_markedPos_l   = yy_markedPos;
+            yy_buffer_l      = yy_buffer;
+            yy_endRead_l     = yy_endRead;
+            if (eof) {
+              yy_input = YYEOF;
+              break yy_forAction;
+            }
+            else {
+              yy_input = yy_buffer_l[yy_currentPos_l++];
+            }
+          }
+          int yy_next = yytrans_l[ yy_rowMap_l[yy_state] + yycmap_l[yy_input] ];
+          if (yy_next == -1) break yy_forAction;
+          yy_state = yy_next;
+
+          int yy_attributes = yy_attr_l[yy_state];
+          if ( (yy_attributes & 1) == 1 ) {
+            yy_action = yy_state; 
+            yy_markedPos_l = yy_currentPos_l; 
+            if ( (yy_attributes & 8) == 8 ) break yy_forAction;
+          }
+
+        }
+      }
+
+      // store back cached position
+      yy_markedPos = yy_markedPos_l;
+
+      switch (yy_action) {
+
+        case 78: 
+        case 98: 
+        case 191: 
+        case 201: 
+        case 207: 
+        case 232: 
+        case 248: 
+        case 264: 
+          { 
+	if(!deleteErrors) {
+		throwError("Unknown @identifier "+yytext());
+	} else {
+		String s = yytext();
+		if(debug) log("Discarded identifier: "+s);
+		// Ignore
+	}
+ }
+        case 306: break;
+        case 118: 
+        case 135: 
+        case 176: 
+        case 178: 
+          { 
+	// This is horrible. However it seems that there is no other way to do it with either jflex or CUP, as {URL} cannot be an unambiguous token :(
+	String s = yytext();
+	if(debug) log("Recognized URL: "+s);
+	
+	DecodedStringThingy dst = new DecodedStringThingy(s);
+	
+	if(!dst.url) {
+		throw new IllegalStateException("parsing url().. isn't a url()");
+	}
+	if(dst.suffix.length() > 0) {
+		yypushback(dst.suffix.length());
+		dst.suffix = "";
+	}
+	
+	s = dst.data;
+	if(debug) log("URL now: "+s);
+	s = processURL(s);
+	dst.data = s;
+	if(s == null || s.equals("")) {
+		if(debug) log("URL invalid");
+		w.write("url()");
+	} else {
+		s = dst.toString();
+		if(debug) log("Writing: "+s);
+		w.write(s);
+	}
+ }
+        case 307: break;
+        case 26: 
+        case 94: 
+          { 
+	String s = yytext();
+	if(s.startsWith("url")) throwError("Invalid contents of url()");
+	w.write(s);
+	if(debug) log("Matched function start: "+s);
+ }
+        case 308: break;
+        case 28: 
+          { 
+	if(postBadImportFlag) {
+		// Ignore
+		postBadImportFlag = false;
+		if(debug) log("Ignoring mediums list because after bad import: "+
+			yytext());
+	} else {
+		String s = yytext();
+		w.write(s);
+		if(debug) log("Matched and passing on mediums list: "+s);
+	}
+ }
+        case 309: break;
+        case 190: 
+        case 200: 
+        case 221: 
+        case 238: 
+        case 244: 
+          { 
+	String s = yytext();
+	if(debug) log("Found @import: "+s);
+	s = s.substring("@import".length());
+	s = s.trim();
+	DecodedStringThingy dst = new DecodedStringThingy(s);
+	s = dst.data;
+	if(debug) log("URL: "+s);
+	s = processURL(s);
+	if (!(s == null || s.equals(""))) {
+		if(debug) log("URL now: "+s);
+		s = "@import "+dst.toString();
+		if(debug) log("Writing: "+s);
+		w.write(s);
+	} else
+		if(debug) log("Dropped @import");
+ }
+        case 310: break;
+        case 105: 
+        case 129: 
+          { 
+	// Comment
+	// CSS comments are harmless? - FIXME check
+	String s = yytext();
+	w.write(s);
+	if(debug) log("Matched comment: "+s);
+ }
+        case 311: break;
+        case 107: 
+          {  
+	String s = yytext();
+	w.write(s);
+	if(debug) log("Matched HTML comment: "+s);
+ }
+        case 312: break;
+        case 148: 
+          { 
+	String s = yytext();
+	w.write(s);
+	if(debug) log("Matched @media: "+s);
+ }
+        case 313: break;
+        case 74: 
+        case 75: 
+        case 95: 
+        case 97: 
+        case 120: 
+        case 121: 
+        case 143: 
+        case 144: 
+        case 166: 
+        case 167: 
+        case 180: 
+        case 181: 
+          { 
+	String s = yytext();
+	w.write(s);
+	if(debug) log("Matched unicode: "+s);
+ }
+        case 314: break;
+        case 37: 
+          { 
+	String s = yytext();
+	w.write(s);
+	if(debug) log("Matched number: "+s);
+ }
+        case 315: break;
+        case 54: 
+          { 
+	String s = yytext();
+	w.write(s);
+	if(debug) log("Matched |=: "+s);
+ }
+        case 316: break;
+        case 126: 
+          { 
+	String s = yytext();
+	w.write(s);
+	if(debug) log("Matched @page: "+s);
+ }
+        case 317: break;
+        case 92: 
+        case 154: 
+          { 
+	String s = yytext();
+	if(debug) log("Got hexcolor: "+s);
+	w.write(s);
+ }
+        case 318: break;
+        case 2: 
+        case 5: 
+        case 13: 
+        case 30: 
+        case 41: 
+        case 57: 
+        case 73: 
+        case 89: 
+        case 110: 
+        case 132: 
+        case 153: 
+          { 
+	String s = yytext();
+	w.write(s);
+	if(debug) log("Matched ident: "+s);
+ }
+        case 319: break;
+        case 34: 
+        case 59: 
+        case 60: 
+          { 
+	String s = yytext();
+	if(debug) log("Matched string: "+s);
+	if(paranoidStringCheck && s.indexOf(':') != -1) {
+		w.write("/* Deleted disallowed string */");
+		log("Deleted disallowed string: "+s);
+	} else {
+		w.write(s);
+	}
+ }
+        case 320: break;
+        case 38: 
+        case 40: 
+        case 71: 
+        case 72: 
+        case 93: 
+        case 111: 
+        case 112: 
+        case 133: 
+        case 134: 
+        case 155: 
+        case 175: 
+          { 
+	String s = yytext();
+	w.write(s);
+	if(debug) log("Matched #name: "+s);
+ }
+        case 321: break;
+        case 283: 
+          { 
+	String s = yytext();
+	w.write(s);
+	if(debug) log("Matched important: "+s);
+ }
+        case 322: break;
+        case 15: 
+          { 
+	String s = yytext();
+	w.write(s);
+	if(debug) log("Matched semicolon: "+s);
+ }
+        case 323: break;
+        case 138: 
+        case 140: 
+        case 142: 
+        case 157: 
+        case 162: 
+        case 165: 
+          { 
+	String s = yytext();
+	if(debug) log("Ignoring unrecognizable url: "+s);
+	w.write("/* Ignoring unmatchable URL */url()");
+ }
+        case 324: break;
+        case 53: 
+          {  
+	String s = yytext();
+	w.write(s); 
+	if(debug) log("Matched ~=: "+s);
+ }
+        case 325: break;
+        case 122: 
+        case 145: 
+        case 168: 
+        case 182: 
+        case 196: 
+        case 215: 
+          { 
+	String s = yytext();
+	w.write(s);
+	if(debug) log("Matched unicode range: "+s);
+ }
+        case 326: break;
+        case 17: 
+          { 
+	String s = yytext();
+	w.write(s);
+	if(debug) log("Matched close braces: "+s);
+ }
+        case 327: break;
+        case 10: 
+          { 
+	String s = yytext();
+	w.write(s);
+	if(debug) log("Matched function end: "+s);
+ }
+        case 328: break;
+        case 250: 
+        case 268: 
+        case 282: 
+          { 
+	String s = yytext();
+	detectedCharset = s;
+	if(debug) log("Matched and ignoring charset: "+s);
+	// Ignore
+ }
+        case 329: break;
+        case 1: 
+        case 3: 
+        case 7: 
+        case 8: 
+        case 11: 
+        case 14: 
+        case 19: 
+        case 20: 
+        case 21: 
+        case 22: 
+          { 
+	String s = yytext();
+	char c = s.charAt(0);
+	log("Matched anything: "+yytext()+" - ignoring");
+	w.write("/* ignored unmatched char: "+c+" */"); // single char cannot break out of comment
+ }
+        case 330: break;
+        case 227: 
+          { 
+	String s = yytext();
+	w.write(s);
+	if(debug) log("Matched @font-face: "+s);
+ }
+        case 331: break;
+        case 16: 
+          { 
+	String s = yytext();
+	w.write(s);
+	if(debug) log("Matched open braces: "+s);
+ }
+        case 332: break;
+        case 6: 
+        case 9: 
+        case 12: 
+        case 18: 
+          { 
+	String s = yytext();
+	w.write(s);
+	if(debug) log("Matched single char: "+s);
+ }
+        case 333: break;
+        case 4: 
+          { 
+	String s = yytext();
+	w.write(s);
+	if(debug) log("Matched whitespace: "+s);
+ }
+        case 334: break;
+        case 29: 
+          { 
+	String s = yytext();
+	w.write(s);
+	if(debug) log("Matched measurement: "+s);
+ }
+        case 335: break;
+        case 58: 
+          { 
+	String s = yytext();
+	w.write(s); 
+	if(debug) log("Matched HTML comment: "+s);
+ }
+        case 336: break;
+        default: 
+          if (yy_input == YYEOF && yy_startRead == yy_currentPos) {
+            yy_atEOF = true;
+            return null;
+          } 
+          else {
+            yy_ScanError(YY_NO_MATCH);
+          }
+      }
+    }
+  }
+
+
+}

Added: trunk/freenet/src/freenet/clients/http/filter/CSSTokenizerFilter.jflex
===================================================================
--- trunk/freenet/src/freenet/clients/http/filter/CSSTokenizerFilter.jflex	2006-03-18 14:52:17 UTC (rev 8274)
+++ trunk/freenet/src/freenet/clients/http/filter/CSSTokenizerFilter.jflex	2006-03-18 15:18:54 UTC (rev 8275)
@@ -0,0 +1,443 @@
+package freenet.clients.http.filter;
+import java.io.*;
+import java.util.*;
+/* This class tokenizes a CSS2 Reader stream, writes it out to the output Writer, and filters any URLs found */
+// WARNING: this is not as thorough as the HTML parser - new versions of the standard could lead to anonymity risks. See comments in SaferFilter.java
+// Mostly from http://www.w3.org/TR/REC-CSS2/grammar.html
+
+%%
+
+%{
+	// Put stuff to include in the class here
+	// Raw text of the last matched @charset rule (assigned by the @charset action); null until one is seen.
+	String detectedCharset;
+
+	// External flag
+	// When true, the string rule replaces any string containing ':' with a comment.
+	boolean paranoidStringCheck = false;
+	// When true, unknown @-rules are silently discarded; when false, throwError() is called for them.
+	boolean deleteErrors = true;
+	// Gates most log() calls in the rule actions.
+	boolean debug = true;
+	
+	// Internal flags
+	// Read and cleared by the mediums-list rule.
+	// NOTE(review): nothing in the visible rules ever sets this to true - confirm whether it is still needed.
+	boolean postBadImportFlag = false; // both URLs and @import's
+	// NOTE(review): importFlag and urlFlag are not referenced by any rule visible here.
+	boolean importFlag = false;
+	boolean urlFlag = false;
+
+	// Writer
+	// Destination for the filtered stylesheet; every rule action writes to it.
+	Writer w = null; // Will NPE if not initialized properly
+
+	/**
+	 * Run the tokenizer over the whole input: yylex() is called repeatedly
+	 * until it returns null. The rule actions write the filtered output to w
+	 * as a side effect.
+	 */
+	public void parse () throws IOException {
+		while (yylex() != null);
+	}
+	
+	/**
+	 * @param r Source of the CSS to filter, handed to the generated constructor.
+	 * @param w Writer that receives the filtered CSS.
+	 * @param paranoidStringCheck See the field of the same name.
+	 */
+	CSSTokenizerFilter(Reader r, Writer w, boolean paranoidStringCheck) {
+		this(r);
+		this.w = w;
+		this.paranoidStringCheck = paranoidStringCheck;
+	}
+
+	/**
+	 * Hook called by rule actions on input they refuse to pass through.
+	 * This default only signals that a subclass failed to override it.
+	 */
+	void throwError(String s) throws IOException {
+		throw new IllegalStateException("You MUST override throwError!");
+	}
+
+	/**
+	 * Hook intended for filtering @import URLs; must be overridden.
+	 * NOTE(review): the @import rule visible in this file calls processURL(),
+	 * not this method - confirm which is intended.
+	 */
+	String processImportURL(String s) {
+		throw new IllegalStateException("You MUST override processImportURL!");
+	}
+
+	/**
+	 * Hook that filters a decoded URL; must be overridden. The rule actions
+	 * treat a null or empty return value as "URL rejected".
+	 */
+	String processURL(String s) {
+		throw new IllegalStateException("You MUST override processURL!");
+	}
+	
+	/** Write a diagnostic line to System.err. Most callers guard this with the debug flag. */
+	void log(String s) {
+		System.err.println("CSSTokenizerFilter: "+s);
+	}
+
+	/** Write an error line to System.err, regardless of the debug flag. */
+	void logError(String s) {
+		System.err.println("CSSTokenizerFilter ERROR: "+s);
+	}
+	
+	/**
+	 * Strip the first and last character of s, provided s is at least two
+	 * characters long and its last character is c. Only the trailing
+	 * character is checked; the leading one is dropped unconditionally.
+	 * @return The inner text, or "" if s is too short or does not end with c.
+	 */
+	static String unquote(String s, char c) {
+		int last = s.length() - 1;
+		if(last < 1 || s.charAt(last) != c) return "";
+		return s.substring(1, last);
+	}
+	
+	// This is not very efficient. The parser below knows the quoting rules too.
+	
+	/** @return true if c is an ASCII hexadecimal digit (0-9, a-f or A-F). */
+	static boolean isHexDigit(char c) {
+		if(c >= '0' && c <= '9') return true;
+		if(c >= 'a' && c <= 'f') return true;
+		return c >= 'A' && c <= 'F';
+	}
+	
+	/**
+	 * Decodes a CSS string or URL token (optionally wrapped in url( ... ),
+	 * optionally quoted) into its unescaped text, and can re-encode it with
+	 * toString() after the caller has replaced data.
+	 *
+	 * The final character of the input is never decoded: the callers pass
+	 * text that ends in a terminator such as ')' or ';'.
+	 */
+	class DecodedStringThingy {
+		char quote; // ' ' means not quoted
+		boolean url; // in a url() ?
+		String data;
+		// Text remaining after the decoded part. NOTE(review): one character
+		// after the stop position is skipped when computing this (see the
+		// end of the constructor); for url() that character is normally the
+		// ')' which toString() re-adds.
+		public String suffix; // includes any whitespace
+
+		/**
+		 * @param s The raw token text, including its terminating character.
+		 */
+		public DecodedStringThingy(String s) {
+			// The lexer is %ignorecase, so "URL(" must be recognised too.
+			if(s.regionMatches(true, 0, "url(", 0, "url(".length())) {
+				s = s.substring("url(".length());
+				url = true;
+			}
+			// Guard against an empty remainder instead of failing in charAt(0).
+			char q = s.length() > 0 ? s.charAt(0) : ' ';
+			if(q == '\'' || q == '\"') {
+				quote = q;
+				s = s.substring(1);
+			} else quote = ' ';
+			StringBuffer buffer = new StringBuffer();
+			int x = 0;
+			boolean justEscaping = false; // previous char was a backslash
+			boolean stillEscaping = false; // inside a \hhhhhh escape
+			StringBuffer hexEscape = new StringBuffer();
+			while(x < s.length()-1) {
+				char c = s.charAt(x);
+				x++;
+				if(justEscaping) {
+					justEscaping = false;
+					if(isHexDigit(c)) {
+						hexEscape.append(c);
+						stillEscaping = true;
+					} else {
+						// Escaped literal (including an escaped newline).
+						// Will need to be reencoded if quote or \n
+						buffer.append(c);
+					}
+				} else if(stillEscaping) {
+					if(isHexDigit(c) && hexEscape.length() < 6) {
+						hexEscape.append(c);
+					} else {
+						appendHexEscape(buffer, hexEscape);
+						hexEscape.setLength(0);
+						stillEscaping = false;
+						// One whitespace char after an escape is part of the
+						// escape and is swallowed. Anything else is ordinary
+						// input - it may be the closing quote or the start of
+						// another escape - so step back and let the normal
+						// branch handle it on the next iteration.
+						if(!Character.isWhitespace(c)) x--;
+					}
+				} else {
+					if(quote != ' ' && c == quote) {
+						break;
+					} else if (c == '\\') {
+						justEscaping = true;
+					} else {
+						buffer.append(c);
+					}
+				}
+			}
+			// A hex escape that runs up to the terminator is still valid.
+			if(stillEscaping) appendHexEscape(buffer, hexEscape);
+			data = buffer.toString();
+			if(s.length() > (x+1))
+				suffix = s.substring(x+1);
+			else suffix = "";
+		}
+
+		/**
+		 * Append the character denoted by the hex digits in hexEscape to
+		 * buffer. Code points above 0xFFFF are not supported: they are
+		 * dropped, and a warning is logged and written to the output as a
+		 * CSS comment.
+		 */
+		private void appendHexEscape(StringBuffer buffer, StringBuffer hexEscape) {
+			int d = Integer.parseInt(hexEscape.toString(), 16);
+			if(d > 0xFFFF) {
+				String error = 
+				    "UCS-4 CHARACTERS OVER 0xFFFF NOT SUPPORTED!";
+				logError(error);
+				try {
+					w.write("/* "+error+"*/");
+				} catch (IOException e) {
+					// Best effort only: the warning comment is not essential.
+					logError("Could not write warning comment: "+e);
+				}
+			} else {
+				buffer.append((char)d);
+			}
+		}
+		
+		/**
+		 * Re-encode as CSS: url( and ) if this was a url(), the original
+		 * quote characters if it was quoted, the escaped data, then suffix.
+		 */
+		public String toString() {
+			StringBuffer out = new StringBuffer();
+			if(url) out.append("url(");
+			if(quote != ' ') out.append(quote);
+			out.append(unescapeData());
+			if(quote != ' ') out.append(quote);
+			if(url) out.append(")");
+			out.append(suffix);
+			return out.toString();
+		}
+		
+		/**
+		 * Despite the name, this ESCAPES data for output: the quote
+		 * character (a space when unquoted), newlines and backslashes are
+		 * prefixed with a backslash. Backslashes must be escaped, otherwise
+		 * data ending in one would escape the closing quote.
+		 */
+		public String unescapeData() {
+			StringBuffer sb = new StringBuffer();
+			for(int i=0;i<data.length();i++) {
+				char c = data.charAt(i);
+				if(c == quote || c == '\n' || c == '\\') {
+					sb.append('\\');
+				}
+				sb.append(c);
+			}
+			return sb.toString();
+		}
+	}
+%}
+
+%class CSSTokenizerFilter
+%unicode
+%ignorecase
+
+// Case sensitivity DOES NOT AFFECT CHARACTER CLASSES!
+H=[0-9a-fA-F]
+NONASCII=[\200-\4177777]
+UNICODE=\\{H}{1,6}[ \t\r\n\f]?
+ESCAPE={UNICODE}|\\[ -~\200-\4177777]
+NMSTART=[a-zA-Z]|{NONASCII}|{ESCAPE}
+NMCHAR=[a-zA-Z0-9-]|{NONASCII}|{ESCAPE}
+
+// The spec (http://www.w3.org/TR/REC-CSS2/grammar.html, mostly D.2 for this bit)
+// is on crack wrt string/url, so this is guesswork
+STRING1=\"(\\{NL}|\'|\\\"|{NONASCII}|{ESCAPE}|[^\"])*\"
+STRING2=\'(\\{NL}|\"|\\\'|{NONASCII}|{ESCAPE}|[^\'])*\'
+
+IDENT={NMSTART}{NMCHAR}*
+NAME={NMCHAR}+
+NUM=[0-9]+|[0-9]*"."[0-9]+
+STRING={STRING1}|{STRING2}
+// NOTE(review): the bare word STRING below is literal text, not a macro
+// reference (JFlex macros are used as {NAME}). It looks like {STRING} was
+// intended - confirm, and check the effect on longest-match before changing it.
+INBRACKET=([^\)]|"\\)"|STRING)*
+
+// See comments for STRING1/STRING2 :)
+URL=([^\(\)\"\']|{NONASCII}|{ESCAPE})*
+
+W=[ \t\r\n\f]*
+NL=\n|\r\n|\r|\f
+RANGE=\?{1,6}|{H}(\?{0,5}|{H}(\?{0,4}|{H}(\?{0,3}|{H}(\?{0,2}|{H}(\??|{H})))))
+HEXCOLOR="#"(({H}{H}{H})|({H}{H}{H}{H}{H}{H}))
+
+// From grammar
+MEDIUM={IDENT}{W}*
+// As distinct from MEDIA, which allows rulesets
+MEDIUMS={MEDIUM}(","{W}*{MEDIUM})*
+
+// This is rather incomprehensible, so I am adding log messages for every token. They will not actually call log() unless debug is true.
+
+// Loosely based on http://www.w3.org/TR/REC-CSS2/grammar.html
+%%
+
+{HEXCOLOR} {
+	String s = yytext();
+	if(debug) log("Got hexcolor: "+s);
+	w.write(s);
+}
+"url("{W}*({STRING}|{URL}){W}")" {
+	// A complete url(...) token: decode it, pass the decoded URL through
+	// processURL(), and write back either the re-encoded result or an empty
+	// url() if the URL was rejected.
+	// This is horrible. However it seems that there is no other way to do it with either jflex or CUP, as {URL} cannot be an unambiguous token :(
+	String s = yytext();
+	if(debug) log("Recognized URL: "+s);
+	
+	DecodedStringThingy dst = new DecodedStringThingy(s);
+	
+	// NOTE(review): %ignorecase lets this rule match "URL(" as well - verify
+	// that DecodedStringThingy sets dst.url for upper-case input too.
+	if(!dst.url) {
+		throw new IllegalStateException("parsing url().. isn't a url()");
+	}
+	// Anything the decoder left over is handed back to the lexer to be
+	// tokenized again.
+	// NOTE(review): toString() below always appends ")", so if the suffix
+	// pushed back here still contains the ")" it will be emitted twice - verify.
+	if(dst.suffix.length() > 0) {
+		yypushback(dst.suffix.length());
+		dst.suffix = "";
+	}
+	
+	s = dst.data;
+	if(debug) log("URL now: "+s);
+	s = processURL(s);
+	// Store the filtered URL so that toString() re-encodes it, not the original.
+	dst.data = s;
+	if(s == null || s.equals("")) {
+		if(debug) log("URL invalid");
+		w.write("url()");
+	} else {
+		s = dst.toString();
+		if(debug) log("Writing: "+s);
+		w.write(s);
+	}
+}
+"@import"{W}{W}*({STRING}|{URL})({W}*{W}{MEDIUMS})?";" {
+	// An @import with a string or bare URL and an optional media list.
+	// The URL is decoded and filtered through processURL(); if it is
+	// rejected (null or empty) the whole rule is dropped.
+	String s = yytext();
+	if(debug) log("Found @import: "+s);
+	s = s.substring("@import".length());
+	s = s.trim();
+	DecodedStringThingy dst = new DecodedStringThingy(s);
+	s = dst.data;
+	if(debug) log("URL: "+s);
+	s = processURL(s);
+	if (!(s == null || s.equals(""))) {
+		if(debug) log("URL now: "+s);
+		// Write the FILTERED URL: toString() encodes dst.data, so it must be
+		// replaced first (as the url() rule does), otherwise the original,
+		// unfiltered URL would be passed through.
+		dst.data = s;
+		s = "@import "+dst.toString();
+		// The decoder does not keep the rule's terminating ';' when there
+		// is no media list; make sure the rule stays terminated.
+		if(!s.endsWith(";")) s = s + ";";
+		if(debug) log("Writing: "+s);
+		w.write(s);
+	} else
+		if(debug) log("Dropped @import");
+}
+{W}"{"{W} {
+	String s = yytext();
+	w.write(s);
+	if(debug) log("Matched open braces: "+s);
+}
+{W}"}"{W} {
+	String s = yytext();
+	w.write(s);
+	if(debug) log("Matched close braces: "+s);
+}
+[ \t\r\n\f]+	{
+	String s = yytext();
+	w.write(s);
+	if(debug) log("Matched whitespace: "+s);
+}
+\/\*[^*]*\*+([^/][^*]*\*+)*\/	{
+	// Comment
+	// CSS comments are harmless? - FIXME check
+	String s = yytext();
+	w.write(s);
+	if(debug) log("Matched comment: "+s);
+}
+"<!--" { 
+	String s = yytext();
+	w.write(s);
+	if(debug) log("Matched HTML comment: "+s);
+}
+"-->" {
+	String s = yytext();
+	w.write(s); 
+	if(debug) log("Matched HTML comment: "+s);
+}
+"~=" { 
+	String s = yytext();
+	w.write(s); 
+	if(debug) log("Matched ~=: "+s);
+}
+"|=" {
+	String s = yytext();
+	w.write(s);
+	if(debug) log("Matched |=: "+s);
+}
+{IDENT} {
+	String s = yytext();
+	w.write(s);
+	if(debug) log("Matched ident: "+s);
+}
+"@page" {
+	String s = yytext();
+	w.write(s);
+	if(debug) log("Matched @page: "+s);
+}
+"@media" {
+	String s = yytext();
+	w.write(s);
+	if(debug) log("Matched @media: "+s);
+}
+"@font-face" {
+	String s = yytext();
+	w.write(s);
+	if(debug) log("Matched @font-face: "+s);
+}
+"@"{IDENT}[^;\}\"]*[;\}] {
+	if(!deleteErrors) {
+		throwError("Unknown @identifier "+yytext());
+	} else {
+		String s = yytext();
+		if(debug) log("Discarded identifier: "+s);
+		// Ignore
+	}
+}
+"#"{NAME} {
+	String s = yytext();
+	w.write(s);
+	if(debug) log("Matched #name: "+s);
+}
+"!"{W}"important" {
+	// "!important", with optional whitespace after the '!'. The macro must
+	// sit outside the quotes: inside a quoted string {W} is literal text,
+	// so the rule would never match real input.
+	String s = yytext();
+	w.write(s);
+	if(debug) log("Matched important: "+s);
+}
+U\+{RANGE} {
+	String s = yytext();
+	w.write(s);
+	if(debug) log("Matched unicode: "+s);
+}
+U\+{H}{1,6}-{H}{1,6} {
+	String s = yytext();
+	w.write(s);
+	if(debug) log("Matched unicode range: "+s);
+}
+{NUM}("em"|"ex"|"px"|"cm"|"mm"|"in"|"pc"|"deg"|"rad"|"grad"|"ms"|"s"|"Hz"|"kHz"|"%") {
+	String s = yytext();
+	w.write(s);
+	if(debug) log("Matched measurement: "+s);
+}
+{NUM} {
+	String s = yytext();
+	w.write(s);
+	if(debug) log("Matched number: "+s);
+}
+
+{MEDIUMS}{W}*";" {
+	if(postBadImportFlag) {
+		// Ignore
+		postBadImportFlag = false;
+		if(debug) log("Ignoring mediums list because after bad import: "+
+			yytext());
+	} else {
+		String s = yytext();
+		w.write(s);
+		if(debug) log("Matched and passing on mediums list: "+s);
+	}
+}
+
+"@charset"{W}*{STRING}{W}*";" {
+	String s = yytext();
+	detectedCharset = s;
+	if(debug) log("Matched and ignoring charset: "+s);
+	// Ignore
+}
+"url("{INBRACKET}")" {
+	String s = yytext();
+	if(debug) log("Ignoring unrecognizable url: "+s);
+	w.write("/* Ignoring unmatchable URL */url()");
+}
+{IDENT}"(" {
+	String s = yytext();
+	if(s.startsWith("url")) throwError("Invalid contents of url()");
+	w.write(s);
+	if(debug) log("Matched function start: "+s);
+}
+")" {
+	String s = yytext();
+	w.write(s);
+	if(debug) log("Matched function end: "+s);
+}
+";" {
+	String s = yytext();
+	w.write(s);
+	if(debug) log("Matched semicolon: "+s);
+}
+{STRING} {
+	String s = yytext();
+	if(debug) log("Matched string: "+s);
+	if(paranoidStringCheck && s.indexOf(':') != -1) {
+		w.write("/* Deleted disallowed string */");
+		log("Deleted disallowed string: "+s);
+	} else {
+		w.write(s);
+	}
+}
+// These are plain chars, which would be passed through as tokens somehow by the spec'd tokenizer
+","|":"|"/"|">"|"-"|"+"|"."|"*" {
+	String s = yytext();
+	w.write(s);
+	if(debug) log("Matched single char: "+s);
+}
+. {
+	String s = yytext();
+	char c = s.charAt(0);
+	log("Matched anything: "+yytext()+" - ignoring");
+	w.write("/* ignored unmatched char: "+c+" */"); // single char cannot break out of comment
+}

Modified: trunk/freenet/src/freenet/clients/http/filter/CharsetExtractor.java
===================================================================
--- trunk/freenet/src/freenet/clients/http/filter/CharsetExtractor.java	2006-03-18 14:52:17 UTC (rev 8274)
+++ trunk/freenet/src/freenet/clients/http/filter/CharsetExtractor.java	2006-03-18 15:18:54 UTC (rev 8275)
@@ -1,5 +1,7 @@
 package freenet.clients.http.filter;
 
+import java.io.IOException;
+
 import freenet.support.Bucket;
 
 /**
@@ -8,6 +10,6 @@
  */
 public interface CharsetExtractor {
 	
-	String getCharset(Bucket data);
+	String getCharset(Bucket data, String parseCharset) throws DataFilterException, IOException;
 
 }

Modified: trunk/freenet/src/freenet/clients/http/filter/ContentDataFilter.java
===================================================================
--- trunk/freenet/src/freenet/clients/http/filter/ContentDataFilter.java	2006-03-18 14:52:17 UTC (rev 8274)
+++ trunk/freenet/src/freenet/clients/http/filter/ContentDataFilter.java	2006-03-18 15:18:54 UTC (rev 8275)
@@ -1,12 +1,17 @@
 package freenet.clients.http.filter;
 
+import java.io.IOException;
+import java.util.HashMap;
+
 import freenet.support.Bucket;
+import freenet.support.BucketFactory;
 
 /**
  * Data filter for a specific MIME type.
  */
 public interface ContentDataFilter {
 	
-	public Bucket filter(Bucket data, String charset, FilterCallback cb);
+	public Bucket readFilter(Bucket data, BucketFactory bf, String charset, HashMap otherParams, FilterCallback cb) throws DataFilterException, IOException;
 
+	public Bucket writeFilter(Bucket data, BucketFactory bf, String charset, HashMap otherParams, FilterCallback cb) throws DataFilterException, IOException;
 }

Modified: trunk/freenet/src/freenet/clients/http/filter/ContentFilter.java
===================================================================
--- trunk/freenet/src/freenet/clients/http/filter/ContentFilter.java	2006-03-18 14:52:17 UTC (rev 8274)
+++ trunk/freenet/src/freenet/clients/http/filter/ContentFilter.java	2006-03-18 15:18:54 UTC (rev 8275)
@@ -1,7 +1,16 @@
 package freenet.clients.http.filter;
 
+import java.io.EOFException;
+import java.io.IOException;
+import java.io.InputStream;
+import java.util.HashMap;
 import java.util.Hashtable;
 
+import freenet.support.Bucket;
+import freenet.support.BucketFactory;
+import freenet.support.BucketTools;
+import freenet.support.Logger;
+
 /**
  * Freenet content filter. This doesn't actually do any filtering,
  * it organizes everything and maintains the database.
@@ -22,7 +31,7 @@
 				true, true, null, null, false, false, false, false, false, false, 
 				"Plain text - not dangerous unless your browser is stupid (e.g. Internet Explorer)",
 				"Plain text - not dangerous unless you include compromizing information",
-				true, "iso-8859-1", null));
+				true, "US-ASCII", null));
 		
 		// GIF - probably safe - FIXME check this out, write filters 
 		register(new MIMEType("image/gif", "gif", new String[0], new String[0], 
@@ -46,18 +55,17 @@
 		
 		// PDF - very dangerous - FIXME ideally we would have a filter, this is such a common format...
 		register(new MIMEType("application/pdf", "pdf", new String[] { "application/x-pdf" }, new String[0],
-				false, false, null, null, true, true, true, true, true, true,
+				false, false, null, null, true, true, true, false, true, true,
 				"Adobe(R) PDF document - VERY DANGEROUS!",
 				"Adobe(R) PDF document - VERY DANGEROUS!",
 				false, null, null));
 		
 		// HTML - dangerous if not filtered
 		register(new MIMEType("text/html", "html", new String[] { "text/xhtml", "text/xml+xhtml" }, new String[] { "htm" },
-				false, false /* maybe? */, new HTMLReadFilter(), new HTMLWriteFilter(), 
-				true, true, true, true, true, true, false,
-				"HTML - not dangerous if filtered",
+				false, false /* maybe? */, new HTMLFilter(), null /* FIXME */, 
+				true, true, true, true, true, true, "HTML - not dangerous if filtered",
 				"HTML - may contain dangerous metadata etc; suggest you check it by hand",
-				true, "iso-8859-1", new HTMLCharsetExtractor()));
+				true, "iso-8859-1", new HTMLFilter()));
 		
 		// CSS - danagerous if not filtered, not sure about the filter
 		register(new MIMEType("text/css", "css", new String[0], new String[0],
@@ -65,7 +73,7 @@
 				true, true, true, true, true, false,
 				"CSS (cascading style sheet, usually used with HTML) - probably not dangerous if filtered, but the filter is not a whitelist filter so take care",
 				"CSS (cascading style sheet, usually used with HTML) - this can probably contain metadata, check it by hand",
-				true, "iso-8859-1", new CSSCharsetExtractor()));
+				true, "utf-8", new CSSReadFilter()));
 		
 	}
 	
@@ -83,7 +91,173 @@
 	public static MIMEType getMIMEType(String mimeType) {
 		return (MIMEType) mimeTypesByName.get(mimeType);
 	}
+
+	/**
+	 * Filter some data.
+	 *
+	 * The MIME type string is split into the bare type and its parameters.
+	 * A charset parameter is extracted; all other parameters are collected
+	 * and handed to the filter. If the type is registered as safe to read
+	 * the data is returned unchanged; if it has a read filter, that filter
+	 * is run (detecting the charset first if none was given and the type
+	 * takes one).
+	 *
+	 * @param data The data to filter.
+	 * @param bf Factory passed on to the read filter.
+	 * @param typeName Full MIME type, optionally with parameters, e.g.
+	 * "text/html; charset=utf-8".
+	 * @return The data itself if the type is safe to read, otherwise
+	 * whatever the type's read filter returns.
+	 * @throws UnknownContentTypeException If no handler is registered for the type.
+	 * @throws UnsafeContentTypeException If the type has no read filter and is not safe to read.
+	 * @throws IOException If an internal error involving buckets occurred.
+	 */
+	public static Bucket filter(Bucket data, BucketFactory bf, String typeName) throws UnsafeContentTypeException, IOException {
+		String type = typeName;
+		String charset = null;
+		HashMap otherParams = null;
+		
+		// First parse the MIME type
+		
+		int idx = type.indexOf(';');
+		if(idx != -1) {
+			String options = type.substring(idx+1);
+			type = type.substring(0, idx);
+			// Parse options
+			// Format: <type>/<subtype>[ optional white space ];[ optional white space ]<param>=<value>; <param2>=<value2>; ...
+			String[] rawOpts = options.split(";");
+			for(int i=0;i<rawOpts.length;i++) {
+				String raw = rawOpts[i];
+				int eq = raw.indexOf('=');
+				if(eq == -1) {
+					Logger.error(ContentFilter.class, "idx = -1 for '=' on option: "+raw+" from "+typeName);
+					continue;
+				}
+				String before = raw.substring(0, eq).trim();
+				String after = raw.substring(eq+1).trim();
+				// Parameter names are case-insensitive; a "Charset=" that
+				// slipped past here would leave us guessing the charset.
+				if(before.equalsIgnoreCase("charset")) {
+					// The value may be a quoted-string.
+					if(after.length() >= 2 && after.charAt(0) == '"'
+							&& after.charAt(after.length()-1) == '"')
+						after = after.substring(1, after.length()-1).trim();
+					charset = after;
+				} else {
+					if(otherParams == null) otherParams = new HashMap();
+					otherParams.put(before, after);
+				}
+			}
+		}
+		// Whitespace is allowed around the type (see format above), so it
+		// must not be part of the lookup key.
+		type = type.trim();
+		
+		// Now look for a MIMEType handler
+		
+		MIMEType handler = getMIMEType(type);
+		
+		if(handler == null)
+			throw new UnknownContentTypeException(typeName);
+		else {
+			
+			if(handler.safeToRead) {
+				return data;
+			}
+			
+			if(handler.readFilter != null) {
+				if(handler.takesACharset && (charset == null || charset.length() == 0)) {
+					charset = detectCharset(data, handler);
+				}
+				
+				return handler.readFilter.readFilter(data, bf, charset, otherParams, new GenericReadFilterCallback());
+			}
+			handler.throwUnsafeContentTypeException();
+			return null;
+		}
+	}
+
+	/**
+	 * Work out which charset to use for data when none was specified.
+	 *
+	 * A Byte Order Mark, if present, wins. Otherwise the handler's charset
+	 * extractor (if any) is asked to find a declaration inside the data,
+	 * parsing it first with the handler's default charset and then with a
+	 * fixed list of fallbacks. If nothing is found, the handler's default
+	 * charset is returned (which may be null).
+	 *
+	 * @throws IOException If reading the bucket fails.
+	 */
+	private static String detectCharset(Bucket data, MIMEType handler) throws IOException {
+		
+		// Detect charset
+		
+		String charset = detectBOM(data);
+		
+		// A BOM identifies the encoding; falling through to the default
+		// would throw that information away.
+		if(charset != null) return charset;
+		
+		if(handler.charsetExtractor != null) {
+
+			// Obviously, this is slow!
+			// This is why we need to detect on insert.
+			
+			if(handler.defaultCharset != null) {
+				charset = tryExtractCharset(data, handler, handler.defaultCharset);
+				if(charset != null) return charset;
+			}
+			String[] fallbacks = new String[] { "ISO-8859-1", "UTF-8", "UTF-16", "UTF-32" };
+			for(int i=0;i<fallbacks.length;i++) {
+				charset = tryExtractCharset(data, handler, fallbacks[i]);
+				if(charset != null) return charset;
+			}
+			
+		}
+		
+		// If it doesn't have a BOM, then it's *probably* safe to use as default.
+		
+		return handler.defaultCharset;
+	}
+
+	/**
+	 * Ask the handler's extractor for a charset declaration, parsing the
+	 * data as parseCharset.
+	 * @return The declared charset, or null if none was found or the data
+	 * could not be parsed in that charset.
+	 */
+	private static String tryExtractCharset(Bucket data, MIMEType handler, String parseCharset) throws IOException {
+		try {
+			return handler.charsetExtractor.getCharset(data, parseCharset);
+		} catch (DataFilterException e) {
+			// Not parseable in this charset; the caller tries the next one.
+			return null;
+		}
+	}
+
+	/**
+	 * Detect a Byte Order Mark, a sequence of bytes which identifies a document as encoded with a 
+	 * specific charset. Only the first five bytes of the bucket are examined.
+	 * @return The name of the charset indicated by the BOM, or null if the
+	 * data does not start with a known BOM.
+	 * @throws IOException If the bucket cannot be read.
+	 */
+	private static String detectBOM(Bucket bucket) throws IOException {
+		byte[] data = new byte[5];
+		int read = 0;
+		InputStream is = bucket.getInputStream();
+		try {
+			while(read < data.length) {
+				int x;
+				try {
+					x = is.read(data, read, data.length - read);
+				} catch (EOFException e) {
+					x = -1;
+				}
+				if(x <= 0) break;
+				// Advance, so the next read appends instead of overwriting
+				// the start of the buffer.
+				read += x;
+			}
+		} finally {
+			is.close();
+		}
+		if(hasBOM(data, read, bom_utf8)) return "UTF-8";
+		// UTF-32 must be tested before UTF-16: the UTF-32 little-endian BOM
+		// begins with the UTF-16 little-endian BOM.
+		if(hasBOM(data, read, bom_utf32_be) || hasBOM(data, read, bom_utf32_le)) return "UTF-32";
+		if(hasBOM(data, read, bom_utf16_be) || hasBOM(data, read, bom_utf16_le)) return "UTF-16";
+		if(hasBOM(data, read, bom_scsu)) return "SCSU";
+		if(hasBOM(data, read, bom_utf7_1) || hasBOM(data, read, bom_utf7_2)
+				|| hasBOM(data, read, bom_utf7_3) || hasBOM(data, read, bom_utf7_4)
+				|| hasBOM(data, read, bom_utf7_5)) return "UTF-7";
+		if(hasBOM(data, read, bom_utf_ebcdic)) return "UTF-EBCDIC";
+		if(hasBOM(data, read, bom_bocu_1)) return "BOCU-1";
+		return null;
+	}
+
+	/**
+	 * @return true if at least bom.length bytes were read and they equal bom.
+	 * The length check keeps the zero padding of a short input from being
+	 * compared as if it were data.
+	 */
+	private static boolean hasBOM(byte[] data, int read, byte[] bom) {
+		return read >= bom.length && startsWith(data, bom);
+	}
 	
-	public static 
+	// Byte Order Mark's - from Wikipedia. We keep all of them because a rare encoding might
+	// be deliberately used by an attacker to confuse the filter, because at present a charset
+	// is not mandatory, and because some browsers may pick these up anyway even if one is present.
 	
+	static byte[] bom_utf8 = new byte[] { (byte)0xEF, (byte)0xBB, (byte)0xBF };
+	static byte[] bom_utf16_be = new byte[] { (byte)0xFE, (byte)0xFF };
+	static byte[] bom_utf16_le = new byte[] { (byte)0xFF, (byte)0xFE };
+	static byte[] bom_utf32_be = new byte[] { (byte)0, (byte)0, (byte)0xFE, (byte)0xFF };
+	static byte[] bom_utf32_le = new byte[] { (byte)0xFF, (byte)0xFE, (byte)0, (byte)0 };
+	static byte[] bom_scsu = new byte[] { (byte)0x0E, (byte)0xFE, (byte)0xFF };
+	static byte[] bom_utf7_1 = new byte[] { (byte)0x2B, (byte)0x2F, (byte)0x76, (byte) 0x38 };
+	static byte[] bom_utf7_2 = new byte[] { (byte)0x2B, (byte)0x2F, (byte)0x76, (byte) 0x39 };
+	static byte[] bom_utf7_3 = new byte[] { (byte)0x2B, (byte)0x2F, (byte)0x76, (byte) 0x2B };
+	static byte[] bom_utf7_4 = new byte[] { (byte)0x2B, (byte)0x2F, (byte)0x76, (byte) 0x2F };
+	static byte[] bom_utf7_5 = new byte[] { (byte)0x2B, (byte)0x2F, (byte)0x76, (byte) 0x38, (byte) 0x2D };
+	static byte[] bom_utf_ebcdic = new byte[] { (byte)0xDD, (byte)0x73, (byte)0x66, (byte)0x73 };
+	static byte[] bom_bocu_1 = new byte[] { (byte)0xFB, (byte)0xEE, (byte)0x28 };
+
+	/**
+	 * @return true if the first cmp.length bytes of data equal cmp; false if
+	 * they differ or if data is shorter than cmp.
+	 */
+	private static boolean startsWith(byte[] data, byte[] cmp) {
+		// Data shorter than the prefix cannot start with it (and would
+		// otherwise index past its end).
+		if(data.length < cmp.length) return false;
+		for(int i=0;i<cmp.length;i++) {
+			if(data[i] != cmp[i]) return false;
+		}
+		return true;
+	}
+
 }

Added: trunk/freenet/src/freenet/clients/http/filter/DataFilterException.java
===================================================================
--- trunk/freenet/src/freenet/clients/http/filter/DataFilterException.java	2006-03-18 14:52:17 UTC (rev 8274)
+++ trunk/freenet/src/freenet/clients/http/filter/DataFilterException.java	2006-03-18 15:18:54 UTC (rev 8275)
@@ -0,0 +1,30 @@
+package freenet.clients.http.filter;
+
+/**
+ * Thrown when a content filter is unable to filter the data it was given.
+ * Carries a title in raw and HTML-encoded form, plus an explanation.
+ */
+public class DataFilterException extends UnsafeContentTypeException {
+
+	final String rawTitle;
+	final String encodedTitle;
+	final String explanation;
+	
+	/**
+	 * @param title The title as plain text.
+	 * @param htmlTitle The same title, HTML-encoded.
+	 * @param why Explanation of why the data could not be filtered.
+	 */
+	DataFilterException(String title, String htmlTitle, String why) {
+		rawTitle = title;
+		encodedTitle = htmlTitle;
+		explanation = why;
+	}
+	
+	/** @return The title as plain text. */
+	public String getRawTitle() {
+		return rawTitle;
+	}
+
+	/** @return The HTML-encoded form of the title. */
+	public String getHTMLEncodedTitle() {
+		return encodedTitle;
+	}
+
+	/** @return The explanation supplied at construction. */
+	public String getExplanation() {
+		return explanation;
+	}
+
+}

Modified: trunk/freenet/src/freenet/clients/http/filter/FilterCallback.java
===================================================================
--- trunk/freenet/src/freenet/clients/http/filter/FilterCallback.java	2006-03-18 14:52:17 UTC (rev 8274)
+++ trunk/freenet/src/freenet/clients/http/filter/FilterCallback.java	2006-03-18 15:18:54 UTC (rev 8275)
@@ -1,6 +1,6 @@
 package freenet.clients.http.filter;
 
-import freenet.keys.FreenetURI;
+import java.net.URI;
 
 /**
  * Callback to be provided to a content filter.
@@ -10,8 +10,9 @@
 	/**
 	 * Process a URI.
 	 * If it cannot be turned into something sufficiently safe, then return null.
+	 * @param overrideType Force the return type.
 	 */
-	public FreenetURI processURI(FreenetURI uri);
+	public String processURI(String uri, String overrideType);
 
 	/**
 	 * Should we allow GET forms?

Added: trunk/freenet/src/freenet/clients/http/filter/GenericReadFilterCallback.java
===================================================================
--- trunk/freenet/src/freenet/clients/http/filter/GenericReadFilterCallback.java	2006-03-18 14:52:17 UTC (rev 8274)
+++ trunk/freenet/src/freenet/clients/http/filter/GenericReadFilterCallback.java	2006-03-18 15:18:54 UTC (rev 8275)
@@ -0,0 +1,69 @@
+package freenet.clients.http.filter;
+
+import java.net.MalformedURLException;
+import java.net.URI;
+import java.net.URISyntaxException;
+
+import freenet.keys.FreenetURI;
+import freenet.pluginmanager.HTTPRequest;
+import freenet.support.Logger;
+
+/**
+ * Filter callback used when data is read through fproxy. Forms are never
+ * allowed. A URI is kept only if it is relative or its path parses as a
+ * Freenet URI; it is rewritten to a path plus an optional ?type= parameter.
+ */
+public class GenericReadFilterCallback implements FilterCallback {
+
+	public boolean allowGetForms() {
+		return false;
+	}
+
+	public boolean allowPostForms() {
+		return false;
+	}
+
+	/**
+	 * Process a URI found in filtered content.
+	 * @param u The URI as it appears in the content.
+	 * @param overrideType If non-null, replaces any ?type= parameter in u.
+	 * @return The rewritten URI, or null if u cannot be parsed, has no
+	 * path, or has an absolute path that is not a Freenet URI.
+	 */
+	public String processURI(String u, String overrideType) {
+		URI uri;
+		try {
+			uri = new URI(u);
+		} catch (URISyntaxException e1) {
+			return null;
+		}
+		String path = uri.getPath();
+		if(path == null) {
+			// Opaque URIs (e.g. mailto:someone) have no path component;
+			// they cannot refer to anything we serve, so drop them.
+			Logger.normal(this, "Unrecognized URI, dropped: "+uri);
+			return null;
+		}
+		if(path.startsWith("/")) {
+			// Try to make it into a FreenetURI
+			try {
+				FreenetURI furi = new FreenetURI(path.substring(1));
+				return processURI(furi, uri, overrideType);
+			} catch (MalformedURLException e) {
+				// Obviously not a Freenet URI!
+			}
+		} else {
+			// Relative URI
+			// FIXME resolve it
+			// FIXME Note that we allow links to / inlines from fproxy services.
+			// This is okay because we don't allow forms.
+			HTTPRequest req = new HTTPRequest(uri);
+			return finishProcess(req, overrideType, path);
+		}
+		Logger.normal(this, "Unrecognized URI, dropped: "+uri);
+		return null;
+	}
+
+	/**
+	 * Build the final URI: path, plus ?type= if a type was given in the
+	 * original request or forced by overrideType (which takes precedence).
+	 * All other query parameters are discarded.
+	 */
+	private String finishProcess(HTTPRequest req, String overrideType, String path) {
+		String typeOverride = req.getParam("type", null);
+		if(overrideType != null)
+			typeOverride = overrideType;
+		// REDFLAG any other options we should support? 
+		// Obviously we don't want to support ?force= !!
+		// At the moment, ?type= and ?force= are the only options supported by Fproxy anyway.
+		// NOTE(review): typeOverride is appended without URL-encoding. If
+		// getParam() returns a decoded value, a type containing '&' could
+		// smuggle in further parameters - confirm and encode if so.
+		String ret = path;
+		if(typeOverride != null)
+			ret = ret + "?type=" + typeOverride;
+		return ret;
+	}
+
+	/**
+	 * Handle a URI whose path parsed as a Freenet URI: re-serialize the key
+	 * and keep only the type parameter from the query.
+	 */
+	private String processURI(FreenetURI furi, URI uri, String overrideType) {
+		// Valid freenet URI, allow it
+		// Now what about the queries?
+		HTTPRequest req = new HTTPRequest(uri);
+		return finishProcess(req, overrideType, "/" + furi.toString(false));
+	}
+	
+}

Added: trunk/freenet/src/freenet/clients/http/filter/HTMLFilter.java
===================================================================
--- trunk/freenet/src/freenet/clients/http/filter/HTMLFilter.java	2006-03-18 14:52:17 UTC (rev 8274)
+++ trunk/freenet/src/freenet/clients/http/filter/HTMLFilter.java	2006-03-18 15:18:54 UTC (rev 8275)
@@ -0,0 +1,1803 @@
+/* -*- Mode: java; c-basic-indent: 4; tab-width: 4 -*- */
+
+package freenet.clients.http.filter;
+
+import java.io.BufferedReader;
+import java.io.BufferedWriter;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.InputStreamReader;
+import java.io.OutputStream;
+import java.io.OutputStreamWriter;
+import java.io.Reader;
+import java.io.StringReader;
+import java.io.StringWriter;
+import java.io.UnsupportedEncodingException;
+import java.io.Writer;
+import java.net.URI;
+import java.net.URISyntaxException;
+import java.util.Enumeration;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Hashtable;
+import java.util.Iterator;
+import java.util.Locale;
+import java.util.StringTokenizer;
+import java.util.Vector;
+
+import freenet.support.Bucket;
+import freenet.support.BucketFactory;
+import freenet.support.HTMLDecoder;
+import freenet.support.HTMLEncoder;
+import freenet.support.Logger;
+import freenet.support.io.NullBucket;
+import freenet.support.io.NullWriter;
+
+public class HTMLFilter implements ContentDataFilter, CharsetExtractor {
+
+	// Debug flag; only changed through setDebug().
+	private boolean debug = false;
+	// When true, tags with no registered verifier are silently removed.
+	private static boolean deleteWierdStuff = true;
+	// When true, problems are handled by deleting the offending markup (often
+	// leaving an HTML comment behind) rather than by throwing a
+	// DataFilterException.
+	private static boolean deleteErrors = true;
+	// Only changed through setAllowSecurityErrors().
+	private boolean allowSecurityErrors = false;
+	// Only changed through setAllowSecurityWarnings().
+	private boolean allowSecurityWarnings = false;
+	// Only changed through setParanoidStringCheck().
+	private boolean cssParanoidStringCheck = false;
+
+	// Generic user-facing warning text for unrecognised content.
+	private final static String possibleAnonCompromiseMsg =
+		"You have retrieved some content which is not recognised by FProxy, and so we "
+			+ "don't know what your web browser might do with it.  It could be harmless, "
+			+ "but it could make your web browser do something which would compromise your "
+			+ "anonymity.";
+	/**
+	 * Set the paranoid CSS string check flag.
+	 *
+	 * @param b the new value of the flag.
+	 */
+	public void setParanoidStringCheck(boolean b) {
+		this.cssParanoidStringCheck = b;
+	}
+
+	/**
+	 * Set the "allow security warnings" flag.
+	 *
+	 * @param value the new value of the flag.
+	 */
+	public void setAllowSecurityWarnings(boolean value) {
+		this.allowSecurityWarnings = value;
+	}
+
+	/**
+	 * Set the "allow security errors" flag.
+	 *
+	 * @param value the new value of the flag.
+	 */
+	public void setAllowSecurityErrors(boolean value) {
+		this.allowSecurityErrors = value;
+	}
+
+	/**
+	 * Set the debug flag.
+	 *
+	 * @param debug the new value of the flag.
+	 */
+	public void setDebug(boolean debug) {
+		this.debug = debug;
+	}
+
+	public Bucket readFilter(Bucket bucket, BucketFactory bf, String charset, HashMap otherParams, FilterCallback cb) throws DataFilterException, IOException {
+		Logger.minor(this, "readFilter(): charset="+charset);
+		InputStream strm = bucket.getInputStream();
+		Bucket temp = bf.makeBucket(bucket.size());
+		OutputStream os = temp.getOutputStream();
+		Reader r;
+		Writer w;
+		try {
+			r = new BufferedReader(new InputStreamReader(strm, charset), 32768);
+			w = new BufferedWriter(new OutputStreamWriter(os, charset), 32768);
+		} catch (UnsupportedEncodingException e) {
+			os.close();
+			strm.close();
+			throw new DataFilterException("Warning: Unknown character set ("+charset+")", "Warning: Unknown character set ("+HTMLEncoder.encode(charset)+")",
+					"<p><b>Unknown character set</b> The page you are about to display has an unknown character set. "+
+					"This means that we are not able to filter the page, and it may compromize your anonymity.");
+		}
+		HTMLParseContext pc = new HTMLParseContext(r, w, charset, cb);
+		pc.run(temp);
+		r.close();
+		w.close();
+		return temp;
+	}
+	
+	/**
+	 * Not implemented by this filter.
+	 *
+	 * @throws UnsupportedOperationException always.
+	 */
+	public Bucket writeFilter(Bucket bucket, BucketFactory bf, String charset, HashMap otherParams, FilterCallback cb) throws DataFilterException, IOException {
+		throw new UnsupportedOperationException();
+	}
+	
+	/**
+	 * Try to discover the character set a document declares for itself, by
+	 * running the parser over it with all output discarded.
+	 * <p>
+	 * Any failure during the parse is logged and ignored; the method then
+	 * returns whatever the parse context had detected up to that point.
+	 *
+	 * @param bucket the HTML data to inspect.
+	 * @param parseCharset the character set used to decode the data for this
+	 * detection pass.
+	 * @return the detected charset as recorded on the parse context, which may
+	 * be null.
+	 * @throws IOException if the bucket's input stream cannot be opened.
+	 * @throws Error if parseCharset is not a supported encoding.
+	 */
+	public String getCharset(Bucket bucket, String parseCharset) throws DataFilterException, IOException {
+		Logger.minor(this, "getCharset(): default="+parseCharset);
+		InputStream strm = bucket.getInputStream();
+		try {
+			Writer w = new NullWriter();
+			Reader r;
+			try {
+				r = new BufferedReader(new InputStreamReader(strm, parseCharset), 32768);
+			} catch (UnsupportedEncodingException e) {
+				throw new Error(e);
+			}
+			HTMLParseContext pc = new HTMLParseContext(r, w, null, new NullFilterCallback());
+			try {
+				pc.run(null);
+			} catch (Throwable t) {
+				// Ignore ALL errors
+				Logger.minor(this, "Caught "+t+" trying to detect charset with "+parseCharset);
+			}
+			return pc.detectedCharset;
+		} finally {
+			// The stream used to be left open. Close it on every path, but
+			// never let a close failure replace the result or the real error.
+			try {
+				strm.close();
+			} catch (IOException e) {
+				Logger.minor(this, "Failed to close input stream in getCharset(): "+e);
+			}
+		}
+	}
+
+	class HTMLParseContext {
+		Reader r;
+		Writer w;
+		String charset;
+		String detectedCharset;
+		final FilterCallback cb;
+
+		HTMLParseContext(Reader r, Writer w, String charset, FilterCallback cb) {
+			this.r = r;
+			this.w = w;
+			this.charset = charset;
+			this.cb = cb;
+		}
+
+		Bucket run(Bucket temp) throws IOException, DataFilterException {
+
+			/**
+			 * TOKENIZE Modes:
+			 * <p>0) in text transitions: '<' ->(1) 1) in tag, not in
+			 * quotes/comment/whitespace transitions: whitespace -> (4) (save
+			 * current element) '"' -> (2) '--' at beginning of tag -> (3) '>' ->
+			 * process whole tag 2) in tag, in quotes transitions: '"' -> (1)
+			 * '>' -> grumble about markup in quotes in tag might confuse older
+			 * user-agents (stay in current state) 3) in tag, in comment
+			 * transitions: '-->' -> save/ignore comment, go to (0) '<' or '>' ->
+			 * grumble about markup in comments 4) in tag, in whitespace
+			 * transitions: '"' -> (2) '>' -> save tag, (0) anything else not
+			 * whitespace -> (1)
+			 * </p>
+			 */
+			StringBuffer b = new StringBuffer(100);
+			Vector splitTag = new Vector();
+			char pprevC = 0;
+			char prevC = 0;
+			char c = 0;
+			mode = INTEXT;
+			while (true) {
+				int x = r.read();
+				if (x == -1) {
+					switch (mode) {
+						case INTEXT :
+							saveText(b, w, this);
+							break;
+						default :
+							// Dump unfinished tag
+							break;
+					}
+					break;
+				} else {
+					pprevC = prevC;
+					prevC = c;
+					c = (char) x;
+					switch (mode) {
+						case INTEXT :
+							if (c == '<') {
+								saveText(b, w, this);
+								b.setLength(0);
+								mode = INTAG;
+							} else {
+								b.append(c);
+							}
+							break;
+						case INTAG :
+							if (HTMLDecoder.isWhitespace(c)) {
+								splitTag.add(b.toString());
+								mode = INTAGWHITESPACE;
+								b.setLength(0);
+							} else if (c == '>') {
+								splitTag.add(b.toString());
+								b.setLength(0);
+								processTag(splitTag, w, this);
+								splitTag.clear();
+								mode = INTEXT;
+							} else if (
+								b.length() == 2
+									&& c == '-'
+									&& prevC == '-'
+									&& pprevC == '!') {
+								mode = INTAGCOMMENT;
+								b.append(c);
+							} else if (c == '"') {
+								mode = INTAGQUOTES;
+								b.append(c);
+							} else if (c == '\'') {
+								mode = INTAGSQUOTES;
+								b.append(c);
+							} else {
+								b.append(c);
+							}
+							break;
+						case INTAGQUOTES :
+							if (c == '"') {
+								mode = INTAG;
+								b.append(c); // Part of the element
+							} else if (c == '>' || c == '<') {
+								if (!deleteErrors) {
+									throwFilterException("Tags in markup");
+									b.append(c);
+									return new NullBucket();
+								} else {
+									if (c == '>') {
+										w.write(
+											"<!-- Tags in string attribute -->");
+										splitTag.clear();
+										b.setLength(0);
+										mode = INTEXT;
+										// End tag now
+									} else {
+										killTag = true;
+										writeAfterTag
+											+= "<!-- Tags in string attribute -->";
+										// Wait for end of tag then zap it
+									}
+								}
+							} else {
+								b.append(c);
+							}
+							break;
+						case INTAGSQUOTES :
+							if (c == '\'') {
+								mode = INTAG;
+								b.append(c); // Part of the element
+							} else if (c == '>' || c == '<') {
+								if (!deleteErrors) {
+									throwFilterException("Tags in markup");
+									b.append(c);
+									return new NullBucket();
+								} else {
+									if (c == '>') {
+										w.write(
+											"<!-- Tags in string attribute -->");
+										splitTag.clear();
+										b.setLength(0);
+										mode = INTEXT;
+										// End tag now
+									} else {
+										killTag = true;
+										writeAfterTag
+											+= "<!-- Tags in string attribute -->";
+										// Wait for end of tag then zap it
+									}
+									writeAfterTag
+										+= "<!-- Tags in string attribute -->";
+									killTag = true;
+								}
+							} else {
+								b.append(c);
+							}
+							break;
+							/*
+							 * Comments are often used to temporarily disable
+							 * markup; I shall allow it. (avian) White space is
+							 * not permitted between the markup declaration
+							 * open delimiter ("
+							 * <!") and the comment open delimiter ("--"), but
+							 * is permitted between the comment close delimiter
+							 * ("--") and the markup declaration close
+							 * delimiter (">"). A common error is to include a
+							 * string of hyphens ("---") within a comment.
+							 * Authors should avoid putting two or more
+							 * adjacent hyphens inside comments. However, the
+							 * only browser that actually gets it right is IE
+							 * (others either don't allow it or allow other
+							 * chars as well). The only safe course of action
+							 * is to allow any and all chars, but eat them.
+							 * (avian)
+							 */
+						case INTAGCOMMENT :
+							if (b.length() >= 4 && c == '-' && prevC == '-') {
+								b.append(c);
+								mode = INTAGCOMMENTCLOSING;
+							} else
+								b.append(c);
+							break;
+						case INTAGCOMMENTCLOSING :
+							if (c == '>') {
+								saveComment(b, w, this);
+								b.setLength(0);
+								mode = INTEXT;
+							}
+							break;
+						case INTAGWHITESPACE :
+							if (c == '"') {
+								mode = INTAGQUOTES;
+								b.append(c);
+							} else if (c == '\'') {
+								// e.g. <div align = 'center'> (avian)
+								mode = INTAGSQUOTES;
+								b.append(c);
+							} else if (c == '>') {
+								if (!killTag)
+									processTag(splitTag, w, this);
+								killTag = false;
+								splitTag.clear();
+								mode = INTEXT;
+							} else if (HTMLDecoder.isWhitespace(c)) {
+								// More whitespace, what fun
+							} else {
+								mode = INTAG;
+								b.append(c);
+							}
+					}
+				}
+			}
+			return temp;
+		}
+
+		int mode;
+		static final int INTEXT = 0;
+		static final int INTAG = 1;
+		static final int INTAGQUOTES = 2;
+		static final int INTAGSQUOTES = 3;
+		static final int INTAGCOMMENT = 4;
+		static final int INTAGCOMMENTCLOSING = 5;
+		static final int INTAGWHITESPACE = 6;
+		boolean killTag = false; // just this one
+		boolean writeStyleScriptWithTag = false; // just this one
+		boolean expectingBadComment = false;
+		// has to be set on or off explicitly by tags
+		boolean inStyle = false; // has to be set on or off explicitly by tags
+		boolean inScript = false; // has to be set on or off explicitly by tags
+		boolean killText = false; // has to be set on or off explicitly by tags
+		int styleScriptRecurseCount = 0;
+		String currentStyleScriptChunk = new String();
+		String writeAfterTag = "";
+	}
+
+	/**
+	 * Emit a run of text found between tags.
+	 * <p>
+	 * Nothing is emitted while killText is set. Inside a style element the
+	 * text is accumulated in the context instead of being written, because it
+	 * is parsed and written elsewhere.
+	 *
+	 * @param s the text collected so far.
+	 * @param w destination for ordinary text.
+	 * @param pc the parse context holding the flags and the style buffer.
+	 * @throws IOException if writing fails.
+	 */
+	void saveText(StringBuffer s, Writer w, HTMLParseContext pc)
+		throws IOException {
+		if (pc.killText)
+			return;
+		String text = s.toString();
+		if (!pc.inStyle) {
+			w.write(text);
+			return;
+		}
+		pc.currentStyleScriptChunk += text;
+	}
+
+	/**
+	 * Sanitize one complete tag and write whatever survives.
+	 * <p>
+	 * If the tag is marked to be killed it is dropped and the kill flag is
+	 * reset. If sanitizing deletes it, nothing is written. Otherwise any
+	 * pending style chunk is written first, then the tag, then any text queued
+	 * in writeAfterTag.
+	 *
+	 * @param splitTag the tag split into its element name and attribute tokens.
+	 * @param w destination for the filtered output.
+	 * @param pc the parse context.
+	 * @throws IOException if writing fails.
+	 * @throws DataFilterException if the tag is rejected outright.
+	 */
+	void processTag(Vector splitTag, Writer w, HTMLParseContext pc)
+		throws IOException, DataFilterException {
+		// Parsing comes first, as before, so the vector is normalised even
+		// when the tag is going to be dropped.
+		ParsedTag t = new ParsedTag(splitTag);
+		if (pc.killTag) {
+			pc.killTag = false;
+			pc.writeStyleScriptWithTag = false;
+			return;
+		}
+		// Check that it is a recognized tag, and clean up its attributes
+		t = t.sanitize(pc);
+		if (t == null) {
+			pc.writeStyleScriptWithTag = false;
+			return;
+		}
+		if (pc.writeStyleScriptWithTag) {
+			pc.writeStyleScriptWithTag = false;
+			String style = pc.currentStyleScriptChunk;
+			if (style == null || style.length() == 0)
+				pc.writeAfterTag += "<!-- deleted unknown style -->";
+			else
+				w.write(style);
+			pc.currentStyleScriptChunk = "";
+		}
+		t.write(w);
+		if (pc.writeAfterTag.length() > 0) {
+			w.write(pc.writeAfterTag);
+			pc.writeAfterTag = "";
+		}
+	}
+
+	void saveComment(StringBuffer s, Writer w, HTMLParseContext pc)
+		throws IOException {
+		if (pc.expectingBadComment)
+			return; // ignore it
+
+		if (pc.inStyle || pc.inScript) {
+			pc.currentStyleScriptChunk += "<" + s + ">";
+			return; // </style> handler should write
+		}
+		if (pc.killTag) {
+			pc.killTag = false;
+			return;
+		}
+		w.write('<');
+		w.write(s.toString());
+		w.write('>');
+	}
+
+	static void throwFilterException(String s) throws DataFilterException {
+		// FIXME
+		throw new DataFilterException(s, s,
+				"The HTML filter failed to parse the page: "+s);
+	}
+
+	/**
+	 * A tag split into its element name, its raw attribute tokens, and flags
+	 * recording a leading slash (closing tag) or trailing slash (empty tag).
+	 */
+	static class ParsedTag {
+		String element = null;
+		String[] unparsedAttrs = null;
+		boolean startSlash = false;
+		boolean endSlash = false;
+
+		/**
+		 * Copy a tag, replacing its attributes.
+		 *
+		 * @param t the tag to copy the element name and slash flags from.
+		 * @param outAttrs the attribute strings of the new tag; may be null.
+		 */
+		public ParsedTag(ParsedTag t, String[] outAttrs) {
+			this.element = t.element;
+			this.unparsedAttrs = outAttrs;
+			this.startSlash = t.startSlash;
+			this.endSlash = t.endSlash;
+		}
+
+		/**
+		 * Build a tag from its tokens. The first token is the element name and
+		 * the rest are attribute tokens. Leading and trailing slashes are
+		 * stripped from the tokens IN THE GIVEN VECTOR and recorded as flags.
+		 * An empty vector leaves the element null.
+		 *
+		 * @param v the tokens of the tag, as Strings; modified in place.
+		 */
+		public ParsedTag(Vector v) {
+			int len = v.size();
+			if (len == 0)
+				return;
+			String s = (String) v.elementAt(len - 1);
+			if ((len - 1 != 0 || s.length() > 1) && s.endsWith("/")) {
+				s = s.substring(0, s.length() - 1);
+				v.setElementAt(s, len - 1);
+				// A token that was only "/" is not an attribute
+				if (s.length() == 0)
+					len--;
+				endSlash = true;
+			}
+			s = (String) v.elementAt(0);
+			if (s.length() > 1 && s.startsWith("/")) {
+				s = s.substring(1);
+				v.setElementAt(s, 0);
+				startSlash = true;
+			}
+			element = (String) v.elementAt(0);
+			if (len > 1) {
+				unparsedAttrs = new String[len - 1];
+				for (int x = 1; x < len; x++)
+					unparsedAttrs[x - 1] = (String) v.elementAt(x);
+			}
+		}
+
+		/**
+		 * Run this tag through the verifier registered for its element.
+		 *
+		 * @param pc the parse context.
+		 * @return the sanitized tag, or null if the tag must be deleted.
+		 * @throws DataFilterException if the tag is unknown and neither
+		 * deleteWierdStuff nor deleteErrors is set, or if the verifier rejects
+		 * it.
+		 */
+		public ParsedTag sanitize(HTMLParseContext pc) throws DataFilterException {
+			if (element == null)
+				return null; // built from an empty token list: nothing to keep
+			// Tag names are ASCII; use a fixed locale so that, for example,
+			// "TITLE" is not lower-cased to a dotless-i form under a Turkish
+			// default locale and then treated as unknown.
+			TagVerifier tv =
+				(TagVerifier) allowedTagsVerifiers.get(
+					element.toLowerCase(Locale.ENGLISH));
+			if (tv != null)
+				return tv.sanitize(this, pc);
+			// Unknown tag: either delete it quietly or reject the document.
+			if (!deleteWierdStuff && !deleteErrors)
+				throwFilterException(
+					"Unknown tag: " + HTMLEncoder.encode(element));
+			return null;
+		}
+
+		/**
+		 * Render the tag as markup.
+		 *
+		 * @return the tag text, or null (not an empty string) if there is no
+		 * element; write() relies on this.
+		 */
+		public String toString() {
+			if (element == null)
+				return null;
+			StringBuffer sb = new StringBuffer("<");
+			if (startSlash)
+				sb.append('/');
+			sb.append(element);
+			if (unparsedAttrs != null) {
+				int n = unparsedAttrs.length;
+				for (int i = 0; i < n; i++) {
+					sb.append(' ').append(unparsedAttrs[i]);
+				}
+			}
+			if (endSlash)
+				sb.append(" /");
+			sb.append('>');
+			return sb.toString();
+		}
+
+		/**
+		 * Write the tag to the given writer; writes nothing if there is no
+		 * element.
+		 *
+		 * @throws IOException if writing fails.
+		 */
+		public void write(Writer w) throws IOException {
+			String s = toString();
+			if (s != null)
+				w.write(s);
+		}
+	}
+
+	/** Maps a lower-case element name to the verifier that sanitizes it. */
+	static final Hashtable allowedTagsVerifiers = new Hashtable();
+	static final String[] emptyStringArray = new String[0];
+
+	// Registers every supported element. For the *CoreTagVerifier and
+	// LinkTagVerifier constructors the arrays are, in order: plain attributes,
+	// URI-valued attributes and (where present) what look like event-handler
+	// attributes - those constructors are not defined here, TODO confirm.
+	// Runs of consecutive elements registered with identical arguments are
+	// folded into loops; the registration order is unchanged.
+	static {
+		final Hashtable verifiers = allowedTagsVerifiers;
+		final String[] none = emptyStringArray;
+
+		// Document structure
+		verifiers.put("?xml", new XmlTagVerifier());
+		verifiers.put("!doctype", new DocTypeTagVerifier("!doctype"));
+		verifiers.put("html", new HtmlTagVerifier());
+		verifiers.put("head",
+			new TagVerifier("head", new String[] { "id" }, new String[] { "profile" }));
+		verifiers.put("title", new TagVerifier("title", new String[] { "id" }));
+		verifiers.put("meta", new MetaTagVerifier());
+		verifiers.put("body",
+			new CoreTagVerifier("body",
+				new String[] { "bgcolor", "text", "link", "vlink", "alink" },
+				new String[] { "background" },
+				new String[] { "onload", "onunload" }));
+
+		// Block elements that only take "align"
+		String[] alignable =
+			{ "div", "h1", "h2", "h3", "h4", "h5", "h6", "p", "caption" };
+		for (int i = 0; i < alignable.length; i++)
+			verifiers.put(alignable[i],
+				new CoreTagVerifier(alignable[i], new String[] { "align" }, none, none));
+
+		// Elements with no attributes of their own
+		String[] plain = {
+			"span", "address", "em", "strong", "dfn", "code", "samp", "kbd",
+			"var", "cite", "abbr", "acronym", "sub", "sup", "dt", "dd",
+			"tt", "i", "b", "big", "small", "strike", "s", "u",
+			"noframes", "fieldset", "noscript", "xmp", "listing", "plaintext",
+			"center", "bdo" };
+		for (int i = 0; i < plain.length; i++)
+			verifiers.put(plain[i], new CoreTagVerifier(plain[i], none, none, none));
+
+		// Quotations
+		String[] quoting = { "blockquote", "q" };
+		for (int i = 0; i < quoting.length; i++)
+			verifiers.put(quoting[i],
+				new CoreTagVerifier(quoting[i], none, new String[] { "cite" }, none));
+
+		verifiers.put("br",
+			new BaseCoreTagVerifier("br", new String[] { "clear" }, none));
+		verifiers.put("pre",
+			new CoreTagVerifier("pre", new String[] { "width", "xml:space" }, none, none));
+
+		// Edits
+		String[] edits = { "ins", "del" };
+		for (int i = 0; i < edits.length; i++)
+			verifiers.put(edits[i],
+				new CoreTagVerifier(edits[i],
+					new String[] { "datetime" }, new String[] { "cite" }, none));
+
+		// Lists
+		verifiers.put("ul",
+			new CoreTagVerifier("ul", new String[] { "type", "compact" }, none, none));
+		verifiers.put("ol",
+			new CoreTagVerifier("ol", new String[] { "type", "compact", "start" }, none, none));
+		verifiers.put("li",
+			new CoreTagVerifier("li", new String[] { "type", "value" }, none, none));
+		String[] compactLists = { "dl", "dir", "menu" };
+		for (int i = 0; i < compactLists.length; i++)
+			verifiers.put(compactLists[i],
+				new CoreTagVerifier(compactLists[i], new String[] { "compact" }, none, none));
+
+		// Tables
+		verifiers.put("table",
+			new CoreTagVerifier("table",
+				new String[] {
+					"summary", "width", "border", "frame", "rules",
+					"cellspacing", "cellpadding", "align", "bgcolor" },
+				new String[] { "background" },
+				none));
+		String[] rowGroups = { "thead", "tfoot", "tbody" };
+		for (int i = 0; i < rowGroups.length; i++)
+			verifiers.put(rowGroups[i],
+				new CoreTagVerifier(rowGroups[i],
+					new String[] { "align", "char", "charoff", "valign" }, none, none));
+		String[] columns = { "colgroup", "col" };
+		for (int i = 0; i < columns.length; i++)
+			verifiers.put(columns[i],
+				new CoreTagVerifier(columns[i],
+					new String[] { "span", "width", "align", "char", "charoff", "valign" },
+					none, none));
+		verifiers.put("tr",
+			new CoreTagVerifier("tr",
+				new String[] { "align", "char", "charoff", "valign", "bgcolor" },
+				none, none));
+		String[] cells = { "th", "td" };
+		for (int i = 0; i < cells.length; i++)
+			verifiers.put(cells[i],
+				new CoreTagVerifier(cells[i],
+					new String[] {
+						"abbr", "axis", "headers", "scope", "rowspan", "colspan",
+						"align", "char", "charoff", "valign", "nowrap", "bgcolor",
+						"width", "height" },
+					new String[] { "background" },
+					none));
+
+		// Links
+		verifiers.put("a",
+			new LinkTagVerifier("a",
+				new String[] { "accesskey", "tabindex", "name", "shape", "coords", "target" },
+				none,
+				new String[] { "onfocus", "onblur" }));
+		verifiers.put("link",
+			new LinkTagVerifier("link", new String[] { "media", "target" }, none, none));
+		verifiers.put("base",
+			new TagVerifier("base", new String[] { "id", "target" }, new String[] { "href" }));
+
+		// Images and image maps
+		verifiers.put("img",
+			new CoreTagVerifier("img",
+				new String[] {
+					"alt", "name", "height", "width", "ismap", "align",
+					"border", "hspace", "vspace" },
+				new String[] { "src", "longdesc", "usemap" },
+				none));
+		// FIXME: object tag -
+		// http://www.w3.org/TR/html4/struct/objects.html#h-13.3
+		// FIXME: param tag -
+		// http://www.w3.org/TR/html4/struct/objects.html#h-13.3.2
+		// applet tag PROHIBITED - we do not support applets (FIXME?)
+		verifiers.put("map",
+			new CoreTagVerifier("map", new String[] { "name" }, none, none));
+		verifiers.put("area",
+			new CoreTagVerifier("area",
+				new String[] {
+					"accesskey", "tabindex", "shape", "coords", "nohref", "alt", "target" },
+				new String[] { "href" },
+				new String[] { "onfocus", "onblur" }));
+
+		// Presentation
+		verifiers.put("style", new StyleTagVerifier());
+		String[] fonts = { "font", "basefont" };
+		for (int i = 0; i < fonts.length; i++)
+			verifiers.put(fonts[i],
+				new BaseCoreTagVerifier(fonts[i],
+					new String[] { "size", "color", "face" }, none));
+		verifiers.put("hr",
+			new CoreTagVerifier("hr",
+				new String[] { "align", "noshade", "size", "width" }, none, none));
+
+		// Frames
+		verifiers.put("frameset",
+			new CoreTagVerifier("frameset",
+				new String[] { "rows", "cols" },
+				none,
+				new String[] { "onload", "onunload" },
+				false));
+		verifiers.put("frame",
+			new BaseCoreTagVerifier("frame",
+				new String[] {
+					"name", "frameborder", "marginwidth", "marginheight",
+					"noresize", "scrolling" },
+				new String[] { "longdesc", "src" }));
+		verifiers.put("iframe",
+			new BaseCoreTagVerifier("iframe",
+				new String[] {
+					"name", "frameborder", "marginwidth", "marginheight",
+					"scrolling", "align", "height", "width" },
+				new String[] { "longdesc", "src" }));
+
+		// FIXME no form support for now; when we have NIM posting support, reinstate, and
+		// LIMIT TO METHOD=GET !!!
+
+//		verifiers.put("form",
+//			new CoreTagVerifier("form",
+//				new String[] {
+//					"method", "name", "enctype", "accept", "accept-charset", "target" },
+//				new String[] { "action" },
+//				new String[] { "onsubmit", "onreset" }));
+
+		// Form controls
+		verifiers.put("input",
+			new CoreTagVerifier("input",
+				new String[] {
+					"accesskey", "tabindex", "type", "name", "value", "checked",
+					"disabled", "readonly", "size", "maxlength", "alt", "ismap",
+					"accept", "align" },
+				new String[] { "src", "usemap" },
+				new String[] { "onfocus", "onblur", "onselect", "onchange" }));
+		verifiers.put("button",
+			new CoreTagVerifier("button",
+				new String[] { "accesskey", "tabindex", "name", "value", "type", "disabled" },
+				none,
+				new String[] { "onfocus", "onblur" }));
+		verifiers.put("select",
+			new CoreTagVerifier("select",
+				new String[] { "name", "size", "multiple", "disabled", "tabindex" },
+				none,
+				new String[] { "onfocus", "onblur", "onchange" }));
+		verifiers.put("optgroup",
+			new CoreTagVerifier("optgroup", new String[] { "disabled", "label" }, none, none));
+		verifiers.put("option",
+			new CoreTagVerifier("option",
+				new String[] { "selected", "disabled", "label", "value" }, none, none));
+		verifiers.put("textarea",
+			new CoreTagVerifier("textarea",
+				new String[] {
+					"accesskey", "tabindex", "name", "rows", "cols", "disabled", "readonly" },
+				none,
+				new String[] { "onfocus", "onblur", "onselect", "onchange" }));
+		verifiers.put("isindex",
+			new BaseCoreTagVerifier("isindex", new String[] { "prompt" }, none));
+		verifiers.put("label",
+			new CoreTagVerifier("label",
+				new String[] { "for", "accesskey" },
+				none,
+				new String[] { "onfocus", "onblur" }));
+		verifiers.put("legend",
+			new CoreTagVerifier("legend", new String[] { "accesskey", "align" }, none, none));
+
+		verifiers.put("script", new ScriptTagVerifier());
+	}
+
+	static class TagVerifier {
+		final String tag;
+		final HashSet allowedAttrs;
+		final HashSet uriAttrs;
+
+		TagVerifier(String tag, String[] allowedAttrs) {
+			this(tag, allowedAttrs, null);
+		}
+
+		TagVerifier(String tag, String[] allowedAttrs, String[] uriAttrs) {
+			this.tag = tag;
+			this.allowedAttrs = new HashSet();
+			if (allowedAttrs != null) {
+				for (int x = 0; x < allowedAttrs.length; x++)
+					this.allowedAttrs.add(allowedAttrs[x]);
+			}
+			this.uriAttrs = new HashSet();
+			if (uriAttrs != null) {
+				for (int x = 0; x < uriAttrs.length; x++)
+					this.uriAttrs.add(uriAttrs[x]);
+			}
+		}
+
+		ParsedTag sanitize(ParsedTag t, HTMLParseContext pc) throws DataFilterException {
+			Hashtable h = new Hashtable();
+			boolean equals = false;
+			String prevX = "";
+			if (t.unparsedAttrs != null)
+				for (int i = 0; i < t.unparsedAttrs.length; i++) {
+					String s = t.unparsedAttrs[i];
+					if (equals) {
+						equals = false;
+						s = stripQuotes(s);
+						h.remove(prevX);
+						h.put(prevX, s);
+						prevX = "";
+					} else {
+						int idx = s.indexOf('=');
+						if (idx == s.length() - 1) {
+							equals = true;
+							if (idx == 0) {
+								// prevX already set
+							} else {
+								prevX = s.substring(0, s.length() - 1);
+								prevX = prevX.toLowerCase();
+							}
+						} else if (idx > -1) {
+							String x = s.substring(0, idx);
+							if (x.length() == 0)
+								x = prevX;
+							x = x.toLowerCase();
+							String y;
+							if (idx == s.length() - 1)
+								y = "";
+							else
+								y = s.substring(idx + 1, s.length());
+							y = stripQuotes(y);
+							h.remove(x);
+							h.put(x, y);
+							prevX = x;
+						} else {
+							h.remove(s);
+							h.put(s, new Object());
+							prevX = s;
+						}
+					}
+				}
+			h = sanitizeHash(h, t, pc);
+			if (h == null)
+				return null;
+			if (t.startSlash)
+				return new ParsedTag(t, null);
+			String[] outAttrs = new String[h.size()];
+			int i = 0;
+			for (Enumeration e = h.keys(); e.hasMoreElements();) {
+				String x = (String) e.nextElement();
+				Object o = h.get(x);
+				String y;
+				if (o instanceof String)
+					y = (String) o;
+				else
+					y = null;
+				String out = x;
+				if (y != null)
+					out += "=\"" + y + '"';
+				outAttrs[i++] = out;
+			}
+			return new ParsedTag(t, outAttrs);
+		}
+
+		Hashtable sanitizeHash(
+			Hashtable h,
+			ParsedTag p,
+			HTMLParseContext pc) throws DataFilterException {
+			Hashtable hn = new Hashtable();
+			for (Enumeration e = h.keys(); e.hasMoreElements();) {
+				String x = (String) e.nextElement();
+				Object o = h.get(x);
+				// Straight attribs
+				if (allowedAttrs.contains(x)) {
+					hn.put(x, o);
+					continue;
+				}
+				if (uriAttrs.contains(x)) {
+					// URI
+					if (o instanceof String) {
+						// Java's URL handling doesn't seem suitable
+						String uri = (String) o;
+						uri = HTMLDecoder.decode(uri);
+						uri = sanitizeURI(uri, null, null, pc.cb);
+						if (uri != null) {
+							uri = HTMLEncoder.encode(uri);
+							hn.put(x, uri);
+						}
+					}
+					// FIXME: rewrite absolute URLs, handle ?date= etc
+				}
+			}
+			// lang, xml:lang and dir can go on anything
+			// lang or xml:lang = language [ "-" country [ "-" variant ] ]
+			// The variant can be just about anything; no way to test (avian)
+			String s = getHashString(h, "lang");
+			if (s != null)
+				hn.put("lang", s);
+			s = getHashString(h, "xml:lang");
+			if (s != null)
+				hn.put("xml:lang", s);
+			s = getHashString(h, "dir");
+			if (s != null
+				&& (s.equalsIgnoreCase("ltr") || s.equalsIgnoreCase("rtl")))
+				hn.put("dir", s);
+			return hn;
+		}
+	}
+
+	/**
+	 * Remove one pair of matching quotes surrounding a string.
+	 *
+	 * @param s the string to strip.
+	 * @return s without its first and last character if both are the same
+	 * quote character (double or single); otherwise s unchanged. Only one
+	 * layer of quotes is removed.
+	 */
+	static String stripQuotes(String s) {
+		int len = s.length();
+		if (len < 2)
+			return s;
+		char first = s.charAt(0);
+		boolean isQuote = (first == '"' || first == '\'');
+		if (isQuote && s.charAt(len - 1) == first)
+			return s.substring(1, len - 1);
+		return s;
+	}
+
+	//	static String[] titleString = new String[] {"title"};
+
+	/**
+	 * Shared handling for elements whose content is accumulated and processed
+	 * as a whole instead of being written as text. It tracks nesting through
+	 * the context's recurse counter, and on the closing tag hands the
+	 * collected chunk to the subclass.
+	 */
+	static abstract class ScriptStyleTagVerifier extends TagVerifier {
+		ScriptStyleTagVerifier(
+			String tag,
+			String[] allowedAttrs,
+			String[] uriAttrs) {
+			super(tag, allowedAttrs, uriAttrs);
+		}
+
+		/** Record on the context whether we are inside this kind of element. */
+		abstract void setStyle(boolean b, HTMLParseContext pc);
+
+		/** Tell whether the context is currently inside this kind of element. */
+		abstract boolean getStyle(HTMLParseContext pc);
+
+		/** Process the chunk collected on the context. */
+		abstract void processStyle(HTMLParseContext pc);
+
+		/**
+		 * Apply the base attribute filtering, then dispatch to finish() for a
+		 * closing tag or start() for an opening one.
+		 */
+		Hashtable sanitizeHash(
+			Hashtable h,
+			ParsedTag p,
+			HTMLParseContext pc) throws DataFilterException {
+			Hashtable filtered = super.sanitizeHash(h, p, pc);
+			return p.startSlash ? finish(h, filtered, pc) : start(h, filtered, pc);
+		}
+
+		/**
+		 * Handle the closing tag.
+		 *
+		 * @return hn, or null if there was no matching opening tag.
+		 */
+		Hashtable finish(
+			Hashtable h,
+			Hashtable hn,
+			HTMLParseContext pc) throws DataFilterException {
+			// Finishing
+			if (--pc.styleScriptRecurseCount < 0) {
+				if (!deleteErrors)
+					throwFilterException("Too many nested </style> tags - ambiguous or invalid parsing, can't reliably filter so removing the inner tags - garbage may appear in browser");
+				else
+					pc.writeAfterTag
+						+= "<!-- Too many nested style or script tags - ambiguous or invalid parsing -->";
+				return null;
+			}
+			setStyle(false, pc);
+			processStyle(pc);
+			pc.expectingBadComment = false;
+			pc.writeStyleScriptWithTag = true;
+			// Pass it on, no params for </style>
+			return hn;
+		}
+
+		/**
+		 * Handle the opening tag.
+		 *
+		 * @return hn, or null if the tag is nested inside another one or
+		 * declares a type other than text/css.
+		 */
+		Hashtable start(Hashtable h, Hashtable hn, HTMLParseContext pc) throws DataFilterException {
+			if (++pc.styleScriptRecurseCount > 1) {
+				if (!deleteErrors)
+					throwFilterException("Too many nested </style> tags - ambiguous or invalid parsing, can't reliably filter so removing the inner tags - garbage may appear in browser");
+				else
+					pc.writeAfterTag
+						+= "<!-- Too many nested style or script tags -->";
+				return null;
+			}
+			setStyle(true, pc);
+			String type = getHashString(h, "type");
+			if (type == null)
+				return hn;
+			if (!type.equalsIgnoreCase("text/css") /* FIXME */
+				) {
+				// Not a type we can filter: suppress the content too
+				pc.killText = true;
+				pc.expectingBadComment = true;
+				return null; // kill the tag
+			}
+			hn.put("type", "text/css");
+			return hn;
+		}
+	}
+
+	/**
+	 * Verifier for the style element. The collected element body is run
+	 * through sanitizeStyle when the closing tag is processed.
+	 */
+	static class StyleTagVerifier extends ScriptStyleTagVerifier {
+		StyleTagVerifier() {
+			super(
+				"style",
+				new String[] { "id", "media", "title", "xml:space" },
+				emptyStringArray);
+		}
+
+		boolean getStyle(HTMLParseContext pc) {
+			return pc.inStyle;
+		}
+
+		void setStyle(boolean b, HTMLParseContext pc) {
+			pc.inStyle = b;
+		}
+
+		/**
+		 * Replace the collected chunk with its filtered form; if the filter
+		 * throws, log the error and replace the chunk with an empty string.
+		 */
+		void processStyle(HTMLParseContext pc) {
+			String filtered;
+			try {
+				filtered = sanitizeStyle(pc.currentStyleScriptChunk, pc.cb);
+			} catch (DataFilterException e) {
+				Logger.error(this, "Error parsing style: "+e, e);
+				filtered = "";
+			}
+			pc.currentStyleScriptChunk = filtered;
+		}
+	}
+
+	/**
+	 * Verifier for the script element. Scripts are not supported: the tag
+	 * itself is always dropped, and the collected chunk is passed through
+	 * sanitizeScripting.
+	 */
+	static class ScriptTagVerifier extends ScriptStyleTagVerifier {
+		ScriptTagVerifier() {
+			super(
+				"script",
+				new String[] {
+					"id", "charset", "type", "language", "defer", "xml:space" },
+				new String[] { "src" });
+			// FIXME: src is not supported, and type and charset are ignored;
+			// they will need checking if/when scripts are ever supported.
+		}
+
+		/** Always returns null, so the script tag is never written out. */
+		Hashtable sanitizeHash(
+			Hashtable hn,
+			ParsedTag p,
+			HTMLParseContext pc) {
+			return null; // Lose the tags
+		}
+
+		boolean getStyle(HTMLParseContext pc) {
+			return pc.inScript;
+		}
+
+		void setStyle(boolean b, HTMLParseContext pc) {
+			pc.inScript = b;
+		}
+
+		void processStyle(HTMLParseContext pc) {
+			String filtered = sanitizeScripting(pc.currentStyleScriptChunk);
+			pc.currentStyleScriptChunk = filtered;
+		}
+	}
+
+	/**
+	 * Verifier that, on top of the generic filtering done by TagVerifier,
+	 * lets through the HTML core attributes: id, class, style and title.
+	 */
+	static class BaseCoreTagVerifier extends TagVerifier {
+		BaseCoreTagVerifier(
+			String tag,
+			String[] allowedAttrs,
+			String[] uriAttrs) {
+			super(tag, allowedAttrs, uriAttrs);
+		}
+
+		/**
+		 * Copy id, class and title verbatim, and copy style after running
+		 * it through sanitizeStyle and escaping double quotes.
+		 */
+		Hashtable sanitizeHash(
+			Hashtable h,
+			ParsedTag p,
+			HTMLParseContext pc) throws DataFilterException {
+			Hashtable hn = super.sanitizeHash(h, p, pc);
+			// %i18n is dealt with by TagVerifier; what follows is %coreattrs.
+			// id and class are passed through untouched - hopefully nobody
+			// will be stupid enough to encode URLs into them... :)
+			final String[] verbatim = { "id", "class" };
+			for (int i = 0; i < verbatim.length; i++) {
+				String value = getHashString(h, verbatim[i]);
+				if (value != null)
+					hn.put(verbatim[i], value);
+			}
+			String rawStyle = getHashString(h, "style");
+			if (rawStyle != null) {
+				String cleanStyle = sanitizeStyle(rawStyle, pc.cb);
+				// sanitizeStyle returns null when nothing survives filtering
+				if (cleanStyle != null)
+					hn.put("style", escapeQuotes(cleanStyle));
+			}
+			// PARANOIA: title is PLAIN TEXT, right? In all user agents? :)
+			String title = getHashString(h, "title");
+			if (title != null)
+				hn.put("title", title);
+			return hn;
+		}
+	}
+
+	/**
+	 * Verifier that additionally knows about event handler attributes
+	 * (onclick and friends). Each such attribute is passed through
+	 * sanitizeScripting and only kept if that returns a non-null value.
+	 */
+	static class CoreTagVerifier extends BaseCoreTagVerifier {
+		final HashSet eventAttrs;
+		static final String[] stdEvents = {
+			"onclick",
+			"ondblclick",
+			"onmousedown",
+			"onmouseup",
+			"onmouseover",
+			"onmousemove",
+			"onmouseout",
+			"onkeypress",
+			"onkeydown",
+			"onkeyup" };
+
+		/** Same as the five-argument constructor with the standard events included. */
+		CoreTagVerifier(
+			String tag,
+			String[] allowedAttrs,
+			String[] uriAttrs,
+			String[] eventAttrs) {
+			this(tag, allowedAttrs, uriAttrs, eventAttrs, true);
+		}
+
+		/**
+		 * @param eventAttrs extra event attribute names for this tag, may be null
+		 * @param addStdEvents whether to also accept the names in stdEvents
+		 */
+		CoreTagVerifier(
+			String tag,
+			String[] allowedAttrs,
+			String[] uriAttrs,
+			String[] eventAttrs,
+			boolean addStdEvents) {
+			super(tag, allowedAttrs, uriAttrs);
+			HashSet names = new HashSet();
+			addNames(names, eventAttrs);
+			if (addStdEvents)
+				addNames(names, stdEvents);
+			this.eventAttrs = names;
+		}
+
+		/** Add every element of a possibly-null array to the set. */
+		private static void addNames(HashSet target, String[] names) {
+			if (names == null)
+				return;
+			for (int i = 0; i < names.length; i++)
+				target.add(names[i]);
+		}
+
+		Hashtable sanitizeHash(
+			Hashtable h,
+			ParsedTag p,
+			HTMLParseContext pc) throws DataFilterException {
+			Hashtable hn = super.sanitizeHash(h, p, pc);
+			// events (default and added)
+			Iterator it = eventAttrs.iterator();
+			while (it.hasNext()) {
+				String attr = (String) it.next();
+				String handler = getHashString(h, attr);
+				if (handler == null)
+					continue;
+				String cleaned = sanitizeScripting(handler);
+				if (cleaned != null)
+					hn.put(attr, cleaned);
+			}
+			return hn;
+		}
+	}
+
+	/**
+	 * Verifier for tags that link to another resource through an href
+	 * attribute together with type/charset/rel/rev metadata. The href is
+	 * passed to sanitizeURI along with the type and charset the page claims
+	 * for the target.
+	 */
+	static class LinkTagVerifier extends CoreTagVerifier {
+		LinkTagVerifier(
+			String tag,
+			String[] allowedAttrs,
+			String[] uriAttrs,
+			String[] eventAttrs) {
+			super(tag, allowedAttrs, uriAttrs, eventAttrs);
+		}
+
+		/**
+		 * Filter the link-specific attributes. rel and rev are copied when
+		 * an href is present; href, type, charset and hreflang are only
+		 * emitted if sanitizeURI returns a non-null URI.
+		 */
+		Hashtable sanitizeHash(
+			Hashtable h,
+			ParsedTag p,
+			HTMLParseContext pc) throws DataFilterException {
+			Hashtable hn = super.sanitizeHash(h, p, pc);
+			String hreflang = getHashString(h, "hreflang");
+			String charset = null;
+			String type = getHashString(h, "type");
+			if (type != null) {
+				// The type attribute may carry its own "; charset=..." parameter
+				String[] typesplit = splitType(type);
+				type = typesplit[0];
+				if (typesplit[1] != null && typesplit[1].length() > 0)
+					charset = typesplit[1];
+				Logger.debug(
+					this,
+					"Processing link tag, type="
+						+ type
+						+ ", charset="
+						+ charset);
+			}
+			// An explicit charset attribute wins over one embedded in type
+			String c = getHashString(h, "charset");
+			if (c != null)
+				charset = c;
+			String href = getHashString(h, "href");
+			if (href != null) {
+				final String[] rels = new String[] { "rel", "rev" };
+				for (int x = 0; x < rels.length; x++) {
+					String reltype = rels[x];
+					String rel = getHashString(h, reltype);
+					if (rel != null) {
+						StringTokenizer tok = new StringTokenizer(rel, " ");
+						while (tok.hasMoreTokens()) {
+							String t = tok.nextToken();
+							// NOTE(review): "alternate" on its own also forces text/css
+							// here, not only "stylesheet" - confirm this is intended.
+							if (t.equalsIgnoreCase("alternate")
+								|| t.equalsIgnoreCase("stylesheet")) {
+								// FIXME: hardcoding text/css
+								type = "text/css";
+							} // FIXME: do we want to do anything with the
+							// other possible rel's?
+						}
+						hn.put(reltype, rel);
+					}
+				}
+				//				Core.logger.log(this, "Sanitizing URI: "+href+" with type "+
+				//					type+" and charset "+charset,
+				//					Logger.DEBUG);
+				// Decode HTML entities before filtering, re-encode afterwards
+				href = HTMLDecoder.decode(href);
+				href = sanitizeURI(href, type, charset, pc.cb);
+				if (href != null) {
+					href = HTMLEncoder.encode(href);
+					hn.put("href", href);
+					if (type != null)
+						hn.put("type", type);
+					if (charset != null)
+						hn.put("charset", charset);
+					// NOTE(review): hreflang is only kept when a charset is also
+					// known - confirm that coupling is deliberate.
+					if (charset != null && hreflang != null)
+						hn.put("hreflang", hreflang);
+				}
+			}
+			// FIXME: allow these if the charset and encoding are encoded into
+			// the URL
+			// FIXME: link types -
+			// http://www.w3.org/TR/html4/types.html#type-links - the
+			// stylesheet stuff, primarily - rel and rev properties - parse
+			// these, use same fix as above (browser may assume text/css for
+			// anything linked as a stylesheet)
+			return hn;
+		}
+	}
+
+	/**
+	 * Verifier for the meta element. Only a short whitelist of name= and
+	 * http-equiv= forms is let through; everything else is dropped.
+	 */
+	static class MetaTagVerifier extends TagVerifier {
+		MetaTagVerifier() {
+			super("meta", new String[] { "id" });
+		}
+
+		/**
+		 * Filter the attributes of a meta tag.
+		 *
+		 * NOTE(review): this method takes an extra linkHtl parameter (unused
+		 * in the body), while the sibling verifiers in this file override a
+		 * three-argument sanitizeHash(h, p, pc). If TagVerifier only declares
+		 * the three-argument form, this is an overload rather than an
+		 * override and would never be invoked by the filter - confirm.
+		 *
+		 * @return the filtered attributes, or null if nothing was kept
+		 */
+		Hashtable sanitizeHash(
+			Hashtable h,
+			ParsedTag p,
+			HTMLParseContext pc,
+			int linkHtl) throws DataFilterException {
+			Hashtable hn = super.sanitizeHash(h, p, pc);
+			/*
+			 * Several possibilities: a) meta http-equiv=X content=Y b) meta
+			 * name=X content=Y
+			 */
+			String http_equiv = getHashString(h, "http-equiv");
+			String name = getHashString(h, "name");
+			String content = getHashString(h, "content");
+			// scheme is read but not used below
+			String scheme = getHashString(h, "scheme");
+			if (content != null) {
+				if (name != null && http_equiv == null) {
+					// Purely descriptive metadata is copied verbatim
+					if (name.equalsIgnoreCase("Author")) {
+						hn.put("name", name);
+						hn.put("content", content);
+					} else if (name.equalsIgnoreCase("Keywords")) {
+						hn.put("name", name);
+						hn.put("content", content);
+					} else if (name.equalsIgnoreCase("Description")) {
+						hn.put("name", name);
+						hn.put("content", content);
+					}
+				} else if (http_equiv != null && name == null) {
+					if (http_equiv.equalsIgnoreCase("Expires")) {
+						hn.put("http-equiv", http_equiv);
+						hn.put("content", content);
+					} else if (
+						http_equiv.equalsIgnoreCase("Content-Script-Type")) {
+						// We don't support script at this time.
+					} else if (
+						http_equiv.equalsIgnoreCase("Content-Style-Type")) {
+						// FIXME: charsets
+						if (content.equalsIgnoreCase("text/css")) {
+							// FIXME: selectable style languages - only matters
+							// when we have implemented more than one
+							// FIXME: if we ever do allow it... the spec
+							// http://www.w3.org/TR/html4/present/styles.html#h-14.2.1
+							// says only the last definition counts...
+							//        but it only counts if it's in the HEAD section,
+							// so we DONT need to parse the whole doc
+							hn.put("http-equiv", http_equiv);
+							hn.put("content", content);
+						}
+						// FIXME: add some more headers - Dublin Core?
+					} else if (http_equiv.equalsIgnoreCase("Content-Type")) {
+						// Only keep a Content-Type that is text/html and whose
+						// charset (if any) matches the charset we are parsing with
+						String[] typesplit = splitType(content);
+						if (typesplit[0].equalsIgnoreCase("text/html")
+							&& (typesplit[1] == null
+								|| typesplit[1].equalsIgnoreCase(pc.charset))) {
+							hn.put("http-equiv", http_equiv);
+							hn.put(
+								"content",
+								typesplit[0]
+									+ (typesplit[1] != null
+										? "; charset=" + typesplit[1]
+										: ""));
+						}
+						// Record any declared charset, whether or not the tag was kept
+						if(typesplit[1] != null)
+							pc.detectedCharset = typesplit[1];
+					} else if (
+						http_equiv.equalsIgnoreCase("Content-Language")) {
+						hn.put("http-equiv", "Content-Language");
+						hn.put("content", content);
+					}
+				}
+			}
+			if (hn.isEmpty())
+				return null;
+			return hn;
+		}
+	}
+
+	/**
+	 * Verifier for the document type declaration. Only declarations of the
+	 * form "html public PUBLIC-ID [SYSTEM-ID]" whose public identifier is
+	 * one of the keys of DTDs are accepted.
+	 */
+	static class DocTypeTagVerifier extends TagVerifier {
+		DocTypeTagVerifier(String tag) {
+			super(tag, null);
+		}
+
+		/**
+		 * Known public identifiers, mapped to the system identifier (DTD
+		 * URL) that must accompany them. The HTML 3.2 entry maps to a plain
+		 * Object instead of a String, so getHashString returns null for it
+		 * and sanitize() does not check its system identifier.
+		 */
+		static final Hashtable DTDs = new Hashtable();
+
+		static {
+			DTDs.put(
+				"-//W3C//DTD XHTML 1.0 Strict//EN",
+				"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd");
+			DTDs.put(
+				"-//W3C//DTD XHTML 1.0 Transitional//EN",
+				"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd");
+			DTDs.put(
+				"-//W3C//DTD XHTML 1.0 Frameset//EN",
+				"http://www.w3.org/TR/xhtml1/DTD/xhtml1-frameset.dtd");
+			DTDs.put(
+				"-//W3C//DTD HTML 4.01//EN",
+				"http://www.w3.org/TR/html4/strict.dtd");
+			DTDs.put(
+				"-//W3C//DTD HTML 4.01 Transitional//EN",
+				"http://www.w3.org/TR/html4/loose.dtd");
+			DTDs.put(
+				"-//W3C//DTD HTML 4.01 Frameset//EN",
+				"http://www.w3.org/TR/html4/frameset.dtd");
+			DTDs.put("-//W3C//DTD HTML 3.2 Final//EN", new Object());
+		}
+
+		/**
+		 * Accept the declaration unchanged if it is well-formed and names a
+		 * known DTD; otherwise return null.
+		 */
+		ParsedTag sanitize(ParsedTag t, HTMLParseContext pc) {
+			// Expect: html public "PUBLIC-ID" ["SYSTEM-ID"]
+			if (!(t.unparsedAttrs.length == 3 || t.unparsedAttrs.length == 4))
+				return null;
+			if (!t.unparsedAttrs[0].equalsIgnoreCase("html"))
+				return null;
+			if (!t.unparsedAttrs[1].equalsIgnoreCase("public"))
+				return null;
+			String s = stripQuotes(t.unparsedAttrs[2]);
+			if (!DTDs.containsKey(s))
+				return null;
+			if (t.unparsedAttrs.length == 4) {
+				// If a system identifier is given it must match the one we know.
+				// NOTE(review): when spec is null (the HTML 3.2 entry) any
+				// system identifier is let through unchecked - confirm intended.
+				String ss = stripQuotes(t.unparsedAttrs[3]);
+				String spec = getHashString(DTDs, s);
+				if (spec != null && !spec.equals(ss))
+					return null;
+			}
+			return t;
+		}
+	}
+
+	/**
+	 * Verifier for the XML declaration. The declaration is only accepted
+	 * when it consists of exactly version="1.0" followed by
+	 * encoding="CHARSET"? where CHARSET matches (ignoring case) the charset
+	 * the document is being parsed with.
+	 */
+	static class XmlTagVerifier extends TagVerifier {
+		private static final String ENCODING_PREFIX = "encoding=\"";
+		private static final String ENCODING_SUFFIX = "\"?";
+
+		XmlTagVerifier() {
+			super("?xml", null);
+		}
+
+		/**
+		 * @return t unchanged if the declaration is acceptable, else null
+		 */
+		ParsedTag sanitize(ParsedTag t, HTMLParseContext pc) {
+			if (t.unparsedAttrs.length != 2)
+				return null;
+			if (!t.unparsedAttrs[0].equals("version=\"1.0\""))
+				return null;
+			String enc = t.unparsedAttrs[1];
+			// Both the prefix AND the suffix must be present (the check used
+			// to reject only when both were missing), and the attribute must
+			// be long enough that prefix and suffix do not overlap, otherwise
+			// the substring below would throw.
+			if (enc.length() < ENCODING_PREFIX.length() + ENCODING_SUFFIX.length()
+				|| !enc.startsWith(ENCODING_PREFIX)
+				|| !enc.endsWith(ENCODING_SUFFIX))
+				return null;
+			String declared =
+				enc.substring(
+					ENCODING_PREFIX.length(),
+					enc.length() - ENCODING_SUFFIX.length());
+			if (!declared.equalsIgnoreCase(pc.charset))
+				return null;
+			return t;
+		}
+	}
+
+	/**
+	 * Verifier for the html element. Besides the generic attributes it
+	 * keeps an xmlns attribute, but only when it names the XHTML namespace.
+	 */
+	static class HtmlTagVerifier extends TagVerifier {
+		HtmlTagVerifier() {
+			super("html", new String[] { "id", "version" });
+		}
+
+		/**
+		 * NOTE(review): like MetaTagVerifier, this takes an extra (unused)
+		 * linkHtl parameter, so it may be an overload rather than an
+		 * override of the three-argument sanitizeHash - confirm it is called.
+		 */
+		Hashtable sanitizeHash(
+			Hashtable h,
+			ParsedTag p,
+			HTMLParseContext pc,
+			int linkHtl) throws DataFilterException {
+			Hashtable hn = super.sanitizeHash(h, p, pc);
+			String xmlns = getHashString(h, "xmlns");
+			if ("http://www.w3.org/1999/xhtml".equals(xmlns))
+				hn.put("xmlns", xmlns);
+			return hn;
+		}
+	}
+
+	/**
+	 * Run a piece of CSS (a style attribute value or the body of a style
+	 * element) through CSSParser and return what the parser wrote out.
+	 *
+	 * @param style the CSS text to filter
+	 * @param cb the filter callback handed to the CSSParser
+	 * @return the filtered CSS; a placeholder CSS comment if the tokenizer
+	 * reported that it could not match the input; or null if the parser
+	 * produced no output at all
+	 * @throws DataFilterException if raised by the CSS parser
+	 */
+	static String sanitizeStyle(String style, FilterCallback cb) throws DataFilterException {
+		Logger.debug(
+			HTMLFilter.class,
+			"Sanitizing style: " + style);
+		Reader r = new StringReader(style);
+		Writer w = new StringWriter();
+		CSSParser pc = new CSSParser(r, w, false, cb);
+		try {
+			pc.parse();
+		} catch (IOException e) {
+			// Best effort: whatever was written before the failure is still
+			// returned below. Log the cause so the failure can be diagnosed.
+			Logger.error(
+				HTMLFilter.class,
+				"IOException parsing inline CSS!",
+				e);
+		} catch (Error e) {
+			// The tokenizer signals unmatched input with a bare Error; this
+			// sucks, it should be a proper exception. Compare constant-first:
+			// an Error may carry a null message, and an NPE here would hide
+			// the real Error instead of rethrowing it.
+			if ("Error: could not match input".equals(e.getMessage())) {
+				Logger.normal(
+					HTMLFilter.class,
+					"CSS Parse Error!",
+					e);
+				return "/* Could not match input style */";
+			} else
+				throw e;
+		}
+		String s = w.toString();
+		if (s == null || s.length() == 0)
+			return null;
+		Logger.debug(HTMLFilter.class, "Style finally: " + s);
+		return s;
+	}
+
+	/**
+	 * Replace every double quote in a string with the entity &amp;quot;.
+	 * All other characters are copied unchanged.
+	 *
+	 * @param s the text to escape; must not be null
+	 * @return the text with each " replaced
+	 */
+	static String escapeQuotes(String s) {
+		StringBuffer out = new StringBuffer(s.length());
+		int from = 0;
+		int at = s.indexOf('"');
+		while (at >= 0) {
+			// copy the run before the quote, then the entity
+			out.append(s.substring(from, at)).append("&quot;");
+			from = at + 1;
+			at = s.indexOf('"', from);
+		}
+		out.append(s.substring(from));
+		return out.toString();
+	}
+
+	/**
+	 * Filter a piece of script. Scripting is not supported, so the input is
+	 * ignored and null is always returned; callers treat null as "drop it".
+	 *
+	 * @param script the script text (unused)
+	 * @return always null
+	 */
+	static String sanitizeScripting(String script) {
+		// Kill it. At some point we may want to allow certain recipes - FIXME
+		return null;
+	}
+
+	/**
+	 * Filter a URI with no overriding content type or charset. Delegates to
+	 * the four-argument sanitizeURI with both overrides null.
+	 */
+	static String sanitizeURI(String uri, FilterCallback cb) {
+		return sanitizeURI(uri, null, null, cb);
+	}
+
+	/**
+	 * Split a MIME content type into its base type and its charset
+	 * parameter. While we're only interested in the type and the charset,
+	 * the format is a lot more flexible than that. (avian) Examples:
+	 *   TEXT/PLAIN; format=flowed; charset=US-ASCII
+	 *   IMAGE/JPEG; name=test.jpeg; x-unix-mode=0644
+	 *
+	 * @param type the full content type string; must not be null
+	 * @return a two-element array: [0] is the trimmed base type, [1] is the
+	 * trimmed value of the charset parameter, or null if there is none
+	 * (if the parameter occurs more than once, the last one wins)
+	 */
+	static String[] splitType(String type) {
+		String charset = null;
+		StringFieldParser sfp = new StringFieldParser(type, ';');
+		type = sfp.nextField().trim();
+		while (sfp.hasMoreFields()) {
+			String param = sfp.nextField();
+			int x = param.indexOf('=');
+			if (x != -1) {
+				String name = param.substring(0, x).trim();
+				String value = param.substring(x + 1).trim();
+				// MIME parameter names are case-insensitive, so "Charset="
+				// or "CHARSET=" must be recognised as well.
+				if (name.equalsIgnoreCase("charset"))
+					charset = value;
+			}
+		}
+		return new String[] { type, charset };
+	}
+
+	// A simple string splitter
+	// StringTokenizer doesn't work well for our purpose. (avian)
+	/**
+	 * Splits a string into fields on a single delimiter character. Unlike
+	 * StringTokenizer, empty fields are returned (as empty strings), and a
+	 * trailing delimiter yields a final empty field.
+	 */
+	static class StringFieldParser {
+		private String str;
+		private int maxPos, curPos;
+		private char c;
+
+		/** Split on tab characters. */
+		public StringFieldParser(String str) {
+			this(str, '\t');
+		}
+
+		/** Split on the given delimiter character. */
+		public StringFieldParser(String str, char c) {
+			this.c = c;
+			this.str = str;
+			this.curPos = 0;
+			this.maxPos = str.length();
+		}
+
+		/** True until the field ending at the end of the string has been returned. */
+		public boolean hasMoreFields() {
+			return !(curPos > maxPos);
+		}
+
+		/**
+		 * Return the next field, without its delimiter, or null when all
+		 * fields have been consumed.
+		 */
+		public String nextField() {
+			if (curPos > maxPos)
+				return null;
+			int begin = curPos;
+			int delim = str.indexOf(c, begin);
+			// no further delimiter: the field runs to the end of the string
+			int stop = (delim == -1) ? maxPos : delim;
+			curPos = stop + 1;
+			return str.substring(begin, stop);
+		}
+	}
+
+	/**
+	 * Filter a URI through the callback, telling it which content type (and
+	 * optionally charset) the page claims for the target.
+	 *
+	 * @param suri the URI to filter
+	 * @param overrideType the MIME type to force for the target, or null
+	 * @param overrideCharset the charset to append to the type as a
+	 * ";charset=" parameter, or null/empty for none; ignored when
+	 * overrideType is null, since there is no type to attach it to
+	 * @param cb the callback that actually processes the URI
+	 * @return whatever cb.processURI returns
+	 */
+	static String sanitizeURI(
+		String suri,
+		String overrideType,
+		String overrideCharset,
+		FilterCallback cb) {
+		// Only append to a real type: concatenating onto a null String would
+		// produce the literal text "null;charset=..." and pass that on.
+		if (overrideType != null
+			&& overrideCharset != null
+			&& overrideCharset.length() > 0)
+			overrideType += ";charset="+overrideCharset;
+		return cb.processURI(suri, overrideType);
+	}
+
+	/**
+	 * Look up a key and return its value only if that value is a String.
+	 *
+	 * @return the String stored under key, or null if the key is absent or
+	 * maps to something that is not a String
+	 */
+	static String getHashString(Hashtable h, String key) {
+		Object value = h.get(key);
+		// instanceof is false for null, so a missing key also yields null
+		return (value instanceof String) ? (String) value : null;
+	}
+
+}

Added: trunk/freenet/src/freenet/clients/http/filter/KnownUnsafeContentTypeException.java
===================================================================
--- trunk/freenet/src/freenet/clients/http/filter/KnownUnsafeContentTypeException.java	2006-03-18 14:52:17 UTC (rev 8274)
+++ trunk/freenet/src/freenet/clients/http/filter/KnownUnsafeContentTypeException.java	2006-03-18 15:18:54 UTC (rev 8275)
@@ -0,0 +1,48 @@
+package freenet.clients.http.filter;
+
+/**
+ * Thrown for a MIME type that is known and flagged as dangerous. The
+ * explanation lists which of the type's danger flags are set.
+ */
+public class KnownUnsafeContentTypeException extends UnsafeContentTypeException {
+
+	MIMEType type;
+
+	public KnownUnsafeContentTypeException(MIMEType type) {
+		this.type = type;
+	}
+
+	/**
+	 * Build the HTML body of the error page: the type's read description,
+	 * a general warning, and one list item per danger flag that is set.
+	 */
+	public String getExplanation() {
+		StringBuffer out = new StringBuffer();
+		out.append("<p><b>").append(type.readDescription).append("</b></p>\n");
+		out.append("<p>This is a potentially dangerous MIME type. If the node lets it through, your browser may ");
+		out.append("do bad things leading to compromize of your anonymity, and your IP address being exposed in ");
+		out.append("connection with this page. In particular:<ul>");
+		if (type.dangerousInlines)
+			appendRisk(out, "Dangerous inlines",
+				"This type of content can contain inline images or "
+				+ "videos, and can therefore load content from the non-anonymous open Web, exposing your "
+				+ "IP address.");
+		if (type.dangerousLinks)
+			appendRisk(out, "Dangerous links",
+				"This type of content can contain links to the "
+				+ "non-anonymous Web; if you click on them (and they may be disguised), this may expose "
+				+ "your IP address.");
+		if (type.dangerousScripting)
+			appendRisk(out, "Dangerous scripting",
+				"This type of content can contain dangerous scripts "
+				+ "which when executed may compromize your anonymity by connecting to the open Web or "
+				+ "otherwise breach security.");
+		if (type.dangerousReadMetadata)
+			appendRisk(out, "Dangerous metadata",
+				"This type of content can contain metadata which may "
+				+ "be displayed by some browsers or other software, which may contain dangerous links or inlines.");
+		out.append("</ul>Since there is no built-in filter for this data, you should take the utmost of care!");
+		return out.toString();
+	}
+
+	/** Append one list item: a red, bold label followed by its explanation. */
+	private static void appendRisk(StringBuffer out, String label, String detail) {
+		out.append("<li><font color=\"red\"><b>");
+		out.append(label);
+		out.append(":</b></font> ");
+		out.append(detail);
+		out.append("</li>");
+	}
+
+	public String getHTMLEncodedTitle() {
+		return "Known dangerous type: "+type.primaryMimeType;
+	}
+
+	public String getRawTitle() {
+		return "Known dangerous type: "+type.primaryMimeType;
+	}
+
+}

Modified: trunk/freenet/src/freenet/clients/http/filter/MIMEType.java
===================================================================
--- trunk/freenet/src/freenet/clients/http/filter/MIMEType.java	2006-03-18 14:52:17 UTC (rev 8274)
+++ trunk/freenet/src/freenet/clients/http/filter/MIMEType.java	2006-03-18 15:18:54 UTC (rev 8275)
@@ -5,49 +5,49 @@
  */
 public class MIMEType {
 	
-	final String primaryMimeType;
-	final String[] alternateMimeTypes;
+	public final String primaryMimeType;
+	public final String[] alternateMimeTypes;
 	
-	final String primaryExtension;
-	final String[] alternateExtensions;
+	public final String primaryExtension;
+	public final String[] alternateExtensions;
 	
 	/** Is the data safe to read as-is? This is true for text/plain. */
-	final boolean safeToRead;
+	public final boolean safeToRead;
 	
 	/** Is the data safe to write as-is? */
-	final boolean safeToWrite;
+	public final boolean safeToWrite;
 	
 	/** Content filter to make data safe to read */
-	final ContentDataFilter readFilter;
+	public final ContentDataFilter readFilter;
 	
 	/** Content filter to make data safe to write */
-	final ContentDataFilter writeFilter;
+	public final ContentDataFilter writeFilter;
 
 	// Detail. Not necessarily an exhaustive list.
 	
-	final boolean dangerousLinks;
+	public final boolean dangerousLinks;
 	
-	final boolean dangerousInlines;
+	public final boolean dangerousInlines;
 	
-	final boolean dangerousScripting;
+	public final boolean dangerousScripting;
 	
-	final boolean dangerousReadMetadata;
+	public final boolean dangerousReadMetadata;
 	
-	final boolean dangerousWriteMetadata;
+	public final boolean dangerousWriteMetadata;
 	
-	final boolean dangerousToWriteEvenWithFilter;
+	public final boolean dangerousToWriteEvenWithFilter;
 	
 	// These are in addition to the above
 	
-	final String readDescription;
+	public final String readDescription;
 	
-	final String writeDescription;
+	public final String writeDescription;
 	
-	final boolean takesACharset;
+	public final boolean takesACharset;
 	
-	final String defaultCharset;
+	public final String defaultCharset;
 	
-	final CharsetExtractor charsetExtractor;
+	public final CharsetExtractor charsetExtractor;
 	
 	MIMEType(String type, String ext, String[] extraTypes, String[] extraExts,
 			boolean safeToRead, boolean safeToWrite, ContentDataFilter readFilter,
@@ -76,4 +76,11 @@
 		this.defaultCharset = defaultCharset;
 		this.charsetExtractor = charsetExtractor;
 	}
+
+	/**
+	 * Throw an exception indicating that this is a dangerous content type.
+	 * This method never returns normally.
+	 *
+	 * @throws KnownUnsafeContentTypeException always, constructed with this MIMEType
+	 */
+	public void throwUnsafeContentTypeException() throws KnownUnsafeContentTypeException {
+		throw new KnownUnsafeContentTypeException(this);
+	}
 }

Added: trunk/freenet/src/freenet/clients/http/filter/NullFilterCallback.java
===================================================================
--- trunk/freenet/src/freenet/clients/http/filter/NullFilterCallback.java	2006-03-18 14:52:17 UTC (rev 8274)
+++ trunk/freenet/src/freenet/clients/http/filter/NullFilterCallback.java	2006-03-18 15:18:54 UTC (rev 8275)
@@ -0,0 +1,18 @@
+package freenet.clients.http.filter;
+
+
+/**
+ * A FilterCallback that permits nothing: no GET forms, no POST forms, and
+ * every URI is answered with null.
+ */
+public class NullFilterCallback implements FilterCallback {
+
+	/** @return always false */
+	public boolean allowGetForms() {
+		return false;
+	}
+
+	/** @return always false */
+	public boolean allowPostForms() {
+		return false;
+	}
+
+	/** @return always null, whatever the URI and type */
+	public String processURI(String uri, String overrideType) {
+		return null;
+	}
+
+}

Added: trunk/freenet/src/freenet/clients/http/filter/UnknownContentTypeException.java
===================================================================
--- trunk/freenet/src/freenet/clients/http/filter/UnknownContentTypeException.java	2006-03-18 14:52:17 UTC (rev 8274)
+++ trunk/freenet/src/freenet/clients/http/filter/UnknownContentTypeException.java	2006-03-18 15:18:54 UTC (rev 8275)
@@ -0,0 +1,37 @@
+package freenet.clients.http.filter;
+
+import freenet.support.HTMLEncoder;
+
+/**
+ * Thrown for a MIME type the filter has no information about. Keeps the
+ * type name both raw and HTML-encoded.
+ */
+public class UnknownContentTypeException extends UnsafeContentTypeException {
+
+	private static final String TITLE_PREFIX =
+		"Unknown and potentially dangerous content type: ";
+
+	final String type;
+	final String encodedType;
+
+	/**
+	 * @param typeName the unrecognised MIME type, as received
+	 */
+	public UnknownContentTypeException(String typeName) {
+		type = typeName;
+		encodedType = HTMLEncoder.encode(typeName);
+	}
+
+	/** @return the raw (unencoded) type name */
+	public String getType() {
+		return type;
+	}
+
+	/** Title using the HTML-encoded type name. */
+	public String getHTMLEncodedTitle() {
+		return TITLE_PREFIX+encodedType;
+	}
+
+	/** Title using the raw type name. */
+	public String getRawTitle() {
+		return TITLE_PREFIX+type;
+	}
+
+	/** Fixed HTML paragraph explaining why unknown types are risky. */
+	public String getExplanation() {
+		StringBuffer text = new StringBuffer();
+		text.append("<p>Your Freenet node does not know anything about this MIME type. ");
+		text.append("This means that your browser might do something dangerous in response ");
+		text.append("to downloading this file. For example, many formats can contain embedded images ");
+		text.append("or videos, which are downloaded from the web; this is by no means innocuous, ");
+		text.append("because they can ruin your anonymity and expose your IP address (if the attacker ");
+		text.append("runs the web site or has access to its logs). Hyperlinks to the Web can also be a ");
+		text.append("threat, for much the same reason, as can scripting, for this and other reasons.</p>");
+		return text.toString();
+	}
+
+}

Added: trunk/freenet/src/freenet/clients/http/filter/UnsafeContentTypeException.java
===================================================================
--- trunk/freenet/src/freenet/clients/http/filter/UnsafeContentTypeException.java	2006-03-18 14:52:17 UTC (rev 8274)
+++ trunk/freenet/src/freenet/clients/http/filter/UnsafeContentTypeException.java	2006-03-18 15:18:54 UTC (rev 8275)
@@ -0,0 +1,26 @@
+package freenet.clients.http.filter;
+
+/**
+ * Thrown by the filter when it cannot guarantee the safety of the data, because it is an unknown type,
+ * because it cannot be filtered, or because we do not know how to filter it.
+ * 
+ * Base class for UnknownContentTypeException and KnownUnsafeContentTypeException.
+ * Subclasses supply the text of the error page through the three methods below.
+ */
+public abstract class UnsafeContentTypeException extends Exception {
+
+	/**
+	 * Get the contents of the error page.
+	 * The subclasses in this package return HTML markup here.
+	 */
+	public abstract String getExplanation();
+	
+	/**
+	 * Get the title of the error page.
+	 * As the name says, the result is meant to be already HTML-encoded.
+	 */
+	public abstract String getHTMLEncodedTitle();
+	
+	/**
+	 * Get the raw title of the error page. (May be unsafe for HTML).
+	 */
+	public abstract String getRawTitle();
+	
+}

Added: trunk/freenet/src/freenet/clients/http/filter/Yytoken.java
===================================================================
--- trunk/freenet/src/freenet/clients/http/filter/Yytoken.java	2006-03-18 14:52:17 UTC (rev 8274)
+++ trunk/freenet/src/freenet/clients/http/filter/Yytoken.java	2006-03-18 15:18:54 UTC (rev 8275)
@@ -0,0 +1,32 @@
+package freenet.clients.http.filter;
+
+/**
+ * Parsing token.
+ *
+ * @author devrandom at hyper.to
+ */
+
+class Yytoken {
+  /** Token index, as passed to the constructor. */
+  public int m_index;
+  /** Text of the token. */
+  public String m_text;
+  /** Line number, as passed to the constructor. */
+  public int m_line;
+  /** Character position where the token begins. */
+  public int m_charBegin;
+  /** Character position where the token ends. */
+  public int m_charEnd;
+
+  Yytoken (int index, String text, int line, int charBegin, int charEnd) {
+    this.m_index = index;
+    this.m_text = text;
+    this.m_line = line;
+    this.m_charBegin = charBegin;
+    this.m_charEnd = charEnd;
+  }
+
+  /** Multi-line dump of all five fields, one per line. */
+  public String toString() {
+    StringBuffer sb = new StringBuffer();
+    sb.append("Text   : ").append(m_text);
+    sb.append("\nindex : ").append(m_index);
+    sb.append("\nline  : ").append(m_line);
+    sb.append("\ncBeg. : ").append(m_charBegin);
+    sb.append("\ncEnd. : ").append(m_charEnd);
+    return sb.toString();
+  }
+}
+

Modified: trunk/freenet/src/freenet/node/TextModeClientInterface.java
===================================================================
--- trunk/freenet/src/freenet/node/TextModeClientInterface.java	2006-03-18 14:52:17 UTC (rev 8274)
+++ trunk/freenet/src/freenet/node/TextModeClientInterface.java	2006-03-18 15:18:54 UTC (rev 8275)
@@ -429,11 +429,11 @@
             	// FIXME depends on CHK's still being renamable
                 //uri = uri.setDocName(f.getName());
             	
-                outsb.append("URI: "+uri);
+                outsb.append("URI: "+uri+"\n");
             	long endTime = System.currentTimeMillis();
                 long sz = f.length();
                 double rate = 1000.0 * sz / (endTime-startTime);
-                outsb.append("Upload rate: "+rate+" bytes / second");
+                outsb.append("Upload rate: "+rate+" bytes / second\n");
             } catch (FileNotFoundException e1) {
                 outsb.append("File not found");
             } catch (InserterException e) {

Modified: trunk/freenet/src/freenet/node/Version.java
===================================================================
--- trunk/freenet/src/freenet/node/Version.java	2006-03-18 14:52:17 UTC (rev 8274)
+++ trunk/freenet/src/freenet/node/Version.java	2006-03-18 15:18:54 UTC (rev 8275)
@@ -20,7 +20,7 @@
 	public static final String protocolVersion = "1.0";
 
 	/** The build number of the current revision */
-	private static final int buildNumber = 543;
+	private static final int buildNumber = 544;
 
 	/** Oldest build of Fred we will talk to */
 	private static final int lastGoodBuild = 507;

Added: trunk/freenet/src/freenet/support/io/NullWriter.java
===================================================================
--- trunk/freenet/src/freenet/support/io/NullWriter.java	2006-03-18 14:52:17 UTC (rev 8274)
+++ trunk/freenet/src/freenet/support/io/NullWriter.java	2006-03-18 15:18:54 UTC (rev 8275)
@@ -0,0 +1,17 @@
+package freenet.support.io;
+
+import java.io.IOException;
+import java.io.Writer;
+
+/**
+ * A Writer that discards everything written to it. All three overridden
+ * methods have empty bodies.
+ */
+public class NullWriter extends Writer {
+
+	/** Discards the characters; nothing is stored or forwarded. */
+	public void write(char[] cbuf, int off, int len) throws IOException {
+	}
+
+	/** Does nothing. */
+	public void flush() throws IOException {
+	}
+
+	/** Does nothing. */
+	public void close() throws IOException {
+	}
+
+}




More information about the cvs mailing list