import java.io.*;
import java.util.Arrays;
import java.util.Locale;

/**
 * Searches through all of the words in the WordNet dictionary
 * (available at http://www.cogsci.princeton.edu/~wn) and writes every
 * word that only contains the hex characters "abcdef" to an output file.
 *
 * <p>Optionally, you can set the global {@link #usePseudoChars} variable to
 * true and receive an output of words containing "abcdeflisoz", where the
 * number 1 can be substituted for "l" and "i", 5 can be substituted for "s",
 * 2 can be substituted for "z", and 0 can be substituted for "o".
 *
 * <p>The point here was to make a list of words that could be formed using
 * only the letters and numbers used in hexadecimal notation.
 *
 * <p>Usage: {@code java HexWords [outputFile [dictionaryDir]]}
 */
public class HexWords {

    /**
     * If true, the letters l, i, s, z, and o are also accepted in words
     * (they can be substituted for the numbers 1, 1, 5, 2, and 0), and each
     * output line additionally shows the converted form in parentheses.
     */
    static boolean usePseudoChars = false;

    /** Letters that are hexadecimal digits in their own right. */
    private static final String HEX_CHARS = "abcdef";

    /** Letters that merely look like digits: 1=l,i; 5=s; 2=z; 0=o. */
    private static final String PSEUDO_CHARS = "lisoz";

    /** Suffix (compared case-insensitively) of the dictionary files to scan. */
    private static final String INDEX_SUFFIX = "idx";

    /**
     * Scans every index file in the dictionary directory and writes the
     * matching words to the output file.
     *
     * @param args optional overrides: {@code args[0]} is the output file
     *             name, {@code args[1]} is the dictionary directory
     */
    public static void main (String[] args) {
        // set up our default values, which can be overridden by
        // command line parameters
        String dirName = "C:\\Documents and Settings\\a217041\\Desktop\\WordNet-1.7.1\\dict";
        String outputFileName = "hexwords.txt";

        if (args.length > 0) {
            outputFileName = args[0];
        }
        if (args.length > 1) {
            dirName = args[1];
        }

        // a simple filter that only accepts file names ending with the
        // index suffix
        FilenameFilter suffixFilter = new FilenameFilter() {
            public boolean accept(File dir, String name) {
                return name.toLowerCase(Locale.ENGLISH).endsWith(INDEX_SUFFIX);
            }
        };

        // List the input files BEFORE touching the output file, so that a
        // bad directory name does not truncate an existing result file.
        // File.list() returns null (not an empty array) when the path is
        // not a readable directory.
        String[] theFiles = new File(dirName).list(suffixFilter);
        if (theFiles == null) {
            System.err.println("Cannot read dictionary directory: " + dirName);
            return;
        }
        // the listing order is unspecified; sort for reproducible output
        Arrays.sort(theFiles);

        PrintWriter outputFile = null;
        boolean failed = false;
        try {
            outputFile = new PrintWriter( new FileWriter(outputFileName) );
            for (int i = 0; i < theFiles.length; i++) {
                // File(parent, child) inserts the platform's separator
                searchFile(new File(dirName, theFiles[i]).getPath(), outputFile);
            }
            // PrintWriter never throws on write errors; it has to be asked
            failed = outputFile.checkError();
            if (failed) {
                System.err.println("Error writing to " + outputFileName);
            }
        } catch (IOException e) {
            failed = true;
            System.err.println("Cannot open output file " + outputFileName + ": " + e);
        } finally {
            if (outputFile != null) {
                outputFile.close();
            }
        }

        if (!failed) {
            System.out.println("\nFinished! Output sent to " + outputFileName);
        }
    }

    /**
     * Reads the words in one index file and prints the accepted ones.
     *
     * <p>The expected format is that comment lines start with 2 spaces, and
     * "word" lines all start with a word, followed by a space, followed by
     * other information about the word. Only the text up to the first space
     * of each line is examined. A word is printed (in lower case) when it is
     * longer than one character and consists solely of accepted characters;
     * when {@link #usePseudoChars} is true the converted form is appended in
     * parentheses.
     *
     * <p>I/O errors while opening or reading the file are reported on
     * {@code System.err} and end the processing of this file; they are not
     * propagated to the caller.
     *
     * @param fileName path of the file to read; must not be null
     * @param out      destination for the accepted words; must not be null
     */
    public static void searchFile (String fileName, PrintWriter out) {
        String validChars = usePseudoChars ? HEX_CHARS + PSEUDO_CHARS : HEX_CHARS;

        System.out.println("Searching " + fileName + "...");

        BufferedReader in = null;
        try {
            in = new BufferedReader( new FileReader(fileName) );
            String line;
            while ((line = in.readLine()) != null) {
                // Comment lines begin with a space, so their "first word" is
                // empty and is rejected by the length check below.
                // A fixed locale keeps 'I' -> 'i' regardless of the default
                // locale (e.g. Turkish would produce a dotless i).
                String word = firstWord(line).toLowerCase(Locale.ENGLISH);

                // for our purposes, we want words longer than 1 character
                if (word.length() > 1 && containsOnly(word, validChars)) {
                    if (usePseudoChars) {
                        out.println(word + " (" + convertPseudoChars(word) + ")");
                    } else {
                        out.println(word);
                    }
                }
            }
        } catch (IOException e) {
            System.err.println("Error in searchFile: " + e);
        } finally {
            if (in != null) {
                try {
                    in.close();
                } catch (IOException ignored) {
                    // nothing useful can be done if closing a reader fails
                }
            }
        }
    }

    /**
     * Converts all of the "pseudo" characters in a String to their
     * corresponding numbers: L and I become 1, S becomes 5, Z becomes 2 and
     * O becomes 0. The input is upper-cased first, so every other character
     * is returned in upper case.
     *
     * @param s the word to convert; must not be null
     * @return the upper-cased word with pseudo characters replaced by digits
     */
    public static String convertPseudoChars (String s) {
        // fixed locale: under a Turkish default locale 'i' would upper-case
        // to a dotted capital I and escape the replacement below
        char[] chars = s.toUpperCase(Locale.ENGLISH).toCharArray();

        for (int i = 0; i < chars.length; i++) {
            switch (chars[i]) {
                case 'L':
                case 'I':
                    chars[i] = '1';
                    break;
                case 'S':
                    chars[i] = '5';
                    break;
                case 'Z':
                    chars[i] = '2';
                    break;
                case 'O':
                    chars[i] = '0';
                    break;
                default:
                    // every other character is kept as is
                    break;
            }
        }

        return new String(chars);
    }

    /** Returns the part of the line before its first space (the whole line if it has none). */
    private static String firstWord (String line) {
        int space = line.indexOf(' ');
        return (space < 0) ? line : line.substring(0, space);
    }

    /** Returns true if every character of the word occurs in the allowed set. */
    private static boolean containsOnly (String word, String allowed) {
        for (int i = 0; i < word.length(); i++) {
            if (allowed.indexOf(word.charAt(i)) < 0) {
                return false;
            }
        }
        return true;
    }
}