Java Code Examples for java.io.StreamTokenizer#lowerCaseMode()
The following examples show how to use java.io.StreamTokenizer#lowerCaseMode().
You can vote up the examples you find useful or vote down those you don't,
and follow the links above each example to visit the original project or source file. You can also check out related API usage in the sidebar.
Example 1
Source File: StreamTokenizerTest.java From j2objc with Apache License 2.0 | 6 votes |
public void testLowerCase() throws Exception { Locale.setDefault(Locale.US); StreamTokenizer st = new StreamTokenizer(new StringReader("aIb aIb")); st.lowerCaseMode(true); st.nextToken(); assertEquals("aib", st.sval); Locale oldDefault = Locale.getDefault(); try { Locale.setDefault(new Locale("tr", "TR")); st.nextToken(); assertEquals("a\u0131b", st.sval); } finally { Locale.setDefault(oldDefault); } }
Example 2
Source File: MatrixVectorReader.java From matrix-toolkits-java with GNU Lesser General Public License v3.0 | 6 votes |
/** * Sets up the stream tokenizer */ private void setup() { st = new StreamTokenizer(this); st.resetSyntax(); st.eolIsSignificant(false); st.lowerCaseMode(true); // Parse numbers as words st.wordChars('0', '9'); st.wordChars('-', '.'); // Characters as words st.wordChars('\u0000', '\u00FF'); // Skip comments st.commentChar('%'); // Skip whitespace and newlines st.whitespaceChars(' ', ' '); st.whitespaceChars('\u0009', '\u000e'); }
Example 3
Source File: Lexer.java From vanillacore with Apache License 2.0 | 5 votes |
/** * Creates a new lexical analyzer for the specified SQL statement. * * @param s * the SQL statement */ public Lexer(String s) { initKeywords(); tok = new StreamTokenizer(new StringReader(s)); tok.wordChars('_', '_'); tok.ordinaryChar('.'); /* * Tokens in TT_WORD type like ids and keywords are converted into lower * case. */ tok.lowerCaseMode(true); nextToken(); }
Example 4
Source File: Lexer.java From jackrabbit-filevault with Apache License 2.0 | 5 votes |
public Lexer(Reader r, String systemId) { this.systemId = systemId; st = new StreamTokenizer(r); st.eolIsSignificant(false); st.lowerCaseMode(false); st.slashSlashComments(true); st.slashStarComments(true); st.wordChars('a', 'z'); st.wordChars('A', 'Z'); st.wordChars(':', ':'); st.wordChars('_', '_'); st.quoteChar(SINGLE_QUOTE); st.quoteChar(DOUBLE_QUOTE); st.ordinaryChar(BEGIN_NODE_TYPE_NAME); st.ordinaryChar(END_NODE_TYPE_NAME); st.ordinaryChar(EXTENDS); st.ordinaryChar(LIST_DELIMITER); st.ordinaryChar(PROPERTY_DEFINITION); st.ordinaryChar(CHILD_NODE_DEFINITION); st.ordinaryChar(BEGIN_TYPE); st.ordinaryChar(END_TYPE); st.ordinaryChar(DEFAULT); st.ordinaryChar(CONSTRAINT); }
Example 5
Source File: URLRespectsRobots.java From BUbiNG with Apache License 2.0 | 4 votes |
/** Parses the argument as if it were the content of a <code>robots.txt</code> file,
 * and returns a sorted array of prefixes of URLs that the agent should not follow.
 *
 * @param content the content of the <code>robots.txt</code> file.
 * @param userAgent the string representing the user agent of interest.
 * @return an array of character arrays, which are prefixes of the URLs not to follow, in sorted order. */
public static char[][] parseRobotsReader(final Reader content, final String userAgent) throws IOException {
    /* The set of disallowed paths specifically aimed at userAgent. */
    Set<String> set = new ObjectOpenHashSet<>();
    /* The set of disallowed paths specifically aimed at *. */
    Set<String> setStar = new ObjectOpenHashSet<>();
    /* True if the currently examined record is targetted to us. */
    boolean doesMatter = false;
    /* True if we have seen a section targetted to our agent. */
    boolean specific = false;
    /* True if we have seen a section targetted to *. */
    boolean generic = false;
    /* True if we are in a star section. */
    boolean starSection = false;
    StreamTokenizer st = new StreamTokenizer(new FastBufferedReader(content));
    int token;
    // Build the syntax table from scratch: every printable char is a word
    // char, so a whole "Disallow:" line value arrives as one TT_WORD token.
    st.resetSyntax();
    st.eolIsSignificant(true); // We need EOLs to separate records
    st.wordChars(33, 255); // All characters may appear
    st.whitespaceChars(0, 32);
    st.ordinaryChar('#'); // We must manually simulate comments 8^(
    st.lowerCaseMode(false);
    while (true) {
        int lineFirstToken = st.nextToken();
        if (lineFirstToken == StreamTokenizer.TT_EOF) break;
        switch (lineFirstToken) {
        // Blank line: a new block is starting
        case StreamTokenizer.TT_EOL:
            doesMatter = false;
            break;
        // Comment or number: ignore until the end of line
        case StreamTokenizer.TT_NUMBER:
        case '#':
            do {
                token = st.nextToken();
            } while (token != StreamTokenizer.TT_EOL && token != StreamTokenizer.TT_EOF);
            break;
        // A string
        case StreamTokenizer.TT_WORD:
            if (st.sval.equalsIgnoreCase("user-agent:")) {
                token = st.nextToken();
                if (token == StreamTokenizer.TT_WORD)
                    // NOTE(review): prefix match — our agent string merely has to
                    // start with the agent named in the file for the record to apply.
                    if (StringUtils.startsWithIgnoreCase(userAgent, st.sval)) {
                        doesMatter = true;
                        specific = true;
                        starSection = false;
                    } else if (st.sval.equals("*")) {
                        starSection = true;
                        generic = true;
                    } else starSection = false;
                // Ignore the rest of the line
                while (token != StreamTokenizer.TT_EOL && token != StreamTokenizer.TT_EOF) token = st.nextToken();
            } else if (st.sval.equalsIgnoreCase("disallow:")) {
                token = st.nextToken();
                //System.out.println(st.sval + " " + starSection + " " + set + " " + setStar);
                // An empty Disallow line means "allow everything": drop what we
                // gathered for the currently active section.
                if (token == StreamTokenizer.TT_EOL) {
                    if (doesMatter) set.clear();
                    else if (starSection) setStar.clear();
                } else if (token == StreamTokenizer.TT_WORD) {
                    String disallowed = st.sval;
                    if (disallowed.endsWith("*")) disallowed = disallowed.substring(0, disallowed.length()-1); // Someone (erroneously) uses * to denote any suffix
                    if (doesMatter) set.add(disallowed);
                    else if (starSection) setStar.add(disallowed);
                }
                // Ignore the rest of the line
                while (token != StreamTokenizer.TT_EOL && token != StreamTokenizer.TT_EOF) token = st.nextToken();
            }
            else if (LOGGER.isTraceEnabled()) LOGGER.trace("Line first token {} ununderstandable in robots.txt", st.sval);
            break;
        // Something else: a syntax error
        default:
            if (LOGGER.isTraceEnabled()) LOGGER.trace("Found unknown token type {} in robots.txt", Integer.valueOf(lineFirstToken));
        }
    }
    // Agent-specific rules win over the generic '*' section.
    if (specific) return toSortedPrefixFreeCharArrays(set); // Some instructions specific to us
    if (! specific && generic) return toSortedPrefixFreeCharArrays(setStar); // No specific instruction, but some generic ones
    return toSortedPrefixFreeCharArrays(set);
}
Example 6
Source File: CETools.java From open-ig with GNU Lesser General Public License v3.0 | 4 votes |
/** * Parse a filter definition string. The format is as follows:<br> * "Exact words" "including:colons" id:100 name:fighter* * @param filterStr the filter string * @return the list of fields and patterns to check */ public static List<Pair<String, Pattern>> parseFilter(String filterStr) { List<Pair<String, Pattern>> result = new ArrayList<>(); StreamTokenizer st = new StreamTokenizer(new StringReader(filterStr)); st.slashSlashComments(false); st.slashStarComments(false); st.lowerCaseMode(true); st.wordChars('*', '*'); st.wordChars('?', '?'); st.wordChars('.', '.'); st.wordChars('@', '@'); st.wordChars('-', '-'); st.wordChars('_', '_'); st.quoteChar('"'); List<String> tokens = new ArrayList<>(); try { while (true) { int tok = st.nextToken(); if (tok == StreamTokenizer.TT_EOF) { break; } else if (tok == StreamTokenizer.TT_WORD || tok == '"') { tokens.add(st.sval); } else { tokens.add(String.valueOf((char)tok)); } } } catch (IOException ex) { // ignored } for (int i = 0; i < tokens.size(); i++) { String key = tokens.get(i); if (i < tokens.size() - 1 && tokens.get(i + 1).equals(":")) { if (i < tokens.size() - 2) { result.add(Pair.of(key, wildcardToRegex(tokens.get(i + 2)))); i += 2; } else { result.add(Pair.of(key, wildcardToRegex(""))); } } else { result.add(Pair.of("", wildcardToRegex(key))); } } return result; }
Example 7
Source File: STExample.java From icafe with Eclipse Public License 1.0 | 4 votes |
public static void main(String args[]) throws IOException { Hashtable<String, Double> variables = new Hashtable<String, Double>(); @SuppressWarnings("deprecation") StreamTokenizer st = new StreamTokenizer(System.in); st.eolIsSignificant(true); st.lowerCaseMode(true); st.ordinaryChar('/'); st.ordinaryChar('-'); while (true) { Expression res; int c = StreamTokenizer.TT_EOL; String varName = null; System.out.println("Enter an expression..."); try { while (true) { c = st.nextToken(); if (c == StreamTokenizer.TT_EOF) { System.exit(1); } else if (c == StreamTokenizer.TT_EOL) { continue; } else if (c == StreamTokenizer.TT_WORD) { if (st.sval.compareTo("dump") == 0) { dumpVariables(variables); continue; } else if (st.sval.compareTo("clear") == 0) { variables = new Hashtable<String, Double>(); continue; } else if (st.sval.compareTo("quit") == 0) { System.exit(0); } else if (st.sval.compareTo("exit") == 0) { System.exit(0); } else if (st.sval.compareTo("help") == 0) { help(); continue; } varName = st.sval; c = st.nextToken(); } break; } if (c != '=') { throw new SyntaxError("missing initial '=' sign."); } res = ParseExpression.expression(st); } catch (SyntaxError se) { res = null; varName = null; System.out.println("\nSyntax Error detected! - "+se.getMsg()); while (c != StreamTokenizer.TT_EOL) c = st.nextToken(); continue; } c = st.nextToken(); if (c != StreamTokenizer.TT_EOL) { if (c == ')') System.out.println("\nSyntax Error detected! - To many closing parens."); else System.out.println("\nBogus token on input - "+c); while (c != StreamTokenizer.TT_EOL) c = st.nextToken(); } else { try { Double z; System.out.println("Parsed expression : "+res.unparse()); z = new Double(res.value(variables)); System.out.println("Value is : "+z); if (varName != null) { variables.put(varName, z); System.out.println("Assigned to : "+varName); } } catch (ExecError ee) { System.out.println("Execution error, "+ee.getMsg()+"!"); } } } }
Example 8
Source File: OldAndroidStreamTokenizerTest.java From j2objc with Apache License 2.0 | 4 votes |
/**
 * Exercises default and customized StreamTokenizer behavior: line counting,
 * word/number tokens, lower-case mode, custom comment/ordinary/quote
 * characters, and two Harmony regression cases (lone '-' and a quoted word
 * from a byte stream).
 */
public void testStreamTokenizer() throws Exception {
    String str = "Testing 12345 \n alpha \r\n omega";
    String strb = "-3.8 'BLIND mice' \r sEe /* how */ they run";
    StringReader aa = new StringReader(str);
    StringReader ba = new StringReader(strb);
    StreamTokenizer a = new StreamTokenizer(aa);
    StreamTokenizer b = new StreamTokenizer(ba);
    // Default syntax on 'a': words and numbers, EOL not significant, but
    // lineno() still advances across \n and \r\n.
    assertEquals(1, a.lineno());
    assertEquals(StreamTokenizer.TT_WORD, a.nextToken());
    assertEquals("Token[Testing], line 1", a.toString());
    assertEquals(StreamTokenizer.TT_NUMBER, a.nextToken());
    assertEquals("Token[n=12345.0], line 1", a.toString());
    assertEquals(StreamTokenizer.TT_WORD, a.nextToken());
    assertEquals("Token[alpha], line 2", a.toString());
    assertEquals(StreamTokenizer.TT_WORD, a.nextToken());
    assertEquals("Token[omega], line 3", a.toString());
    assertEquals(StreamTokenizer.TT_EOF, a.nextToken());
    assertEquals("Token[EOF], line 3", a.toString());
    // Customize 'b': 'u' starts comments, EOL is a token, words are
    // lower-cased, 'y' is an ordinary char, /* */ comments are skipped.
    b.commentChar('u');
    b.eolIsSignificant(true);
    b.lowerCaseMode(true);
    b.ordinaryChar('y');
    b.slashStarComments(true);
    assertEquals(StreamTokenizer.TT_NUMBER, b.nextToken());
    assertEquals(-3.8, b.nval);
    assertEquals("Token[n=-3.8], line 1", b.toString());
    assertEquals(39, b.nextToken()); // '
    assertEquals("Token[BLIND mice], line 1", b.toString());
    assertEquals(10, b.nextToken()); // \n
    assertEquals("Token[EOL], line 2", b.toString());
    // "sEe" lower-cased; "they" splits at ordinary 'y'; 'u' in "run"
    // starts a comment, leaving only "r".
    assertEquals(StreamTokenizer.TT_WORD, b.nextToken());
    assertEquals("Token[see], line 2", b.toString());
    assertEquals(StreamTokenizer.TT_WORD, b.nextToken());
    assertEquals("Token[the], line 2", b.toString());
    assertEquals(121, b.nextToken()); // y
    assertEquals("Token['y'], line 2", b.toString());
    assertEquals(StreamTokenizer.TT_WORD, b.nextToken());
    assertEquals("Token[r], line 2", b.toString());
    assertEquals(StreamTokenizer.TT_EOF, b.nextToken());
    assertEquals("Token[EOF], line 2", b.toString());
    // A harmony regression test
    byte[] data = new byte[]{(byte) '-'};
    StreamTokenizer tokenizer = new StreamTokenizer(new ByteArrayInputStream(data));
    tokenizer.nextToken();
    String result = tokenizer.toString();
    assertEquals("Token['-'], line 1", result);
    // another harmony regression test
    byte[] data2 = new byte[]{(byte) '"', (byte) 'H', (byte) 'e', (byte) 'l', (byte) 'l', (byte) 'o', (byte) '"'};
    StreamTokenizer tokenizer2 = new StreamTokenizer(new ByteArrayInputStream(data2));
    tokenizer2.nextToken();
    result = tokenizer2.toString();
    assertEquals("Token[Hello], line 1", result);
}
Example 9
Source File: OldStreamTokenizerTest.java From j2objc with Apache License 2.0 | 4 votes |
/**
 * Basic StreamTokenizer coverage mirroring the Harmony test suite: default
 * tokenization plus the customized 'b' tokenizer (comment char 'u',
 * significant EOL, lower-case mode, ordinary 'y', slash-star comments).
 * Uses Assert.assertTrue on exact toString() snapshots.
 */
public void test_basicStringTokenizerMethods() throws IOException {
    String str = "Testing 12345 \n alpha \r\n omega";
    String strb = "-3.8 'BLIND mice' \r sEe /* how */ they run";
    StringReader aa = new StringReader(str);
    StringReader ba = new StringReader(strb);
    StreamTokenizer a = new StreamTokenizer(aa);
    StreamTokenizer b = new StreamTokenizer(ba);
    // Default syntax: words and numbers, line counter advances per newline.
    Assert.assertTrue(a.lineno() == 1);
    Assert.assertTrue(a.nextToken() == StreamTokenizer.TT_WORD);
    Assert.assertTrue(a.toString().equals("Token[Testing], line 1"));
    Assert.assertTrue(a.nextToken() == StreamTokenizer.TT_NUMBER);
    Assert.assertTrue(a.toString().equals("Token[n=12345.0], line 1"));
    Assert.assertTrue(a.nextToken() == StreamTokenizer.TT_WORD);
    Assert.assertTrue(a.toString().equals("Token[alpha], line 2"));
    Assert.assertTrue(a.nextToken() == StreamTokenizer.TT_WORD);
    Assert.assertTrue(a.toString().equals("Token[omega], line 3"));
    Assert.assertTrue(a.nextToken() == StreamTokenizer.TT_EOF);
    Assert.assertTrue(a.toString().equals("Token[EOF], line 3"));
    // Customized syntax on 'b'.
    b.commentChar('u');
    b.eolIsSignificant(true);
    b.lowerCaseMode(true);
    b.ordinaryChar('y');
    b.slashStarComments(true);
    Assert.assertTrue(b.nextToken() == StreamTokenizer.TT_NUMBER);
    Assert.assertTrue(b.nval == -3.8);
    Assert.assertTrue(b.toString().equals("Token[n=-3.8], line 1"));
    Assert.assertTrue(b.nextToken() == 39); // '
    Assert.assertTrue(b.toString().equals("Token[BLIND mice], line 1"));
    Assert.assertTrue(b.nextToken() == 10); // \n
    Assert.assertTrue(b.toString().equals("Token[EOL], line 2"));
    // Lower-case mode turns "sEe" into "see"; ordinary 'y' splits "they";
    // comment char 'u' swallows the tail of "run", leaving "r".
    Assert.assertTrue(b.nextToken() == StreamTokenizer.TT_WORD);
    Assert.assertTrue(b.toString().equals("Token[see], line 2"));
    Assert.assertTrue(b.nextToken() == StreamTokenizer.TT_WORD);
    Assert.assertTrue(b.toString().equals("Token[the], line 2"));
    Assert.assertTrue(b.nextToken() == 121); // y
    Assert.assertTrue(b.toString().equals("Token['y'], line 2"));
    Assert.assertTrue(b.nextToken() == StreamTokenizer.TT_WORD);
    Assert.assertTrue(b.toString().equals("Token[r], line 2"));
    Assert.assertTrue(b.nextToken() == StreamTokenizer.TT_EOF);
    Assert.assertTrue(b.toString().equals("Token[EOF], line 2"));
}