java.io.StreamTokenizer#resetSyntax

Source File: Algorithm.java From KEEL with GNU General Public License v3.0

6 votes

/** Function to initialize the stream tokenizer.
 *
 * @param tokenizer The tokenizer.
 */
protected void initTokenizer( StreamTokenizer tokenizer )
{
  tokenizer.resetSyntax();
  tokenizer.whitespaceChars( 0, ' ' );
  tokenizer.wordChars( ' '+1,'\u00FF' );
  tokenizer.whitespaceChars( ',',',' );
  tokenizer.quoteChar( '"' );
  tokenizer.quoteChar( '\''  );
  tokenizer.ordinaryChar( '=' );
  tokenizer.ordinaryChar( '{' );
  tokenizer.ordinaryChar( '}' );
  tokenizer.ordinaryChar( '[' );
  tokenizer.ordinaryChar( ']' );
  tokenizer.eolIsSignificant( true );
}

Source File: Algorithm.java From KEEL with GNU General Public License v3.0

6 votes

/** Function to initialize the stream tokenizer.
 *
 * @param tokenizer The tokenizer.
 */
protected void initTokenizer( StreamTokenizer tokenizer )
{
  tokenizer.resetSyntax();
  tokenizer.whitespaceChars( 0, ' ' );
  tokenizer.wordChars( ' '+1,'\u00FF' );
  tokenizer.whitespaceChars( ',',',' );
  tokenizer.quoteChar( '"' );
  tokenizer.quoteChar( '\''  );
  tokenizer.ordinaryChar( '=' );
  tokenizer.ordinaryChar( '{' );
  tokenizer.ordinaryChar( '}' );
  tokenizer.ordinaryChar( '[' );
  tokenizer.ordinaryChar( ']' );
  tokenizer.eolIsSignificant( true );
}

Source File: Algorithm.java From KEEL with GNU General Public License v3.0

6 votes

/** Function to initialize the stream tokenizer.
 *
 * @param tokenizer The tokenizer.
 */
protected void initTokenizer( StreamTokenizer tokenizer )
{
  tokenizer.resetSyntax();
  tokenizer.whitespaceChars( 0, ' ' );
  tokenizer.wordChars( ' '+1,'\u00FF' );
  tokenizer.whitespaceChars( ',',',' );
  tokenizer.quoteChar( '"' );
  tokenizer.quoteChar( '\''  );
  tokenizer.ordinaryChar( '=' );
  tokenizer.ordinaryChar( '{' );
  tokenizer.ordinaryChar( '}' );
  tokenizer.ordinaryChar( '[' );
  tokenizer.ordinaryChar( ']' );
  tokenizer.eolIsSignificant( true );
}

Source File: CommandLine.java From TencentKona-8 with GNU General Public License v2.0

6 votes

private static void loadCmdFile(String name, List args)
    throws IOException
{
    Reader r = new BufferedReader(new FileReader(name));
    StreamTokenizer st = new StreamTokenizer(r);
    st.resetSyntax();
    st.wordChars(' ', 255);
    st.whitespaceChars(0, ' ');
    st.commentChar('#');
    st.quoteChar('"');
    st.quoteChar('\'');
    while (st.nextToken() != st.TT_EOF) {
        args.add(st.sval);
    }
    r.close();
}

Source File: CommandLine.java From jdk8u-dev-jdk with GNU General Public License v2.0

6 votes

private static void loadCmdFile(String name, List<String> args)
    throws IOException
{
    Reader r = new BufferedReader(new FileReader(name));
    StreamTokenizer st = new StreamTokenizer(r);
    st.resetSyntax();
    st.wordChars(' ', 255);
    st.whitespaceChars(0, ' ');
    st.commentChar('#');
    st.quoteChar('"');
    st.quoteChar('\'');
    while (st.nextToken() != StreamTokenizer.TT_EOF) {
        args.add(st.sval);
    }
    r.close();
}

Source File: CommandLine.java From jdk8u60 with GNU General Public License v2.0

6 votes

private static void loadCmdFile(String name, List<String> args)
    throws IOException
{
    Reader r = new BufferedReader(new FileReader(name));
    StreamTokenizer st = new StreamTokenizer(r);
    st.resetSyntax();
    st.wordChars(' ', 255);
    st.whitespaceChars(0, ' ');
    st.commentChar('#');
    st.quoteChar('"');
    st.quoteChar('\'');
    while (st.nextToken() != StreamTokenizer.TT_EOF) {
        args.add(st.sval);
    }
    r.close();
}

Source File: ArffLoader.java From samoa with Apache License 2.0

6 votes

private void initStreamTokenizer(Reader reader) {
    BufferedReader br = new BufferedReader(reader);

    //Init streamTokenizer
    streamTokenizer = new StreamTokenizer(br);

    streamTokenizer.resetSyntax();
    streamTokenizer.whitespaceChars(0, ' ');
    streamTokenizer.wordChars(' ' + 1, '\u00FF');
    streamTokenizer.whitespaceChars(',', ',');
    streamTokenizer.commentChar('%');
    streamTokenizer.quoteChar('"');
    streamTokenizer.quoteChar('\'');
    streamTokenizer.ordinaryChar('{');
    streamTokenizer.ordinaryChar('}');
    streamTokenizer.eolIsSignificant(true);

    this.instanceInformation = this.getHeader();
    if (classAttribute < 0) {
        this.instanceInformation.setClassIndex(this.instanceInformation.numAttributes() - 1);
        //System.out.print(this.instanceInformation.classIndex());
    } else if (classAttribute > 0) {
        this.instanceInformation.setClassIndex(classAttribute - 1);
    }
}

Source File: CommandLine.java From openjdk-jdk8u with GNU General Public License v2.0

6 votes

private static void loadCmdFile(String name, ListBuffer<String> args)
    throws IOException
{
    Reader r = new BufferedReader(new FileReader(name));
    StreamTokenizer st = new StreamTokenizer(r);
    st.resetSyntax();
    st.wordChars(' ', 255);
    st.whitespaceChars(0, ' ');
    st.commentChar('#');
    st.quoteChar('"');
    st.quoteChar('\'');
    while (st.nextToken() != StreamTokenizer.TT_EOF) {
        args.append(st.sval);
    }
    r.close();
}

Source File: CommandLine.java From openjdk-8-source with GNU General Public License v2.0

6 votes

private static void loadCmdFile(String name, List<String> args)
    throws IOException
{
    Reader r = new BufferedReader(new FileReader(name));
    StreamTokenizer st = new StreamTokenizer(r);
    st.resetSyntax();
    st.wordChars(' ', 255);
    st.whitespaceChars(0, ' ');
    st.commentChar('#');
    st.quoteChar('"');
    st.quoteChar('\'');
    while (st.nextToken() != StreamTokenizer.TT_EOF) {
        args.add(st.sval);
    }
    r.close();
}

Source File: CommandLine.java From openjdk-8-source with GNU General Public License v2.0

6 votes

private static void loadCmdFile(String name, List args)
    throws IOException
{
    Reader r = new BufferedReader(new FileReader(name));
    StreamTokenizer st = new StreamTokenizer(r);
    st.resetSyntax();
    st.wordChars(' ', 255);
    st.whitespaceChars(0, ' ');
    st.commentChar('#');
    st.quoteChar('"');
    st.quoteChar('\'');
    while (st.nextToken() != st.TT_EOF) {
        args.add(st.sval);
    }
    r.close();
}

Source File: Parser.java From MOE with Apache License 2.0

5 votes

public static StreamTokenizer tokenize(String input) {
  StreamTokenizer result = new StreamTokenizer(new StringReader(input));
  result.resetSyntax();
  result.wordChars('a', 'z');
  result.wordChars('A', 'Z');
  result.wordChars('0', '9');
  result.whitespaceChars(0, 32);
  result.quoteChar('"');

  return result;
}

Source File: ScriptingFunctions.java From jdk8u_nashorn with GNU General Public License v2.0

5 votes

/**
 * Break a string into tokens, honoring quoted arguments and escaped spaces.
 *
 * @param str a {@link String} to tokenize.
 * @return a {@link List} of {@link String}s representing the tokens that
 * constitute the string.
 */
public static List<String> tokenizeString(final String str) {
    final StreamTokenizer tokenizer = new StreamTokenizer(new StringReader(str));
    tokenizer.resetSyntax();
    tokenizer.wordChars(0, 255);
    tokenizer.whitespaceChars(0, ' ');
    tokenizer.commentChar('#');
    tokenizer.quoteChar('"');
    tokenizer.quoteChar('\'');
    final List<String> tokenList = new ArrayList<>();
    final StringBuilder toAppend = new StringBuilder();
    while (nextToken(tokenizer) != StreamTokenizer.TT_EOF) {
        final String s = tokenizer.sval;
        // The tokenizer understands about honoring quoted strings and recognizes
        // them as one token that possibly contains multiple space-separated words.
        // It does not recognize quoted spaces, though, and will split after the
        // escaping \ character. This is handled here.
        if (s.endsWith("\\")) {
            // omit trailing \, append space instead
            toAppend.append(s.substring(0, s.length() - 1)).append(' ');
        } else {
            tokenList.add(toAppend.append(s).toString());
            toAppend.setLength(0);
        }
    }
    if (toAppend.length() != 0) {
        tokenList.add(toAppend.toString());
    }
    return tokenList;
}

Source File: Instances.java From KEEL with GNU General Public License v3.0

5 votes

/**
 * Initializes the StreamTokenizer used for reading the ARFF file.
 * 
 * @param tokenizer
 *            the stream tokenizer
 */
protected void initTokenizer(StreamTokenizer tokenizer) {

	tokenizer.resetSyntax();
	tokenizer.whitespaceChars(0, ' ');
	tokenizer.wordChars(' ' + 1, '\u00FF');
	tokenizer.whitespaceChars(',', ',');
	tokenizer.commentChar('%');
	tokenizer.quoteChar('"');
	tokenizer.quoteChar('\'');
	tokenizer.ordinaryChar('{');
	tokenizer.ordinaryChar('}');
	tokenizer.eolIsSignificant(true);
}

Source File: ScriptingFunctions.java From openjdk-jdk8u with GNU General Public License v2.0

5 votes

/**
 * Break a string into tokens, honoring quoted arguments and escaped spaces.
 *
 * @param str a {@link String} to tokenize.
 * @return a {@link List} of {@link String}s representing the tokens that
 * constitute the string.
 */
public static List<String> tokenizeString(final String str) {
    final StreamTokenizer tokenizer = new StreamTokenizer(new StringReader(str));
    tokenizer.resetSyntax();
    tokenizer.wordChars(0, 255);
    tokenizer.whitespaceChars(0, ' ');
    tokenizer.commentChar('#');
    tokenizer.quoteChar('"');
    tokenizer.quoteChar('\'');
    final List<String> tokenList = new ArrayList<>();
    final StringBuilder toAppend = new StringBuilder();
    while (nextToken(tokenizer) != StreamTokenizer.TT_EOF) {
        final String s = tokenizer.sval;
        // The tokenizer understands about honoring quoted strings and recognizes
        // them as one token that possibly contains multiple space-separated words.
        // It does not recognize quoted spaces, though, and will split after the
        // escaping \ character. This is handled here.
        if (s.endsWith("\\")) {
            // omit trailing \, append space instead
            toAppend.append(s.substring(0, s.length() - 1)).append(' ');
        } else {
            tokenList.add(toAppend.append(s).toString());
            toAppend.setLength(0);
        }
    }
    if (toAppend.length() != 0) {
        tokenList.add(toAppend.toString());
    }
    return tokenList;
}

Source File: UnparsedTag.java From hlsparserj with Apache License 2.0

4 votes

/**
 * Parses the tag line.
 * @param line playlist line item
 */
private void parseTagLine(final String line) {
    final Matcher lineMatcher = TAGPATTERN.matcher(line);

    // Create a matcher that uses the TAGPATTERN
    if (lineMatcher.find()) {
        tagName = lineMatcher.group(1);

        final String attributeList = lineMatcher.group(2);

        final StreamTokenizer tokenizer = new StreamTokenizer(new StringReader(attributeList));
        tokenizer.resetSyntax();
        tokenizer.wordChars(' ', 255);
        tokenizer.quoteChar('"');
        tokenizer.ordinaryChar(',');
        tokenizer.ordinaryChar('=');

        String attributeName = null;
        String attributeValue = null;
        int noNameCount = 0;
        do {
            int ttype;
            try {
                ttype = tokenizer.nextToken();
            } catch (IOException e) {
                // Should never get here because reading from String
                throw new IllegalStateException(e);
            }

            if (ttype == ',' || ttype == StreamTokenizer.TT_EOF) {
                if (attributeValue == null) {
                    // Not actually an attribute - just a single value
                    attributes.put("NONAME" + noNameCount, attributeName);
                    noNameCount++;
                    attributeName = null;
                } else {
                    attributes.put(attributeName, attributeValue);
                    attributeName = null;
                    attributeValue = null;
                }
            } else if (ttype == StreamTokenizer.TT_WORD || ttype == '"') {
                if (attributeName == null) {
                    attributeName = tokenizer.sval;
                } else {
                    attributeValue = tokenizer.sval;
                }
            }
        } while (tokenizer.ttype != StreamTokenizer.TT_EOF);

        // Set the URI if a URI attribute is present
        if (attributes.containsKey(URI_ATTR)) {
            uri = attributes.get(URI_ATTR);
        }
    } else {
        // If the line startex with #EXT but does not contain a colon it is a
        // tag with no attributes
        tagName = line.substring(1);
    }
}

Source File: FilterTreeBuilder.java From carbon-identity-framework with Apache License 2.0

4 votes

public FilterTreeBuilder(String filterString) throws IOException {

        StreamTokenizer input = new StreamTokenizer(new StringReader(filterString));
        input.resetSyntax();
        // Default settings in StreamTokenizer syntax initializer.
        input.wordChars('a', 'z');
        input.wordChars('A', 'Z');
        // Specifies that all extended ASCII characters defined in HTML 4 standard, are word constituents.
        input.wordChars(128 + 32, 255);
        input.whitespaceChars(0, ' ');
        input.commentChar('/');
        input.quoteChar('"');
        input.quoteChar('\'');

        // Adding other string possible values.
        input.wordChars('@', '@');
        input.wordChars(':', ':');
        input.wordChars('_', '_');
        input.wordChars('0', '9');
        input.wordChars('-', '-');
        input.wordChars('+', '+');
        input.wordChars('.', '.');
        input.wordChars('*', '*');
        input.wordChars('/', '/');

        tokenList = new ArrayList<>();
        StringBuilder concatenatedString = new StringBuilder();

        while (input.nextToken() != StreamTokenizer.TT_EOF) {
            // The ttype 40 is for the '('.
            if (input.ttype == 40) {
                tokenList.add("(");
            } else if (input.ttype == 41) {
                // The ttype 41 is for the ')'.
                concatenatedString = new StringBuilder(concatenatedString.toString().trim());
                tokenList.add(concatenatedString.toString());
                concatenatedString = new StringBuilder();
                tokenList.add(")");
            } else if (input.ttype == StreamTokenizer.TT_WORD) {
                // Concatenate the string by adding spaces in between.
                if (!(input.sval.equalsIgnoreCase(IdentityCoreConstants.Filter.AND) || input.sval.equalsIgnoreCase
                        (IdentityCoreConstants.Filter.OR) || input.sval.equalsIgnoreCase(IdentityCoreConstants.Filter.NOT)))
                    concatenatedString.append(" ").append(input.sval);
                else {
                    concatenatedString = new StringBuilder(concatenatedString.toString().trim());
                    if (!concatenatedString.toString().equals("")) {
                        tokenList.add(concatenatedString.toString());
                        concatenatedString = new StringBuilder();
                    }
                    tokenList.add(input.sval);
                }
            } else if (input.ttype == '\"' || input.ttype == '\'') {
                concatenatedString.append(" ").append(input.sval);
            }
        }
        // Add to the list, if the filter is a simple filter.
        if (!(concatenatedString.toString().equals(""))) {
            tokenList.add(concatenatedString.toString());
        }
    }

Source File: CookieJar.java From lams with GNU General Public License v2.0

4 votes

/**
 * Tokenizes a cookie header and returns the tokens in a
 * <code>Vector</code>.
 * handles the broken syntax for expires= fields ...
 * @param cookieHeader - the header to read
 * @return a Vector of cookieTokens as name=value pairs
 **/
private Vector getCookieTokens(String cookieHeader) {
    StringReader sr = new StringReader(cookieHeader);
    StreamTokenizer st = new StreamTokenizer(sr);
    Vector tokens = new Vector();

    // clear syntax tables of the StreamTokenizer
    st.resetSyntax();

    // set all characters as word characters
    st.wordChars(0,Character.MAX_VALUE);

    // set up characters for quoting
    st.quoteChar( '"' ); //double quotes
    st.quoteChar( '\'' ); //single quotes

    // set up characters to separate tokens
    st.whitespaceChars(59,59); //semicolon
    // and here we run into trouble ...
    // see http://www.mnot.net/blog/2006/10/27/cookie_fun
    // ... Notice something about the above? It uses a comma inside of the date, 
    // without quoting the value. This makes it difficult for generic processors to handle the Set-Cookie header.
    st.whitespaceChars(44,44); //comma

    try {
        while (st.nextToken() != StreamTokenizer.TT_EOF) {
        	String tokenContent=st.sval;
        	// fix expires comma delimiter token problem
        	if (tokenContent.toLowerCase().startsWith("expires=")) {
        		if (st.nextToken() != StreamTokenizer.TT_EOF) {
        			tokenContent+=","+st.sval;
        		}	// if
        	} // if        		
        	tokenContent=tokenContent.trim();
          tokens.addElement( tokenContent );
        }
    }
    catch (IOException ioe) {
        // this will never happen with a StringReader
    }
    sr.close();
    return tokens;
}

Source File: Harness.java From openjdk-8 with GNU General Public License v2.0

3 votes

/**
 * Create new benchmark harness with given configuration and reporter.
 * Throws ConfigFormatException if there was an error parsing the config
 * file.
 * <p>
 * <b>Config file syntax:</b>
 * <p>
 * '#' marks the beginning of a comment.  Blank lines are ignored.  All
 * other lines should adhere to the following format:
 * <pre>
 *     &lt;weight&gt; &lt;name&gt; &lt;class&gt; [&lt;args&gt;]
 * </pre>
 * &lt;weight&gt; is a floating point value which is multiplied times the
 * benchmark's execution time to determine its weighted score.  The
 * total score of the benchmark suite is the sum of all weighted scores
 * of its benchmarks.
 * <p>
 * &lt;name&gt; is a name used to identify the benchmark on the benchmark
 * report.  If the name contains whitespace, the quote character '"' should
 * be used as a delimiter.
 * <p>
 * &lt;class&gt; is the full name (including the package) of the class
 * containing the benchmark implementation.  This class must implement
 * bench.Benchmark.
 * <p>
 * [&lt;args&gt;] is a variable-length list of runtime arguments to pass to
 * the benchmark.  Arguments containing whitespace should use the quote
 * character '"' as a delimiter.
 * <p>
 * <b>Example:</b>
 * <pre>
 *      3.5 "My benchmark" bench.serial.Test first second "third arg"
 * </pre>
 */
public Harness(InputStream in) throws IOException, ConfigFormatException {
    Vector bvec = new Vector();
    StreamTokenizer tokens = new StreamTokenizer(new InputStreamReader(in));

    tokens.resetSyntax();
    tokens.wordChars(0, 255);
    tokens.whitespaceChars(0, ' ');
    tokens.commentChar('#');
    tokens.quoteChar('"');
    tokens.eolIsSignificant(true);

    tokens.nextToken();
    while (tokens.ttype != StreamTokenizer.TT_EOF) {
        switch (tokens.ttype) {
            case StreamTokenizer.TT_WORD:
            case '"':                       // parse line
                bvec.add(parseBenchInfo(tokens));
                break;

            default:                        // ignore
                tokens.nextToken();
                break;
        }
    }
    binfo = (BenchInfo[]) bvec.toArray(new BenchInfo[bvec.size()]);
}

Source File: Harness.java From jdk8u60 with GNU General Public License v2.0

3 votes

/**
 * Create new benchmark harness with given configuration and reporter.
 * Throws ConfigFormatException if there was an error parsing the config
 * file.
 * <p>
 * <b>Config file syntax:</b>
 * <p>
 * '#' marks the beginning of a comment.  Blank lines are ignored.  All
 * other lines should adhere to the following format:
 * <pre>
 *     &lt;weight&gt; &lt;name&gt; &lt;class&gt; [&lt;args&gt;]
 * </pre>
 * &lt;weight&gt; is a floating point value which is multiplied times the
 * benchmark's execution time to determine its weighted score.  The
 * total score of the benchmark suite is the sum of all weighted scores
 * of its benchmarks.
 * <p>
 * &lt;name&gt; is a name used to identify the benchmark on the benchmark
 * report.  If the name contains whitespace, the quote character '"' should
 * be used as a delimiter.
 * <p>
 * &lt;class&gt; is the full name (including the package) of the class
 * containing the benchmark implementation.  This class must implement
 * bench.Benchmark.
 * <p>
 * [&lt;args&gt;] is a variable-length list of runtime arguments to pass to
 * the benchmark.  Arguments containing whitespace should use the quote
 * character '"' as a delimiter.
 * <p>
 * <b>Example:</b>
 * <pre>
 *      3.5 "My benchmark" bench.serial.Test first second "third arg"
 * </pre>
 */
public Harness(InputStream in) throws IOException, ConfigFormatException {
    Vector bvec = new Vector();
    StreamTokenizer tokens = new StreamTokenizer(new InputStreamReader(in));

    tokens.resetSyntax();
    tokens.wordChars(0, 255);
    tokens.whitespaceChars(0, ' ');
    tokens.commentChar('#');
    tokens.quoteChar('"');
    tokens.eolIsSignificant(true);

    tokens.nextToken();
    while (tokens.ttype != StreamTokenizer.TT_EOF) {
        switch (tokens.ttype) {
            case StreamTokenizer.TT_WORD:
            case '"':                       // parse line
                bvec.add(parseBenchInfo(tokens));
                break;

            default:                        // ignore
                tokens.nextToken();
                break;
        }
    }
    binfo = (BenchInfo[]) bvec.toArray(new BenchInfo[bvec.size()]);
}

Source File: Harness.java From jdk8u_jdk with GNU General Public License v2.0

3 votes

/**
 * Create new benchmark harness with given configuration and reporter.
 * Throws ConfigFormatException if there was an error parsing the config
 * file.
 * <p>
 * <b>Config file syntax:</b>
 * <p>
 * '#' marks the beginning of a comment.  Blank lines are ignored.  All
 * other lines should adhere to the following format:
 * <pre>
 *     &lt;weight&gt; &lt;name&gt; &lt;class&gt; [&lt;args&gt;]
 * </pre>
 * &lt;weight&gt; is a floating point value which is multiplied times the
 * benchmark's execution time to determine its weighted score.  The
 * total score of the benchmark suite is the sum of all weighted scores
 * of its benchmarks.
 * <p>
 * &lt;name&gt; is a name used to identify the benchmark on the benchmark
 * report.  If the name contains whitespace, the quote character '"' should
 * be used as a delimiter.
 * <p>
 * &lt;class&gt; is the full name (including the package) of the class
 * containing the benchmark implementation.  This class must implement
 * bench.Benchmark.
 * <p>
 * [&lt;args&gt;] is a variable-length list of runtime arguments to pass to
 * the benchmark.  Arguments containing whitespace should use the quote
 * character '"' as a delimiter.
 * <p>
 * <b>Example:</b>
 * <pre>
 *      3.5 "My benchmark" bench.serial.Test first second "third arg"
 * </pre>
 */
public Harness(InputStream in) throws IOException, ConfigFormatException {
    Vector bvec = new Vector();
    StreamTokenizer tokens = new StreamTokenizer(new InputStreamReader(in));

    tokens.resetSyntax();
    tokens.wordChars(0, 255);
    tokens.whitespaceChars(0, ' ');
    tokens.commentChar('#');
    tokens.quoteChar('"');
    tokens.eolIsSignificant(true);

    tokens.nextToken();
    while (tokens.ttype != StreamTokenizer.TT_EOF) {
        switch (tokens.ttype) {
            case StreamTokenizer.TT_WORD:
            case '"':                       // parse line
                bvec.add(parseBenchInfo(tokens));
                break;

            default:                        // ignore
                tokens.nextToken();
                break;
        }
    }
    binfo = (BenchInfo[]) bvec.toArray(new BenchInfo[bvec.size()]);
}

Java Code Examples for java.io.StreamTokenizer#resetSyntax()