codemining.languagetools.TokenizerUtils Java Examples
The following examples show how to use
codemining.languagetools.TokenizerUtils.
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: TokenCounter.java From api-mining with GNU General Public License v3.0 | 5 votes |
/** * @param args * @throws IOException * @throws ClassNotFoundException * @throws IllegalAccessException * @throws InstantiationException */ public static void main(final String[] args) throws IOException, InstantiationException, IllegalAccessException, ClassNotFoundException { if (args.length != 2) { System.err.println("Usage <codeDir> <TokenizerClass>"); return; } long tokenCount = 0; final ITokenizer tokenizer = TokenizerUtils.tokenizerForClass(args[1]); for (final File fi : FileUtils.listFiles(new File(args[0]), tokenizer.getFileFilter(), DirectoryFileFilter.DIRECTORY)) { try { final char[] code = FileUtils.readFileToString(fi) .toCharArray(); tokenCount += tokenizer.tokenListFromCode(code).size() - 2; // Remove // sentence // start/end } catch (final IOException e) { LOGGER.warning(ExceptionUtils.getFullStackTrace(e)); } } System.out.println("Tokens: " + tokenCount); }
Example #2
Source File: TokenCounter.java From tassal with BSD 3-Clause "New" or "Revised" License | 5 votes |
/** * @param args * @throws IOException * @throws ClassNotFoundException * @throws IllegalAccessException * @throws InstantiationException */ public static void main(final String[] args) throws IOException, InstantiationException, IllegalAccessException, ClassNotFoundException { if (args.length != 2) { System.err.println("Usage <codeDir> <TokenizerClass>"); return; } long tokenCount = 0; final ITokenizer tokenizer = TokenizerUtils.tokenizerForClass(args[1]); for (final File fi : FileUtils.listFiles(new File(args[0]), tokenizer.getFileFilter(), DirectoryFileFilter.DIRECTORY)) { try { final char[] code = FileUtils.readFileToString(fi) .toCharArray(); tokenCount += tokenizer.tokenListFromCode(code).size() - 2; // Remove // sentence // start/end } catch (final IOException e) { LOGGER.warning(ExceptionUtils.getFullStackTrace(e)); } } System.out.println("Tokens: " + tokenCount); }
Example #3
Source File: TokenCounter.java From codemining-core with BSD 3-Clause "New" or "Revised" License | 5 votes |
/** * @param args * @throws IOException * @throws ClassNotFoundException * @throws IllegalAccessException * @throws InstantiationException */ public static void main(final String[] args) throws IOException, InstantiationException, IllegalAccessException, ClassNotFoundException { if (args.length != 2) { System.err.println("Usage <codeDir> <TokenizerClass>"); return; } long tokenCount = 0; final ITokenizer tokenizer = TokenizerUtils.tokenizerForClass(args[1]); for (final File fi : FileUtils.listFiles(new File(args[0]), tokenizer.getFileFilter(), DirectoryFileFilter.DIRECTORY)) { try { final char[] code = FileUtils.readFileToString(fi) .toCharArray(); tokenCount += tokenizer.tokenListFromCode(code).size() - 2; // Remove // sentence // start/end } catch (final IOException e) { LOGGER.warning(ExceptionUtils.getFullStackTrace(e)); } } System.out.println("Tokens: " + tokenCount); }
Example #4
Source File: DistinctTokenCount.java From api-mining with GNU General Public License v3.0 | 4 votes |
/**
 * Creates the counter and stores the tokenizer returned by
 * {@code TokenizerUtils.tokenizerForClass} for the given class name.
 *
 * @param tokenizerClass
 *            tokenizer class name, passed unchanged to
 *            {@code TokenizerUtils.tokenizerForClass}
 * @throws InstantiationException
 *             declared by the signature (presumably propagated from the
 *             tokenizer lookup; confirm)
 * @throws IllegalAccessException
 *             declared by the signature (presumably propagated from the
 *             tokenizer lookup; confirm)
 * @throws ClassNotFoundException
 *             declared by the signature (presumably propagated from the
 *             tokenizer lookup; confirm)
 */
public DistinctTokenCount(final String tokenizerClass)
        throws InstantiationException, IllegalAccessException,
        ClassNotFoundException {
    this.tokenizer = TokenizerUtils.tokenizerForClass(tokenizerClass);
}
Example #5
Source File: TokenizerTUI.java From api-mining with GNU General Public License v3.0 | 4 votes |
/**
 * Command-line entry point: tokenizes a single file, or every listed file of
 * a directory, and prints each file's tokens one per line followed by a
 * blank line.
 *
 * <p>At least two arguments are required; with fewer, a usage line is
 * written to standard error and the method returns. When a third argument
 * is present it is passed to the two-argument
 * {@code TokenizerUtils.tokenizerForClass} overload; arguments beyond the
 * third are not read.
 *
 * <p>None of the declared exceptions is caught here; whatever the called
 * code raises is propagated to the caller.
 *
 * @param args
 *            {@code args[0]} file or directory to tokenize,
 *            {@code args[1]} tokenizer class name, optional
 *            {@code args[2]} tokenizer arguments
 * @throws InvalidInputException
 *             declared by the signature; not caught by this method
 * @throws IOException
 *             declared by the signature; not caught by this method
 * @throws InstantiationException
 *             declared by the signature; not caught by this method
 * @throws IllegalAccessException
 *             declared by the signature; not caught by this method
 * @throws ClassNotFoundException
 *             declared by the signature; not caught by this method
 * @throws InvocationTargetException
 *             declared by the signature; not caught by this method
 * @throws NoSuchMethodException
 *             declared by the signature; not caught by this method
 */
public static void main(final String[] args) throws InvalidInputException,
        IOException, InstantiationException, IllegalAccessException,
        ClassNotFoundException, IllegalArgumentException,
        SecurityException, InvocationTargetException,
        NoSuchMethodException {
    if (args.length < 2) {
        System.err
                .println("Usage <codeDir> <TokenizerClass> [TokenizerArgs]");
        return;
    }

    final ITokenizer tok;
    final String tokenizerClass = args[1];
    if (args.length == 2) {
        tok = TokenizerUtils.tokenizerForClass(tokenizerClass);
    } else {
        final String tokenizerArguments = args[2];
        tok = TokenizerUtils.tokenizerForClass(tokenizerClass,
                tokenizerArguments);
    }

    // A directory is expanded with the tokenizer's file filter; anything
    // else is treated as the single file to tokenize.
    final File baseFile = new File(args[0]);
    final Collection<File> allFiles;
    if (baseFile.isDirectory()) {
        allFiles = FileUtils.listFiles(baseFile, tok.getFileFilter(),
                DirectoryFileFilter.DIRECTORY);
    } else {
        allFiles = Lists.newArrayList(baseFile);
    }

    // Loop-invariant: look the separator up once instead of once per token.
    final String lineSeparator = System.getProperty("line.separator");
    for (final File fi : allFiles) {
        // The buffer never leaves this method, so the unsynchronized
        // StringBuilder replaces StringBuffer.
        final StringBuilder buf = new StringBuilder();
        for (final FullToken token : tok.getTokenListFromCode(fi)) {
            buf.append(token).append(lineSeparator);
        }
        System.out.println(buf.toString());
        System.out.println();
    }
}
Example #6
Source File: DistinctTokenCount.java From tassal with BSD 3-Clause "New" or "Revised" License | 4 votes |
/**
 * Builds a {@code DistinctTokenCount} whose tokenizer is looked up by class
 * name through {@code TokenizerUtils.tokenizerForClass}.
 *
 * @param tokenizerClass
 *            name of the tokenizer class, forwarded as-is to
 *            {@code TokenizerUtils.tokenizerForClass}
 * @throws InstantiationException
 *             declared by the signature (likely raised by the tokenizer
 *             lookup; verify)
 * @throws IllegalAccessException
 *             declared by the signature (likely raised by the tokenizer
 *             lookup; verify)
 * @throws ClassNotFoundException
 *             declared by the signature (likely raised by the tokenizer
 *             lookup; verify)
 */
public DistinctTokenCount(final String tokenizerClass)
        throws InstantiationException, IllegalAccessException,
        ClassNotFoundException {
    this.tokenizer = TokenizerUtils.tokenizerForClass(tokenizerClass);
}
Example #7
Source File: TokenizerTUI.java From tassal with BSD 3-Clause "New" or "Revised" License | 4 votes |
/**
 * Text UI entry point: prints the tokens of one file, or of each file
 * listed for a directory, one token per line with a blank line after every
 * file.
 *
 * <p>Fewer than two arguments produce a usage line on standard error and an
 * immediate return. With exactly two arguments the tokenizer is created
 * from the class name alone; with more, {@code args[2]} is additionally
 * passed to {@code TokenizerUtils.tokenizerForClass}. Further arguments are
 * not read.
 *
 * <p>This method contains no {@code try}/{@code catch}; every declared
 * exception propagates to the caller.
 *
 * @param args
 *            {@code <codeDir> <TokenizerClass> [TokenizerArgs]}
 * @throws InvalidInputException
 *             declared by the signature; not caught by this method
 * @throws IOException
 *             declared by the signature; not caught by this method
 * @throws InstantiationException
 *             declared by the signature; not caught by this method
 * @throws IllegalAccessException
 *             declared by the signature; not caught by this method
 * @throws ClassNotFoundException
 *             declared by the signature; not caught by this method
 * @throws InvocationTargetException
 *             declared by the signature; not caught by this method
 * @throws NoSuchMethodException
 *             declared by the signature; not caught by this method
 */
public static void main(final String[] args) throws InvalidInputException,
        IOException, InstantiationException, IllegalAccessException,
        ClassNotFoundException, IllegalArgumentException,
        SecurityException, InvocationTargetException,
        NoSuchMethodException {
    if (args.length < 2) {
        System.err
                .println("Usage <codeDir> <TokenizerClass> [TokenizerArgs]");
        return;
    }

    final ITokenizer tok;
    final String tokenizerClass = args[1];
    if (args.length == 2) {
        tok = TokenizerUtils.tokenizerForClass(tokenizerClass);
    } else {
        final String tokenizerArguments = args[2];
        tok = TokenizerUtils.tokenizerForClass(tokenizerClass,
                tokenizerArguments);
    }

    // Directories are listed through the tokenizer's file filter; a
    // non-directory path becomes a one-element collection.
    final File baseFile = new File(args[0]);
    final Collection<File> allFiles;
    if (baseFile.isDirectory()) {
        allFiles = FileUtils.listFiles(baseFile, tok.getFileFilter(),
                DirectoryFileFilter.DIRECTORY);
    } else {
        allFiles = Lists.newArrayList(baseFile);
    }

    // Read the separator once; it was previously fetched for every token.
    final String lineSeparator = System.getProperty("line.separator");
    for (final File fi : allFiles) {
        // Method-local buffer: StringBuilder avoids StringBuffer's
        // per-call synchronization.
        final StringBuilder buf = new StringBuilder();
        for (final FullToken token : tok.getTokenListFromCode(fi)) {
            buf.append(token).append(lineSeparator);
        }
        System.out.println(buf.toString());
        System.out.println();
    }
}
Example #8
Source File: DistinctTokenCount.java From codemining-core with BSD 3-Clause "New" or "Revised" License | 4 votes |
/**
 * Initializes the instance by resolving its tokenizer from a class name via
 * {@code TokenizerUtils.tokenizerForClass}.
 *
 * @param tokenizerClass
 *            tokenizer class name given to
 *            {@code TokenizerUtils.tokenizerForClass}
 * @throws InstantiationException
 *             declared by the signature (presumably from resolving the
 *             tokenizer; confirm)
 * @throws IllegalAccessException
 *             declared by the signature (presumably from resolving the
 *             tokenizer; confirm)
 * @throws ClassNotFoundException
 *             declared by the signature (presumably from resolving the
 *             tokenizer; confirm)
 */
public DistinctTokenCount(final String tokenizerClass)
        throws InstantiationException, IllegalAccessException,
        ClassNotFoundException {
    this.tokenizer = TokenizerUtils.tokenizerForClass(tokenizerClass);
}
Example #9
Source File: TokenizerTUI.java From codemining-core with BSD 3-Clause "New" or "Revised" License | 4 votes |
/**
 * Entry point of the tokenizer text UI. Resolves a tokenizer from the
 * command line, collects the input file(s) and writes every token of each
 * file to standard output, one per line, with an empty line after each
 * file.
 *
 * <p>With fewer than two arguments the usage text goes to standard error
 * and the method returns. A third argument, when present, is handed to
 * {@code TokenizerUtils.tokenizerForClass} together with the class name;
 * any arguments after it are not read.
 *
 * <p>No exception is handled in this method; all declared exceptions reach
 * the caller.
 *
 * @param args
 *            {@code args[0]} path of a file or directory,
 *            {@code args[1]} tokenizer class name, {@code args[2]}
 *            optional tokenizer arguments
 * @throws InvalidInputException
 *             declared by the signature; not caught by this method
 * @throws IOException
 *             declared by the signature; not caught by this method
 * @throws InstantiationException
 *             declared by the signature; not caught by this method
 * @throws IllegalAccessException
 *             declared by the signature; not caught by this method
 * @throws ClassNotFoundException
 *             declared by the signature; not caught by this method
 * @throws InvocationTargetException
 *             declared by the signature; not caught by this method
 * @throws NoSuchMethodException
 *             declared by the signature; not caught by this method
 */
public static void main(final String[] args) throws InvalidInputException,
        IOException, InstantiationException, IllegalAccessException,
        ClassNotFoundException, IllegalArgumentException,
        SecurityException, InvocationTargetException,
        NoSuchMethodException {
    if (args.length < 2) {
        System.err
                .println("Usage <codeDir> <TokenizerClass> [TokenizerArgs]");
        return;
    }

    final ITokenizer tok;
    final String tokenizerClass = args[1];
    if (args.length == 2) {
        tok = TokenizerUtils.tokenizerForClass(tokenizerClass);
    } else {
        final String tokenizerArguments = args[2];
        tok = TokenizerUtils.tokenizerForClass(tokenizerClass,
                tokenizerArguments);
    }

    // A directory argument is expanded using the tokenizer's file filter;
    // otherwise the path itself is the only input.
    final File baseFile = new File(args[0]);
    final Collection<File> allFiles;
    if (baseFile.isDirectory()) {
        allFiles = FileUtils.listFiles(baseFile, tok.getFileFilter(),
                DirectoryFileFilter.DIRECTORY);
    } else {
        allFiles = Lists.newArrayList(baseFile);
    }

    // Hoisted out of the loops: the lookup does not depend on the token.
    final String lineSeparator = System.getProperty("line.separator");
    for (final File fi : allFiles) {
        // StringBuilder instead of StringBuffer: the buffer is confined to
        // this method, so synchronization buys nothing.
        final StringBuilder buf = new StringBuilder();
        for (final FullToken token : tok.getTokenListFromCode(fi)) {
            buf.append(token).append(lineSeparator);
        }
        System.out.println(buf.toString());
        System.out.println();
    }
}