codemining.languagetools.ITokenizer Java Examples
The following examples show how to use
codemining.languagetools.ITokenizer.
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: CommonNameRenamingEvaluator.java From naturalize with BSD 3-Clause "New" or "Revised" License | 6 votes |
/**
 * Command-line entry point for the common-name renaming evaluation.
 *
 * <p>The first argument names the folder to evaluate; all remaining
 * arguments are gathered into a sorted set and handed to the evaluator as
 * junk names. A {@link JavaTokenizer} and {@link StupidBackoff} smoothing
 * are always used.
 *
 * @param args
 *            command-line arguments; when fewer than five are given the
 *            usage line is printed to stderr and nothing is evaluated
 */
public static void main(String[] args) {
    final int argCount = args.length;
    if (argCount < 5) {
        System.err.println("Usage <folder> <junkNames> ...");
        return;
    }

    final File directory = new File(args[0]);
    final ITokenizer tokenizer = new JavaTokenizer();
    final Class<? extends AbstractNGramLM> smoothedNgramClass = StupidBackoff.class;

    // Everything after the folder argument is treated as a junk name.
    final Set<String> junkNames = Sets.newTreeSet(
            Arrays.asList(args).subList(1, argCount));

    final CommonNameRenamingEvaluator evaluator = new CommonNameRenamingEvaluator(
            directory, tokenizer, smoothedNgramClass, junkNames);
    evaluator.evaluate();
    evaluator.printResults();
}
Example #2
Source File: ScopedIdentifierRenaming.java From api-mining with GNU General Public License v3.0 | 6 votes |
/**
 * Crudely renames identifiers in a snippet by tokenizing it and replacing
 * every token that appears as a key in the mapping.
 *
 * <p>Sentence start/end marker tokens are dropped (unless they are
 * themselves keys of the mapping, which is checked first). Each emitted
 * token is followed by a single space, so the original whitespace of the
 * snippet is not preserved.
 *
 * @param snippet
 *            the code to rewrite
 * @param variableMapping
 *            from (token text), to (replacement text)
 * @return the emitted tokens, each followed by one space
 */
private String renameVariableInSnippet(final String snippet,
        final Map<String, String> variableMapping) {
    final List<String> tokens = tokenizer.tokenListFromCode(snippet
            .toCharArray());
    // The buffer never leaves this method, so the unsynchronized
    // StringBuilder is sufficient (StringBuffer locks on every append).
    final StringBuilder renamed = new StringBuilder();
    for (final String token : tokens) {
        if (variableMapping.containsKey(token)) {
            renamed.append(variableMapping.get(token));
        } else if (token.equals(ITokenizer.SENTENCE_START)
                || token.equals(ITokenizer.SENTENCE_END)) {
            // Markers produce no output, not even the separator.
            continue;
        } else {
            renamed.append(token);
        }
        renamed.append(" ");
    }
    return renamed.toString();
}
Example #3
Source File: JavaASTExtractor.java From codemining-core with BSD 3-Clause "New" or "Revised" License | 6 votes |
/**
 * Hacky way to compare snippets: re-tokenizes the code with a fresh
 * {@link JavaTokenizer} and joins the tokens back together.
 *
 * <p>Sentence start/end marker tokens are skipped; every other token is
 * emitted followed by a single space.
 *
 * @param snippet
 *            the code to normalize
 * @return the non-marker tokens, each followed by one space
 */
private String normalizeCode(final char[] snippet) {
    final List<String> tokens = (new JavaTokenizer())
            .tokenListFromCode(snippet);
    // Method-local accumulation: StringBuilder avoids StringBuffer's
    // needless synchronization.
    final StringBuilder normalized = new StringBuilder();
    for (final String token : tokens) {
        final boolean isMarker = token.equals(ITokenizer.SENTENCE_START)
                || token.equals(ITokenizer.SENTENCE_END);
        if (!isMarker) {
            normalized.append(token).append(" ");
        }
    }
    return normalized.toString();
}
Example #4
Source File: HumanEvaluationOutput.java From naturalize with BSD 3-Clause "New" or "Revised" License | 6 votes |
/**
 * Command-line entry point that builds a {@link HumanEvaluationOutput} and
 * asks it for its output.
 *
 * <p>Expected arguments, in order: project directory, tokenizer class name,
 * scope extractor name, number of examples to generate, renamer class name
 * and, optionally, a renamer parameter.
 *
 * @param args
 *            command-line arguments; when fewer than five are given the
 *            usage line is printed to stderr and nothing else happens
 * @throws IllegalAccessException
 *             if the tokenizer class or its no-arg constructor is not
 *             accessible
 * @throws InstantiationException
 *             if the tokenizer class cannot be instantiated
 * @throws ClassNotFoundException
 *             if the tokenizer class name cannot be resolved
 * @throws ClassCastException
 *             if the named tokenizer class does not implement
 *             {@link ITokenizer}
 */
public static void main(String[] args) throws InstantiationException,
        IllegalAccessException, ClassNotFoundException {
    if (args.length < 5) {
        System.err
                .println("Usage: <projectDir> <tokenizerClass> variable|method|class examplesToGenerate <renamerClass> [renamerParams]");
        return;
    }

    final File directory = new File(args[0]);

    final IScopeExtractor extractor = ScopesTUI
            .getScopeExtractorByName(args[2]);

    final long nExamples = Long.parseLong(args[3]);

    // asSubclass performs a checked narrowing: a class that is not an
    // ITokenizer is rejected here, before it is instantiated, and no
    // unchecked cast is needed.
    final Class<? extends ITokenizer> tokenizerName = Class.forName(
            args[1]).asSubclass(ITokenizer.class);
    final ITokenizer tokenizer = tokenizerName.newInstance();

    final String renamerClass = args[4];

    final HumanEvaluationOutput heo = new HumanEvaluationOutput(directory,
            extractor, renamerClass, args.length == 5 ? null : args[5],
            tokenizer, nExamples);
    heo.getOutput();
}
Example #5
Source File: SegmentRenamingSuggestion.java From naturalize with BSD 3-Clause "New" or "Revised" License | 6 votes |
/**
 * Ranks renaming suggestions for one file using a model trained on the
 * other files of a directory.
 *
 * <p>A {@link JavaTokenizer} selects the training files under
 * {@code directory}; {@code currentFile} is removed from that collection
 * before the renaming model is built.
 *
 * @param currentFile
 *            the file to produce suggestions for
 * @param directory
 *            the directory whose files are used for training
 * @param useUNK
 *            passed through to {@link SegmentRenamingSuggestion}
 * @return the result of ranking suggestions for {@code currentFile}
 * @throws IOException
 *             declared by this method; propagated from the calls it makes
 */
public static SortedSet<Suggestion> getVariableSuggestions(
        final File currentFile, final File directory, final boolean useUNK)
        throws IOException {
    final ITokenizer javaTokenizer = new JavaTokenizer();
    final AbstractIdentifierRenamings renamingModel = new BaseIdentifierRenamings(
            javaTokenizer);

    // Train on everything the tokenizer accepts except the file under test.
    final Collection<java.io.File> corpus = FileUtils.listFiles(directory,
            javaTokenizer.getFileFilter(), DirectoryFileFilter.DIRECTORY);
    corpus.remove(currentFile);
    renamingModel.buildRenamingModel(corpus);

    final IScopeExtractor variableScopes = new VariableScopeExtractor.VariableScopeSnippetExtractor();
    return new SegmentRenamingSuggestion(renamingModel, variableScopes,
            useUNK).rankSuggestions(currentFile);
}
Example #6
Source File: SegmentRenamingSuggestion.java From naturalize with BSD 3-Clause "New" or "Revised" License | 5 votes |
/**
 * Command-line entry point: trains a renamer on a directory and prints the
 * ranked suggestions for a test file.
 *
 * <p>Expected arguments, in order: test file, training directory, renamer
 * class name (instantiated reflectively through a constructor taking an
 * {@link ITokenizer}) and the scope extractor name.
 *
 * @param args
 *            command-line arguments; when fewer than four are given the
 *            usage line is printed to stderr and nothing else happens
 */
public static void main(final String[] args)
        throws IllegalArgumentException, SecurityException,
        InstantiationException, IllegalAccessException,
        InvocationTargetException, NoSuchMethodException,
        ClassNotFoundException, IOException {
    if (args.length < 4) {
        System.err
                .println("Usage <TestFile> <TrainDirectory> <renamerClass> variable|method");
        return;
    }

    final String testFilePath = args[0];
    final String trainDirPath = args[1];
    final String renamerClassName = args[2];
    final String scopeKind = args[3];

    final ITokenizer tokenizer = new JavaTokenizer();

    // The renamer is created through its (ITokenizer) constructor.
    final Object renamerInstance = Class.forName(renamerClassName)
            .getDeclaredConstructor(ITokenizer.class)
            .newInstance(tokenizer);
    final AbstractIdentifierRenamings renamer = (AbstractIdentifierRenamings) renamerInstance;

    renamer.buildRenamingModel(FileUtils.listFiles(new File(trainDirPath),
            tokenizer.getFileFilter(), DirectoryFileFilter.DIRECTORY));

    final IScopeExtractor scopeExtractor = ScopesTUI
            .getScopeExtractorByName(scopeKind);
    final SegmentRenamingSuggestion suggestion = new SegmentRenamingSuggestion(
            renamer, scopeExtractor, true);

    System.out.println(suggestion.rankSuggestions(new File(testFilePath)));
}
Example #7
Source File: CDTTokenizer.java From tassal with BSD 3-Clause "New" or "Revised" License | 5 votes |
/**
 * Converts a token string into a {@link FullToken}.
 *
 * <p>The sentence start and end markers map to a token whose text and type
 * are both the marker itself. Any other string is tokenized as code and the
 * element at index 1 of the resulting list is returned (presumably index 0
 * holds the sentence-start marker; confirm against
 * {@code getTokenListFromCode}).
 *
 * @param token
 *            the token text
 * @return the corresponding full token
 */
@Override
public FullToken getTokenFromString(final String token) {
    final FullToken result;
    if (token.equals(ITokenizer.SENTENCE_START)) {
        result = new FullToken(ITokenizer.SENTENCE_START,
                ITokenizer.SENTENCE_START);
    } else if (token.equals(ITokenizer.SENTENCE_END)) {
        result = new FullToken(ITokenizer.SENTENCE_END,
                ITokenizer.SENTENCE_END);
    } else {
        result = getTokenListFromCode(token.toCharArray()).get(1);
    }
    return result;
}
Example #8
Source File: JavaTokenizer.java From tassal with BSD 3-Clause "New" or "Revised" License | 5 votes |
/**
 * Builds the {@link FullToken} for a single token string.
 *
 * <p>A sentence start or end marker yields a token that uses the marker as
 * both text and type. Otherwise the string is run through
 * {@code getTokenListFromCode} and the second list element (index 1) is
 * returned; presumably the first element is the sentence-start marker
 * (confirm against the tokenizer).
 *
 * @param token
 *            the token text
 * @return the corresponding full token
 */
@Override
public FullToken getTokenFromString(final String token) {
    // Resolve which marker, if any, the input is.
    final String marker;
    if (token.equals(ITokenizer.SENTENCE_START)) {
        marker = ITokenizer.SENTENCE_START;
    } else if (token.equals(ITokenizer.SENTENCE_END)) {
        marker = ITokenizer.SENTENCE_END;
    } else {
        marker = null;
    }

    if (marker != null) {
        return new FullToken(marker, marker);
    }
    return getTokenListFromCode(token.toCharArray()).get(1);
}
Example #9
Source File: TokenizeJavaCodeTest.java From codemining-core with BSD 3-Clause "New" or "Revised" License | 5 votes |
/**
 * Checks that tokenizing {@code CODE_SAMPLE2} yields the tokens in
 * {@code TOKENS_SAMPLE2}, position by position.
 *
 * <p>Only the first {@code TOKENS_SAMPLE2.length} tokens are compared; the
 * total number of produced tokens is not asserted.
 *
 * @param tokenizer
 *            the tokenizer under test
 */
protected void testSample2(ITokenizer tokenizer) {
    final List<String> tok = tokenizer.tokenListFromCode(CODE_SAMPLE2);
    for (int i = 0; i < TOKENS_SAMPLE2.length; i++) {
        // assertEquals takes (expected, actual); the fixture is the
        // expected value, so failure messages read the right way round.
        assertEquals(TOKENS_SAMPLE2[i], tok.get(i));
    }
}
Example #10
Source File: TokenizeJavaCodeTest.java From codemining-core with BSD 3-Clause "New" or "Revised" License | 5 votes |
/**
 * Checks the token types a {@link JavaTokenizer} assigns to an identifier
 * and to an opening brace.
 */
@Test
public void testTokenTypes() {
    ITokenizer tokenizer = new JavaTokenizer();

    // assertEquals takes (expected, actual): the hand-built FullToken is
    // the expectation, the tokenizer output is the value under test.
    assertEquals(
            new ITokenizer.FullToken("hello", tokenizer.getIdentifierType()),
            tokenizer.getTokenFromString("hello"));
    assertEquals(
            new ITokenizer.FullToken("{",
                    Integer.toString(ITerminalSymbols.TokenNameLBRACE)),
            tokenizer.getTokenFromString("{"));
}
Example #11
Source File: TokenizeJavascriptCodeTest.java From tassal with BSD 3-Clause "New" or "Revised" License | 5 votes |
/**
 * Checks that tokenizing {@code CODE_SAMPLE3} yields exactly the tokens in
 * {@code TOKENS_SAMPLE1}.
 *
 * <p>NOTE(review): sample 3 is compared against the sample 1 token fixture;
 * presumably the two samples are meant to tokenize identically (confirm
 * against the fixtures).
 *
 * @param tokenizer
 *            the tokenizer under test
 */
protected void testSample3(final ITokenizer tokenizer) {
    final List<String> tok = tokenizer.tokenListFromCode(CODE_SAMPLE3);
    for (int i = 0; i < TOKENS_SAMPLE1.length; i++) {
        // (expected, actual) order so failure messages are not inverted.
        assertEquals(TOKENS_SAMPLE1[i], tok.get(i));
    }
    assertEquals(TOKENS_SAMPLE1.length, tok.size());
}
Example #12
Source File: TokenizeJavaCodeTest.java From tassal with BSD 3-Clause "New" or "Revised" License | 5 votes |
/**
 * Verifies that the token list produced for {@code CODE_SAMPLE3} matches
 * {@code TOKENS_SAMPLE1} element by element and in length.
 *
 * <p>NOTE(review): the expected tokens come from the sample 1 fixture;
 * presumably sample 3 is a variant that should tokenize the same way
 * (confirm against the fixtures).
 *
 * @param tokenizer
 *            the tokenizer under test
 */
protected void testSample3(ITokenizer tokenizer) {
    final List<String> tok = tokenizer.tokenListFromCode(CODE_SAMPLE3);
    for (int i = 0; i < TOKENS_SAMPLE1.length; i++) {
        // Fixture first: assertEquals expects (expected, actual).
        assertEquals(TOKENS_SAMPLE1[i], tok.get(i));
    }
    assertEquals(TOKENS_SAMPLE1.length, tok.size());
}
Example #13
Source File: DynamicRangeEval.java From naturalize with BSD 3-Clause "New" or "Revised" License | 5 votes |
/**
 * Creates an evaluation over the files of a directory.
 *
 * <p>The tokenizer is only consulted for its file filter; this constructor
 * does not store it.
 *
 * @param directory
 *            the directory whose files are listed
 * @param tokenizer
 *            supplies the file filter used when listing files
 * @param ex
 *            the scope extractor to use
 */
public DynamicRangeEval(final File directory, final ITokenizer tokenizer,
        final IScopeExtractor ex) {
    this.allFiles = FileUtils.listFiles(
            directory,
            tokenizer.getFileFilter(),
            DirectoryFileFilter.DIRECTORY);
    this.scopeExtractor = ex;
}
Example #14
Source File: PerturbationEvaluator.java From naturalize with BSD 3-Clause "New" or "Revised" License | 5 votes |
/**
 * Creates a perturbation evaluator over the files of a directory.
 *
 * <p>Files are listed with the tokenizer's file filter, and a
 * {@link ScopedIdentifierRenaming} for whole compilation units is created
 * from the given scope extractor.
 *
 * @param directory
 *            the directory whose files are listed
 * @param tokenizer
 *            the tokenizer; also supplies the file filter
 * @param scopeExtractor
 *            the scope extractor to use
 * @param renamerClass
 *            the renamer class name to store
 */
public PerturbationEvaluator(final File directory,
        final ITokenizer tokenizer, final IScopeExtractor scopeExtractor,
        final String renamerClass) {
    this.allFiles = FileUtils.listFiles(
            directory,
            tokenizer.getFileFilter(),
            DirectoryFileFilter.DIRECTORY);

    this.tokenizer = tokenizer;
    this.scopeExtractor = scopeExtractor;
    this.renamerClass = renamerClass;

    this.varRenamer = new ScopedIdentifierRenaming(
            scopeExtractor, ParseType.COMPILATION_UNIT);
}
Example #15
Source File: JavascriptTokenizer.java From tassal with BSD 3-Clause "New" or "Revised" License | 5 votes |
/**
 * Maps a token string to its {@link FullToken}.
 *
 * <p>Sentence start/end markers become tokens whose text and type are the
 * marker. Everything else is tokenized as code, and the element at index 1
 * of the token list is returned (presumably index 0 is the sentence-start
 * marker; confirm against {@code getTokenListFromCode}).
 *
 * @param token
 *            the token text
 * @return the corresponding full token
 */
@Override
public FullToken getTokenFromString(final String token) {
    final boolean isStartMarker = token.equals(ITokenizer.SENTENCE_START);
    if (isStartMarker) {
        return new FullToken(ITokenizer.SENTENCE_START,
                ITokenizer.SENTENCE_START);
    }

    final boolean isEndMarker = token.equals(ITokenizer.SENTENCE_END);
    if (isEndMarker) {
        return new FullToken(ITokenizer.SENTENCE_END,
                ITokenizer.SENTENCE_END);
    }

    final char[] tokenChars = token.toCharArray();
    return getTokenListFromCode(tokenChars).get(1);
}
Example #16
Source File: JavaMethodDeclarationBindingExtractor.java From codemining-core with BSD 3-Clause "New" or "Revised" License | 5 votes |
/**
 * Creates a method-declaration binding extractor.
 *
 * <p>Project type information is built from {@code inputFolder} only when
 * overrides are excluded; when they are included it is left {@code null}.
 *
 * @param tokenizer
 *            passed to the superclass constructor
 * @param includeOverrides
 *            whether overrides are included
 * @param inputFolder
 *            the folder used to build project type information when
 *            overrides are excluded
 */
public JavaMethodDeclarationBindingExtractor(final ITokenizer tokenizer,
        final boolean includeOverrides, final File inputFolder) {
    super(tokenizer);
    this.includeOverrides = includeOverrides;
    pti = includeOverrides
            ? null
            : buildProjectTypeInformation(inputFolder);
}
Example #17
Source File: JavaTokenizer.java From codemining-core with BSD 3-Clause "New" or "Revised" License | 5 votes |
/**
 * Returns the {@link FullToken} for the given token string.
 *
 * <p>The two sentence markers are handled specially: each yields a token
 * with the marker as both text and type. Other input is tokenized via
 * {@code getTokenListFromCode} and the list element at index 1 is returned
 * (presumably index 0 holds the sentence-start marker; confirm against the
 * tokenizer).
 *
 * @param token
 *            the token text
 * @return the corresponding full token
 */
@Override
public FullToken getTokenFromString(final String token) {
    if (!token.equals(ITokenizer.SENTENCE_START)) {
        if (!token.equals(ITokenizer.SENTENCE_END)) {
            // Ordinary token: let the tokenizer classify it.
            return getTokenListFromCode(token.toCharArray()).get(1);
        }
        return new FullToken(ITokenizer.SENTENCE_END,
                ITokenizer.SENTENCE_END);
    }
    return new FullToken(ITokenizer.SENTENCE_START,
            ITokenizer.SENTENCE_START);
}
Example #18
Source File: CommonNameRenamingEvaluator.java From naturalize with BSD 3-Clause "New" or "Revised" License | 5 votes |
/**
 * Creates an evaluator over the files of a directory.
 *
 * @param directory
 *            the directory whose files are listed using the tokenizer's
 *            file filter
 * @param tokenizer
 *            the tokenizer to store; also supplies the file filter
 * @param smoothedNgramClass
 *            the n-gram language model class to store
 * @param junkVariables
 *            the set of junk variable names to store
 */
public CommonNameRenamingEvaluator(File directory, ITokenizer tokenizer,
        Class<? extends AbstractNGramLM> smoothedNgramClass,
        Set<String> junkVariables) {
    this.allFiles = FileUtils.listFiles(
            directory,
            tokenizer.getFileFilter(),
            DirectoryFileFilter.DIRECTORY);

    this.tokenizer = tokenizer;
    this.smoothedNgramClass = smoothedNgramClass;
    this.junkVariables = junkVariables;
}
Example #19
Source File: CppWhitespaceTokenizer.java From tassal with BSD 3-Clause "New" or "Revised" License | 5 votes |
/**
 * Converts a token string into a {@link FullToken}.
 *
 * <p>Sentence start/end markers yield a token built from the marker
 * constants. Any other string is tokenized as code and the element at
 * index 1 of the resulting list is returned (presumably index 0 is the
 * sentence-start marker; confirm against {@code getTokenListFromCode}).
 *
 * @param token
 *            the token text
 * @return the corresponding full token
 */
@Override
public FullToken getTokenFromString(final String token) {
    final FullToken fullToken;
    if (token.equals(ITokenizer.SENTENCE_START)) {
        fullToken = new FullToken(ITokenizer.SENTENCE_START, SENTENCE_START);
    } else if (token.equals(ITokenizer.SENTENCE_END)) {
        fullToken = new FullToken(ITokenizer.SENTENCE_END, SENTENCE_END);
    } else {
        final char[] code = token.toCharArray();
        fullToken = getTokenListFromCode(code).get(1);
    }
    return fullToken;
}
Example #20
Source File: BaseIdentifierRenamings.java From naturalize with BSD 3-Clause "New" or "Revised" License | 5 votes |
/** * */ public BaseIdentifierRenamings(final ITokenizer tokenizer) { super(); this.tokenizer = tokenizer; try { smoothedNgramClass = (Class<? extends AbstractNGramLM>) Class .forName(SettingsLoader.getStringSetting( "ngramSmootherClass", "codemining.lm.ngram.smoothing.StupidBackoff")); } catch (final ClassNotFoundException e) { LOGGER.severe(ExceptionUtils.getFullStackTrace(e)); throw new IllegalArgumentException(e); } }
Example #21
Source File: LeaveOneOutEvaluator.java From naturalize with BSD 3-Clause "New" or "Revised" License | 5 votes |
public LeaveOneOutEvaluator(final File directory, final ITokenizer tokenizer, final Class<? extends AbstractNGramLM> smoother) { allFiles = FileUtils.listFiles(directory, tokenizer.getFileFilter(), DirectoryFileFilter.DIRECTORY); this.tokenizer = tokenizer; for (int i = 0; i < data.length; i++) { data[i] = new ResultObject(); } }
Example #22
Source File: LeaveOneOutEvaluator.java From naturalize with BSD 3-Clause "New" or "Revised" License | 5 votes |
/**
 * Command-line entry point that runs a leave-one-out evaluation.
 *
 * <p>Expected arguments, in order: folder, tokenizer class name, n-gram
 * wrapper class name, scope extractor name, renaming class name and,
 * optionally, renamer constructor parameters (only the first one is
 * forwarded).
 *
 * @param args
 *            command-line arguments; when fewer than five are given the
 *            usage line is printed to stderr and nothing else happens
 * @throws IllegalAccessException
 *             if the tokenizer class or its no-arg constructor is not
 *             accessible
 * @throws InstantiationException
 *             if the tokenizer class cannot be instantiated
 * @throws ClassNotFoundException
 *             if a class name cannot be resolved
 * @throws SerializationException
 *             declared by this method; propagated from the evaluation
 * @throws ClassCastException
 *             if the named tokenizer class does not implement
 *             {@link ITokenizer}
 */
public static void main(String[] args) throws InstantiationException,
        IllegalAccessException, ClassNotFoundException,
        SerializationException {
    if (args.length < 5) {
        System.err
                .println("Usage <folder> <tokenizerClass> <wrapperClass> variable|method <renamingClass> [<renamerConstrParams> ..]");
        return;
    }

    final File directory = new File(args[0]);

    // Checked narrowing: a class that is not an ITokenizer is rejected
    // before it is instantiated, and no unchecked cast is needed.
    final Class<? extends ITokenizer> tokenizerName = Class.forName(
            args[1]).asSubclass(ITokenizer.class);
    final ITokenizer tokenizer = tokenizerName.newInstance();

    // Left as an unchecked cast on purpose: validating here would make
    // this method throw where it previously did not.
    final Class<? extends AbstractNGramLM> smoothedNgramClass = (Class<? extends AbstractNGramLM>) Class
            .forName(args[2]);

    final LeaveOneOutEvaluator eval = new LeaveOneOutEvaluator(directory,
            tokenizer, smoothedNgramClass);

    final IScopeExtractor scopeExtractor = ScopesTUI
            .getScopeExtractorByName(args[3]);

    // The usage allows more than one trailing parameter; testing for
    // exactly six arguments silently dropped args[5] whenever extras
    // were supplied.
    final String renamerParam = args.length > 5 ? args[5] : null;
    eval.performEvaluation(scopeExtractor, args[4], renamerParam);
}
Example #23
Source File: TokenizeJavaCodeTest.java From tassal with BSD 3-Clause "New" or "Revised" License | 5 votes |
/**
 * Checks that tokenizing {@code CODE_SAMPLE1} with positions yields, for
 * each position in {@code TOKEN_POS_SAMPLE1}, the token at the same index
 * of {@code TOKENS_SAMPLE1}, and that no extra tokens are produced.
 *
 * @param tokenizer
 *            the tokenizer under test
 */
protected void testSample1Position(ITokenizer tokenizer) {
    final Map<Integer, String> toks = tokenizer
            .tokenListWithPos(CODE_SAMPLE1);
    for (int i = 0; i < TOKEN_POS_SAMPLE1.length; i++) {
        assertTrue(toks.containsKey(TOKEN_POS_SAMPLE1[i]));
        // assertEquals takes (expected, actual); the fixture goes first.
        assertEquals(TOKENS_SAMPLE1[i], toks.get(TOKEN_POS_SAMPLE1[i]));
    }
    assertEquals(TOKENS_SAMPLE1.length, toks.size());
}
Example #24
Source File: SelectionSuggestionEval.java From naturalize with BSD 3-Clause "New" or "Revised" License | 5 votes |
/**
 * Creates a selection-suggestion evaluation over the files of a directory.
 *
 * @param directory
 *            the directory whose files are listed using the tokenizer's
 *            file filter
 * @param codeTokenizer
 *            the tokenizer to store; also supplies the file filter
 * @param extractor
 *            the scope extractor to store
 */
public SelectionSuggestionEval(final File directory,
        final ITokenizer codeTokenizer, final IScopeExtractor extractor) {
    this.tokenizer = codeTokenizer;
    this.allFiles = FileUtils.listFiles(
            directory,
            codeTokenizer.getFileFilter(),
            DirectoryFileFilter.DIRECTORY);
    this.scopeExtractor = extractor;
}
Example #25
Source File: TokenizeJavascriptCodeTest.java From tassal with BSD 3-Clause "New" or "Revised" License | 5 votes |
/**
 * Checks the token types a {@link JavascriptTokenizer} assigns to an
 * identifier and to an opening brace.
 */
@Test
public void testTokenTypes() {
    final ITokenizer tokenizer = new JavascriptTokenizer();

    // (expected, actual): the hand-built token is the expectation.
    assertEquals(
            new ITokenizer.FullToken("hello", tokenizer.getIdentifierType()),
            tokenizer.getTokenFromString("hello"));
    assertEquals(
            new ITokenizer.FullToken("{",
                    Integer.toString(ITerminalSymbols.TokenNameLBRACE)),
            tokenizer.getTokenFromString("{"));
}
Example #26
Source File: TokenizeJavascriptCodeTest.java From api-mining with GNU General Public License v3.0 | 5 votes |
/**
 * Verifies that a {@link JavascriptTokenizer} classifies {@code hello} as
 * an identifier and <code>{</code> as a left brace.
 */
@Test
public void testTokenTypes() {
    final ITokenizer tokenizer = new JavascriptTokenizer();

    // Expected value first, as assertEquals(expected, actual) requires,
    // so a failure reports the two sides correctly.
    assertEquals(
            new ITokenizer.FullToken("hello", tokenizer.getIdentifierType()),
            tokenizer.getTokenFromString("hello"));
    assertEquals(
            new ITokenizer.FullToken("{",
                    Integer.toString(ITerminalSymbols.TokenNameLBRACE)),
            tokenizer.getTokenFromString("{"));
}
Example #27
Source File: TokenizeJavascriptCodeTest.java From tassal with BSD 3-Clause "New" or "Revised" License | 5 votes |
/**
 * Checks that the leading tokens produced for {@code CODE_SAMPLE2} match
 * {@code TOKENS_SAMPLE2} position by position.
 *
 * <p>The total number of produced tokens is not asserted.
 *
 * @param tokenizer
 *            the tokenizer under test
 */
protected void testSample2(final ITokenizer tokenizer) {
    final List<String> tok = tokenizer.tokenListFromCode(CODE_SAMPLE2);
    for (int i = 0; i < TOKENS_SAMPLE2.length; i++) {
        // Fixture first: assertEquals expects (expected, actual).
        assertEquals(TOKENS_SAMPLE2[i], tok.get(i));
    }
}
Example #28
Source File: TokenizeJavascriptCodeTest.java From api-mining with GNU General Public License v3.0 | 5 votes |
/**
 * Verifies the position-to-token map produced for {@code CODE_SAMPLE1}:
 * every position in {@code TOKEN_POS_SAMPLE1} must be present and map to
 * the token at the same index of {@code TOKENS_SAMPLE1}, and the map must
 * contain no other entries.
 *
 * @param tokenizer
 *            the tokenizer under test
 */
protected void testSample1Position(final ITokenizer tokenizer) {
    final Map<Integer, String> toks = tokenizer
            .tokenListWithPos(CODE_SAMPLE1);
    for (int i = 0; i < TOKEN_POS_SAMPLE1.length; i++) {
        assertTrue(toks.containsKey(TOKEN_POS_SAMPLE1[i]));
        // (expected, actual) order keeps failure messages truthful.
        assertEquals(TOKENS_SAMPLE1[i], toks.get(TOKEN_POS_SAMPLE1[i]));
    }
    assertEquals(TOKENS_SAMPLE1.length, toks.size());
}
Example #29
Source File: TokenizeJavaCodeTest.java From api-mining with GNU General Public License v3.0 | 5 votes |
/**
 * Verifies that a {@link JavaTokenizer} classifies {@code hello} as an
 * identifier and <code>{</code> as a left brace.
 */
@Test
public void testTokenTypes() {
    ITokenizer tokenizer = new JavaTokenizer();

    // assertEquals is (expected, actual); the constructed token is the
    // expectation and the tokenizer result is the value under test.
    assertEquals(
            new ITokenizer.FullToken("hello", tokenizer.getIdentifierType()),
            tokenizer.getTokenFromString("hello"));
    assertEquals(
            new ITokenizer.FullToken("{",
                    Integer.toString(ITerminalSymbols.TokenNameLBRACE)),
            tokenizer.getTokenFromString("{"));
}
Example #30
Source File: TokenCounter.java From codemining-core with BSD 3-Clause "New" or "Revised" License | 5 votes |
/** * @param args * @throws IOException * @throws ClassNotFoundException * @throws IllegalAccessException * @throws InstantiationException */ public static void main(final String[] args) throws IOException, InstantiationException, IllegalAccessException, ClassNotFoundException { if (args.length != 2) { System.err.println("Usage <codeDir> <TokenizerClass>"); return; } long tokenCount = 0; final ITokenizer tokenizer = TokenizerUtils.tokenizerForClass(args[1]); for (final File fi : FileUtils.listFiles(new File(args[0]), tokenizer.getFileFilter(), DirectoryFileFilter.DIRECTORY)) { try { final char[] code = FileUtils.readFileToString(fi) .toCharArray(); tokenCount += tokenizer.tokenListFromCode(code).size() - 2; // Remove // sentence // start/end } catch (final IOException e) { LOGGER.warning(ExceptionUtils.getFullStackTrace(e)); } } System.out.println("Tokens: " + tokenCount); }