codemining.languagetools.ITokenizer.FullToken Java Examples
The following examples show how to use
codemining.languagetools.ITokenizer.FullToken.
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: TokenizerUtils.java From api-mining with GNU General Public License v3.0 | 5 votes |
/**
 * Appends the text of every token to the supplied buffer, writing a single
 * space after each one (so a non-empty input always ends with a space).
 *
 * @param tokens
 *            the tokens whose {@code token} text is appended, in list order
 * @param sb
 *            the buffer that receives the text; it is modified in place
 * @return the same {@code sb} instance that was passed in
 */
public final static StringBuffer joinFullTokens(
        final List<FullToken> tokens, final StringBuffer sb) {
    for (final FullToken current : tokens) {
        sb.append(current.token).append(" ");
    }
    return sb;
}
Example #2
Source File: TokenizerUtils.java From api-mining with GNU General Public License v3.0 | 5 votes |
/**
 * Removes the leading sentence-start token and the trailing sentence-end
 * token from the given sequence, in place.
 *
 * All preconditions are verified before the list is touched, so a failed
 * check never leaves the sequence with only one of the two markers removed.
 *
 * @param tokenSequence
 *            a mutable list whose first element is
 *            {@link ITokenizer#SENTENCE_START} and whose last element is
 *            {@link ITokenizer#SENTENCE_END}
 * @throws IllegalArgumentException
 *             if the sequence has fewer than two tokens or is not wrapped
 *             in the start/end markers
 */
public static final void removeSentenceStartEndFullTokens(
        final List<FullToken> tokenSequence) {
    // Two distinct positions are needed: one for the start marker and one
    // for the end marker.
    checkArgument(tokenSequence.size() >= 2,
            "Token sequence must contain at least the sentence start and end tokens");
    final int lastIndex = tokenSequence.size() - 1;
    checkArgument(
            tokenSequence.get(0).token.equals(ITokenizer.SENTENCE_START),
            "Token sequence does not begin with the sentence start token");
    checkArgument(
            tokenSequence.get(lastIndex).token.equals(ITokenizer.SENTENCE_END),
            "Token sequence does not end with the sentence end token");

    // Only mutate once both markers are known to be present.
    tokenSequence.remove(lastIndex);
    tokenSequence.remove(0);
}
Example #3
Source File: TokenizerUtils.java From codemining-core with BSD 3-Clause "New" or "Revised" License | 5 votes |
/**
 * Removes the leading sentence-start token and the trailing sentence-end
 * token from the given sequence, in place.
 *
 * All preconditions are verified before the list is touched, so a failed
 * check never leaves the sequence with only one of the two markers removed.
 *
 * @param tokenSequence
 *            a mutable list whose first element is
 *            {@link ITokenizer#SENTENCE_START} and whose last element is
 *            {@link ITokenizer#SENTENCE_END}
 * @throws IllegalArgumentException
 *             if the sequence has fewer than two tokens or is not wrapped
 *             in the start/end markers
 */
public static final void removeSentenceStartEndFullTokens(
        final List<FullToken> tokenSequence) {
    // Two distinct positions are needed: one for the start marker and one
    // for the end marker.
    checkArgument(tokenSequence.size() >= 2,
            "Token sequence must contain at least the sentence start and end tokens");
    final int lastIndex = tokenSequence.size() - 1;
    checkArgument(
            tokenSequence.get(0).token.equals(ITokenizer.SENTENCE_START),
            "Token sequence does not begin with the sentence start token");
    checkArgument(
            tokenSequence.get(lastIndex).token.equals(ITokenizer.SENTENCE_END),
            "Token sequence does not end with the sentence end token");

    // Only mutate once both markers are known to be present.
    tokenSequence.remove(lastIndex);
    tokenSequence.remove(0);
}
Example #4
Source File: IdentifierNeighborsNGramLM.java From naturalize with BSD 3-Clause "New" or "Revised" License | 5 votes |
public void addRelevantNGrams(final List<FullToken> lst) { final SortedSet<Integer> identifierPositions = new TreeSet<Integer>(); final List<String> sentence = Lists.newArrayList(); for (int i = 0; i < lst.size(); i++) { final FullToken fullToken = lst.get(i); sentence.add(fullToken.token); if (fullToken.tokenType.equals(tokenizer.getIdentifierType())) { identifierPositions.add(i); } } // Construct the rest for (int i = 0; i < sentence.size(); i++) { // Filter n-grams with no identifiers if (identifierPositions.subSet(i - getN() + 1, i + 1).isEmpty()) { continue; } final NGram<String> ngram = NGram.constructNgramAt(i, sentence, getN()); if (ngram.size() > 1) { addNgram(ngram, false); } } }
Example #5
Source File: IdentifierNeighborsNGramLM.java From naturalize with BSD 3-Clause "New" or "Revised" License | 5 votes |
/**
 * Tokenizes {@code codeFile} and feeds the resulting tokens to
 * {@code addRelevantNGrams}. An {@link IOException} is not propagated: its
 * full stack trace is logged at warning level and the file is skipped.
 */
@Override
public void run() {
    LOGGER.finer("Reading file " + codeFile.getAbsolutePath());
    try {
        addRelevantNGrams(tokenizer.getTokenListFromCode(codeFile));
    } catch (final IOException ioe) {
        LOGGER.warning(ExceptionUtils.getFullStackTrace(ioe));
    }
}
Example #6
Source File: AbstractJavaNameBindingsExtractor.java From tassal with BSD 3-Clause "New" or "Revised" License | 5 votes |
/**
 * Computes the token-level name bindings for an AST node and the file it
 * was parsed from.
 *
 * The node-level bindings come from {@code getNameBindings(node)}; the
 * file is tokenized with positions and each token is mapped through
 * {@code FullToken.TOKEN_NAME_CONVERTER} before both are handed to
 * {@code getTokenBindings}.
 *
 * @param node
 *            the AST node whose bindings are extracted
 * @param file
 *            the source file to tokenize
 * @return the bindings produced by {@code getTokenBindings}
 * @throws IOException
 *             declared for failures while processing {@code file}
 */
public final List<TokenNameBinding> getNameBindings(final ASTNode node,
        final File file) throws IOException {
    final Set<Set<ASTNode>> boundNodeGroups = getNameBindings(node);

    final SortedMap<Integer, String> tokenTextByPosition = Maps
            .transformValues(tokenizer.tokenListWithPos(file),
                    FullToken.TOKEN_NAME_CONVERTER);

    return getTokenBindings(tokenTextByPosition, boundNodeGroups);
}
Example #7
Source File: AbstractJavaNameBindingsExtractor.java From api-mining with GNU General Public License v3.0 | 5 votes |
/**
 * Computes the token-level name bindings for an AST node and the file it
 * was parsed from.
 *
 * The node-level bindings come from {@code getNameBindings(node)}; the
 * file is tokenized with positions and each token is mapped through
 * {@code FullToken.TOKEN_NAME_CONVERTER} before both are handed to
 * {@code getTokenBindings}.
 *
 * @param node
 *            the AST node whose bindings are extracted
 * @param file
 *            the source file to tokenize
 * @return the bindings produced by {@code getTokenBindings}
 * @throws IOException
 *             declared for failures while processing {@code file}
 */
public final List<TokenNameBinding> getNameBindings(final ASTNode node,
        final File file) throws IOException {
    final Set<Set<ASTNode>> boundNodeGroups = getNameBindings(node);

    final SortedMap<Integer, String> tokenTextByPosition = Maps
            .transformValues(tokenizer.tokenListWithPos(file),
                    FullToken.TOKEN_NAME_CONVERTER);

    return getTokenBindings(tokenTextByPosition, boundNodeGroups);
}
Example #8
Source File: TokenizerUtils.java From tassal with BSD 3-Clause "New" or "Revised" License | 5 votes |
/**
 * Appends the text of every token to the supplied buffer, writing a single
 * space after each one (so a non-empty input always ends with a space).
 *
 * @param tokens
 *            the tokens whose {@code token} text is appended, in list order
 * @param sb
 *            the buffer that receives the text; it is modified in place
 * @return the same {@code sb} instance that was passed in
 */
public final static StringBuffer joinFullTokens(
        final List<FullToken> tokens, final StringBuffer sb) {
    for (final FullToken current : tokens) {
        sb.append(current.token).append(" ");
    }
    return sb;
}
Example #9
Source File: TokenizerUtils.java From tassal with BSD 3-Clause "New" or "Revised" License | 5 votes |
/**
 * Removes the leading sentence-start token and the trailing sentence-end
 * token from the given sequence, in place.
 *
 * All preconditions are verified before the list is touched, so a failed
 * check never leaves the sequence with only one of the two markers removed.
 *
 * @param tokenSequence
 *            a mutable list whose first element is
 *            {@link ITokenizer#SENTENCE_START} and whose last element is
 *            {@link ITokenizer#SENTENCE_END}
 * @throws IllegalArgumentException
 *             if the sequence has fewer than two tokens or is not wrapped
 *             in the start/end markers
 */
public static final void removeSentenceStartEndFullTokens(
        final List<FullToken> tokenSequence) {
    // Two distinct positions are needed: one for the start marker and one
    // for the end marker.
    checkArgument(tokenSequence.size() >= 2,
            "Token sequence must contain at least the sentence start and end tokens");
    final int lastIndex = tokenSequence.size() - 1;
    checkArgument(
            tokenSequence.get(0).token.equals(ITokenizer.SENTENCE_START),
            "Token sequence does not begin with the sentence start token");
    checkArgument(
            tokenSequence.get(lastIndex).token.equals(ITokenizer.SENTENCE_END),
            "Token sequence does not end with the sentence end token");

    // Only mutate once both markers are known to be present.
    tokenSequence.remove(lastIndex);
    tokenSequence.remove(0);
}
Example #10
Source File: TokenizerUtils.java From codemining-core with BSD 3-Clause "New" or "Revised" License | 5 votes |
/**
 * Appends the text of every token to the supplied buffer, writing a single
 * space after each one (so a non-empty input always ends with a space).
 *
 * @param tokens
 *            the tokens whose {@code token} text is appended, in list order
 * @param sb
 *            the buffer that receives the text; it is modified in place
 * @return the same {@code sb} instance that was passed in
 */
public final static StringBuffer joinFullTokens(
        final List<FullToken> tokens, final StringBuffer sb) {
    for (final FullToken current : tokens) {
        sb.append(current.token).append(" ");
    }
    return sb;
}
Example #11
Source File: AbstractJavaNameBindingsExtractor.java From codemining-core with BSD 3-Clause "New" or "Revised" License | 5 votes |
/**
 * Computes the token-level name bindings for an AST node and the file it
 * was parsed from.
 *
 * The node-level bindings come from {@code getNameBindings(node)}; the
 * file is tokenized with positions and each token is mapped through
 * {@code FullToken.TOKEN_NAME_CONVERTER} before both are handed to
 * {@code getTokenBindings}.
 *
 * @param node
 *            the AST node whose bindings are extracted
 * @param file
 *            the source file to tokenize
 * @return the bindings produced by {@code getTokenBindings}
 * @throws IOException
 *             declared for failures while processing {@code file}
 */
public final List<TokenNameBinding> getNameBindings(final ASTNode node,
        final File file) throws IOException {
    final Set<Set<ASTNode>> boundNodeGroups = getNameBindings(node);

    final SortedMap<Integer, String> tokenTextByPosition = Maps
            .transformValues(tokenizer.tokenListWithPos(file),
                    FullToken.TOKEN_NAME_CONVERTER);

    return getTokenBindings(tokenTextByPosition, boundNodeGroups);
}
Example #12
Source File: TokenizerTUI.java From tassal with BSD 3-Clause "New" or "Revised" License | 4 votes |
/**
 * Command-line entry point that tokenizes source files and prints the
 * tokens to standard output.
 *
 * Arguments: {@code <codeDir> <TokenizerClass> [TokenizerArgs]}.
 * {@code args[0]} may be a single file or a directory; for a directory the
 * files are selected with the tokenizer's own file filter. {@code args[1]}
 * names the tokenizer class and the optional {@code args[2]} is passed to
 * it as its argument string. Any further arguments are ignored. With fewer
 * than two arguments a usage line is written to standard error and the
 * method returns.
 *
 * For every file, each token is printed on its own line (using the
 * token's string form), followed by a blank line.
 *
 * @param args
 *            the command-line arguments described above
 */
public static void main(final String[] args) throws InvalidInputException,
        IOException, InstantiationException, IllegalAccessException,
        ClassNotFoundException, IllegalArgumentException,
        SecurityException, InvocationTargetException,
        NoSuchMethodException {
    if (args.length < 2) {
        System.err
                .println("Usage <codeDir> <TokenizerClass> [TokenizerArgs]");
        return;
    }

    final ITokenizer tok;
    final String tokenizerClass = args[1];
    if (args.length == 2) {
        tok = TokenizerUtils.tokenizerForClass(tokenizerClass);
    } else {
        final String tokenizerArguments = args[2];
        tok = TokenizerUtils.tokenizerForClass(tokenizerClass,
                tokenizerArguments);
    }

    final File baseFile = new File(args[0]);
    final Collection<File> allFiles;
    if (baseFile.isDirectory()) {
        allFiles = FileUtils.listFiles(baseFile, tok.getFileFilter(),
                DirectoryFileFilter.DIRECTORY);
    } else {
        allFiles = Lists.newArrayList(baseFile);
    }

    // The separator does not change while we run; look it up once instead
    // of once per token.
    final String lineSeparator = System.getProperty("line.separator");
    for (final File fi : allFiles) {
        // The buffer never leaves this thread, so the unsynchronized
        // StringBuilder is sufficient.
        final StringBuilder buf = new StringBuilder();
        for (final FullToken token : tok.getTokenListFromCode(fi)) {
            buf.append(token).append(lineSeparator);
        }
        System.out.println(buf.toString());
        System.out.println();
    }
}
Example #13
Source File: CodePrinter.java From codemining-core with BSD 3-Clause "New" or "Revised" License | 4 votes |
/**
 * Renders the given source file as an HTML document in which every token
 * is wrapped in a {@code <span>} carrying the colors of the corresponding
 * entry of {@code coloredTokens}.
 *
 * {@code coloredTokens} is indexed by the ordinal position of each token
 * in the tokenizer's output, counting the skipped entries as well, so
 * there must be a one-to-one correspondence with the actual tokens. The
 * first entry and any entry keyed {@code Integer.MAX_VALUE} are not
 * rendered (NOTE(review): presumably the sentence start/end markers;
 * confirm against the tokenizer). Text lying between tokens is emitted
 * through {@code addSlack}. The method resets {@code lineNumber} to 1.
 *
 * @param coloredTokens
 *            the per-token colors and extra style
 * @param codeFile
 *            the source file to render
 * @return a buffer holding the complete HTML document
 */
public StringBuffer getHTMLwithColors(
        final List<ColoredToken> coloredTokens, final File codeFile)
        throws IOException, InstantiationException, IllegalAccessException {
    final String source = FileUtils.readFileToString(codeFile);
    lineNumber = 1;

    final StringBuffer html = new StringBuffer();
    final SortedMap<Integer, FullToken> tokensByOffset = tokenizer
            .fullTokenListWithPos(source.toCharArray());

    // Document head and opening body tag.
    html.append("<html>\n<head>\n<link href='http://fonts.googleapis.com/css?family=Source+Code+Pro:300,400,500,600,700,800,900' rel='stylesheet' type='text/css'>\n");
    html.append(CSS_STYLE);
    html.append("</head>\n<body style='background-color:rgb(")
            .append(documentBackgroundColor.getRed()).append(",")
            .append(documentBackgroundColor.getGreen()).append(",")
            .append(documentBackgroundColor.getBlue()).append(")'>");
    appendLineDiv(html, false);

    int tokenIndex = 0;
    int consumedUpTo = 0;
    for (final Entry<Integer, FullToken> positioned : tokensByOffset
            .entrySet()) {
        final boolean skipped = tokenIndex == 0
                || positioned.getKey() == Integer.MAX_VALUE;
        if (!skipped) {
            // Emit whatever sits between the previous token and this one.
            addSlack(source.substring(consumedUpTo, positioned.getKey()),
                    html);

            final ColoredToken colors = coloredTokens.get(tokenIndex);
            final String background = "background-color:rgba("
                    + colors.bgColor.getRed() + ","
                    + colors.bgColor.getGreen() + ","
                    + colors.bgColor.getBlue() + ","
                    + (ignoreTokBG ? "0" : "1") + "); ";
            final String foreground = "color:rgb("
                    + colors.fontColor.getRed() + ","
                    + colors.fontColor.getGreen() + ","
                    + colors.fontColor.getBlue() + "); ";
            html.append("<span style='" + background + foreground
                    + colors.extraStyle + "'>"
                    + StringEscapeUtils.escapeHtml(positioned.getValue().token)
                    + "</span>");

            consumedUpTo = positioned.getKey()
                    + positioned.getValue().token.length();
        }
        tokenIndex++;
    }

    html.append("</div></body></html>");
    return html;
}
Example #14
Source File: TokenizerTUI.java From codemining-core with BSD 3-Clause "New" or "Revised" License | 4 votes |
/**
 * Command-line entry point that tokenizes source files and prints the
 * tokens to standard output.
 *
 * Arguments: {@code <codeDir> <TokenizerClass> [TokenizerArgs]}.
 * {@code args[0]} may be a single file or a directory; for a directory the
 * files are selected with the tokenizer's own file filter. {@code args[1]}
 * names the tokenizer class and the optional {@code args[2]} is passed to
 * it as its argument string. Any further arguments are ignored. With fewer
 * than two arguments a usage line is written to standard error and the
 * method returns.
 *
 * For every file, each token is printed on its own line (using the
 * token's string form), followed by a blank line.
 *
 * @param args
 *            the command-line arguments described above
 */
public static void main(final String[] args) throws InvalidInputException,
        IOException, InstantiationException, IllegalAccessException,
        ClassNotFoundException, IllegalArgumentException,
        SecurityException, InvocationTargetException,
        NoSuchMethodException {
    if (args.length < 2) {
        System.err
                .println("Usage <codeDir> <TokenizerClass> [TokenizerArgs]");
        return;
    }

    final ITokenizer tok;
    final String tokenizerClass = args[1];
    if (args.length == 2) {
        tok = TokenizerUtils.tokenizerForClass(tokenizerClass);
    } else {
        final String tokenizerArguments = args[2];
        tok = TokenizerUtils.tokenizerForClass(tokenizerClass,
                tokenizerArguments);
    }

    final File baseFile = new File(args[0]);
    final Collection<File> allFiles;
    if (baseFile.isDirectory()) {
        allFiles = FileUtils.listFiles(baseFile, tok.getFileFilter(),
                DirectoryFileFilter.DIRECTORY);
    } else {
        allFiles = Lists.newArrayList(baseFile);
    }

    // The separator does not change while we run; look it up once instead
    // of once per token.
    final String lineSeparator = System.getProperty("line.separator");
    for (final File fi : allFiles) {
        // The buffer never leaves this thread, so the unsynchronized
        // StringBuilder is sufficient.
        final StringBuilder buf = new StringBuilder();
        for (final FullToken token : tok.getTokenListFromCode(fi)) {
            buf.append(token).append(lineSeparator);
        }
        System.out.println(buf.toString());
        System.out.println();
    }
}
Example #15
Source File: CodePrinter.java From tassal with BSD 3-Clause "New" or "Revised" License | 4 votes |
/**
 * Renders the given source file as an HTML document in which every token
 * is wrapped in a {@code <span>} carrying the colors of the corresponding
 * entry of {@code coloredTokens}.
 *
 * {@code coloredTokens} is indexed by the ordinal position of each token
 * in the tokenizer's output, counting the skipped entries as well, so
 * there must be a one-to-one correspondence with the actual tokens. The
 * first entry and any entry keyed {@code Integer.MAX_VALUE} are not
 * rendered (NOTE(review): presumably the sentence start/end markers;
 * confirm against the tokenizer). Text lying between tokens is emitted
 * through {@code addSlack}. The method resets {@code lineNumber} to 1.
 *
 * @param coloredTokens
 *            the per-token colors and extra style
 * @param codeFile
 *            the source file to render
 * @return a buffer holding the complete HTML document
 */
public StringBuffer getHTMLwithColors(
        final List<ColoredToken> coloredTokens, final File codeFile)
        throws IOException, InstantiationException, IllegalAccessException {
    final String source = FileUtils.readFileToString(codeFile);
    lineNumber = 1;

    final StringBuffer html = new StringBuffer();
    final SortedMap<Integer, FullToken> tokensByOffset = tokenizer
            .fullTokenListWithPos(source.toCharArray());

    // Document head and opening body tag.
    html.append("<html>\n<head>\n<link href='http://fonts.googleapis.com/css?family=Source+Code+Pro:300,400,500,600,700,800,900' rel='stylesheet' type='text/css'>\n");
    html.append(CSS_STYLE);
    html.append("</head>\n<body style='background-color:rgb(")
            .append(documentBackgroundColor.getRed()).append(",")
            .append(documentBackgroundColor.getGreen()).append(",")
            .append(documentBackgroundColor.getBlue()).append(")'>");
    appendLineDiv(html, false);

    int tokenIndex = 0;
    int consumedUpTo = 0;
    for (final Entry<Integer, FullToken> positioned : tokensByOffset
            .entrySet()) {
        final boolean skipped = tokenIndex == 0
                || positioned.getKey() == Integer.MAX_VALUE;
        if (!skipped) {
            // Emit whatever sits between the previous token and this one.
            addSlack(source.substring(consumedUpTo, positioned.getKey()),
                    html);

            final ColoredToken colors = coloredTokens.get(tokenIndex);
            final String background = "background-color:rgba("
                    + colors.bgColor.getRed() + ","
                    + colors.bgColor.getGreen() + ","
                    + colors.bgColor.getBlue() + ","
                    + (ignoreTokBG ? "0" : "1") + "); ";
            final String foreground = "color:rgb("
                    + colors.fontColor.getRed() + ","
                    + colors.fontColor.getGreen() + ","
                    + colors.fontColor.getBlue() + "); ";
            html.append("<span style='" + background + foreground
                    + colors.extraStyle + "'>"
                    + StringEscapeUtils.escapeHtml(positioned.getValue().token)
                    + "</span>");

            consumedUpTo = positioned.getKey()
                    + positioned.getValue().token.length();
        }
        tokenIndex++;
    }

    html.append("</div></body></html>");
    return html;
}
Example #16
Source File: CodePrinter.java From api-mining with GNU General Public License v3.0 | 4 votes |
/**
 * Renders the given source file as an HTML document in which every token
 * is wrapped in a {@code <span>} carrying the colors of the corresponding
 * entry of {@code coloredTokens}.
 *
 * {@code coloredTokens} is indexed by the ordinal position of each token
 * in the tokenizer's output, counting the skipped entries as well, so
 * there must be a one-to-one correspondence with the actual tokens. The
 * first entry and any entry keyed {@code Integer.MAX_VALUE} are not
 * rendered (NOTE(review): presumably the sentence start/end markers;
 * confirm against the tokenizer). Text lying between tokens is emitted
 * through {@code addSlack}. The method resets {@code lineNumber} to 1.
 *
 * @param coloredTokens
 *            the per-token colors and extra style
 * @param codeFile
 *            the source file to render
 * @return a buffer holding the complete HTML document
 */
public StringBuffer getHTMLwithColors(
        final List<ColoredToken> coloredTokens, final File codeFile)
        throws IOException, InstantiationException, IllegalAccessException {
    final String source = FileUtils.readFileToString(codeFile);
    lineNumber = 1;

    final StringBuffer html = new StringBuffer();
    final SortedMap<Integer, FullToken> tokensByOffset = tokenizer
            .fullTokenListWithPos(source.toCharArray());

    // Document head and opening body tag.
    html.append("<html>\n<head>\n<link href='http://fonts.googleapis.com/css?family=Source+Code+Pro:300,400,500,600,700,800,900' rel='stylesheet' type='text/css'>\n");
    html.append(CSS_STYLE);
    html.append("</head>\n<body style='background-color:rgb(")
            .append(documentBackgroundColor.getRed()).append(",")
            .append(documentBackgroundColor.getGreen()).append(",")
            .append(documentBackgroundColor.getBlue()).append(")'>");
    appendLineDiv(html, false);

    int tokenIndex = 0;
    int consumedUpTo = 0;
    for (final Entry<Integer, FullToken> positioned : tokensByOffset
            .entrySet()) {
        final boolean skipped = tokenIndex == 0
                || positioned.getKey() == Integer.MAX_VALUE;
        if (!skipped) {
            // Emit whatever sits between the previous token and this one.
            addSlack(source.substring(consumedUpTo, positioned.getKey()),
                    html);

            final ColoredToken colors = coloredTokens.get(tokenIndex);
            final String background = "background-color:rgba("
                    + colors.bgColor.getRed() + ","
                    + colors.bgColor.getGreen() + ","
                    + colors.bgColor.getBlue() + ","
                    + (ignoreTokBG ? "0" : "1") + "); ";
            final String foreground = "color:rgb("
                    + colors.fontColor.getRed() + ","
                    + colors.fontColor.getGreen() + ","
                    + colors.fontColor.getBlue() + "); ";
            html.append("<span style='" + background + foreground
                    + colors.extraStyle + "'>"
                    + StringEscapeUtils.escapeHtml(positioned.getValue().token)
                    + "</span>");

            consumedUpTo = positioned.getKey()
                    + positioned.getValue().token.length();
        }
        tokenIndex++;
    }

    html.append("</div></body></html>");
    return html;
}
Example #17
Source File: TokenizerTUI.java From api-mining with GNU General Public License v3.0 | 4 votes |
/**
 * Command-line entry point that tokenizes source files and prints the
 * tokens to standard output.
 *
 * Arguments: {@code <codeDir> <TokenizerClass> [TokenizerArgs]}.
 * {@code args[0]} may be a single file or a directory; for a directory the
 * files are selected with the tokenizer's own file filter. {@code args[1]}
 * names the tokenizer class and the optional {@code args[2]} is passed to
 * it as its argument string. Any further arguments are ignored. With fewer
 * than two arguments a usage line is written to standard error and the
 * method returns.
 *
 * For every file, each token is printed on its own line (using the
 * token's string form), followed by a blank line.
 *
 * @param args
 *            the command-line arguments described above
 */
public static void main(final String[] args) throws InvalidInputException,
        IOException, InstantiationException, IllegalAccessException,
        ClassNotFoundException, IllegalArgumentException,
        SecurityException, InvocationTargetException,
        NoSuchMethodException {
    if (args.length < 2) {
        System.err
                .println("Usage <codeDir> <TokenizerClass> [TokenizerArgs]");
        return;
    }

    final ITokenizer tok;
    final String tokenizerClass = args[1];
    if (args.length == 2) {
        tok = TokenizerUtils.tokenizerForClass(tokenizerClass);
    } else {
        final String tokenizerArguments = args[2];
        tok = TokenizerUtils.tokenizerForClass(tokenizerClass,
                tokenizerArguments);
    }

    final File baseFile = new File(args[0]);
    final Collection<File> allFiles;
    if (baseFile.isDirectory()) {
        allFiles = FileUtils.listFiles(baseFile, tok.getFileFilter(),
                DirectoryFileFilter.DIRECTORY);
    } else {
        allFiles = Lists.newArrayList(baseFile);
    }

    // The separator does not change while we run; look it up once instead
    // of once per token.
    final String lineSeparator = System.getProperty("line.separator");
    for (final File fi : allFiles) {
        // The buffer never leaves this thread, so the unsynchronized
        // StringBuilder is sufficient.
        final StringBuilder buf = new StringBuilder();
        for (final FullToken token : tok.getTokenListFromCode(fi)) {
            buf.append(token).append(lineSeparator);
        }
        System.out.println(buf.toString());
        System.out.println();
    }
}