org.apache.oro.text.regex.PatternMatcherInput Java Examples
The following examples show how to use
org.apache.oro.text.regex.PatternMatcherInput.
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: RegularMatch.java From anyline with Apache License 2.0 | 6 votes |
/**
 * Extracts the capture groups of {@code src} when the entire string matches {@code regx}.
 *
 * <p>{@code PatternMatcher.matches(PatternMatcherInput, Pattern)} tests the whole input for an
 * exact match and does not advance the input's current offset. It is therefore evaluated
 * once; looping on it would never terminate for a matching input.
 *
 * @param src  input string
 * @param regx regular expression
 * @return a list holding at most one item, the groups of the match (index 0 is the entire
 *         match); empty when there is no match or when an error occurred
 */
public List<List<String>> fetchs(String src, String regx){
    List<List<String>> list = new ArrayList<List<String>>();
    try{
        Pattern pattern = patternCompiler.compile(regx, Perl5Compiler.DEFAULT_MASK);
        PatternMatcher matcher = new Perl5Matcher();
        PatternMatcherInput input = new PatternMatcherInput(src);
        if(matcher.matches(input, pattern)){
            MatchResult matchResult = matcher.getMatch();
            // groups() already counts group 0, so the valid indexes are 0..groups-1
            int groups = matchResult.groups();
            List<String> item = new ArrayList<String>();
            for(int i=0; i<groups; i++){
                item.add(matchResult.group(i));
            }
            list.add(item);
        }
    }catch(Exception e){
        // best effort: a failure yields an empty result and is only reported in debug mode
        if(ConfigTable.isDebug() && log.isWarnEnabled()){
            e.printStackTrace();
        }
    }
    return list;
}
Example #2
Source File: RegularMatch.java From anyline with Apache License 2.0 | 6 votes |
/**
 * Extracts one capture group of {@code src} when the entire string matches {@code regx}.
 *
 * <p>{@code PatternMatcher.matches(PatternMatcherInput, Pattern)} does not advance the
 * input's current offset, so the test is performed once; looping on it would never
 * terminate for a matching input.
 *
 * @param src  input string
 * @param regx regular expression
 * @param idx  index of the group to extract (0 is the entire match)
 * @return a list holding at most one element, the requested group of the match; empty
 *         when there is no match or when an error occurred
 */
public List<String> fetch(String src, String regx, int idx){
    List<String> list = new ArrayList<String>();
    try{
        Pattern pattern = patternCompiler.compile(regx, Perl5Compiler.DEFAULT_MASK);
        PatternMatcher matcher = new Perl5Matcher();
        PatternMatcherInput input = new PatternMatcherInput(src);
        if(matcher.matches(input, pattern)){
            MatchResult matchResult = matcher.getMatch();
            list.add(matchResult.group(idx));
        }
    }catch(Exception e){
        // best effort: a failure yields an empty result and is only reported in debug mode
        if(ConfigTable.isDebug() && log.isWarnEnabled()){
            e.printStackTrace();
        }
    }
    return list;
}
Example #3
Source File: RegularUtil.java From anyline with Apache License 2.0 | 6 votes |
/** * 字符串下标 regx在src中首次出现的位置 * @param src src * @param regx regx * @param begin 有效开始位置 * @return return */ public static int indexOf(String src, String regx, int begin){ int idx = -1; try{ PatternCompiler patternCompiler = new Perl5Compiler(); Pattern pattern = patternCompiler.compile(regx, Perl5Compiler.CASE_INSENSITIVE_MASK); PatternMatcher matcher = new Perl5Matcher(); PatternMatcherInput input = new PatternMatcherInput(src); while(matcher.contains(input, pattern)){ MatchResult matchResult = matcher.getMatch(); int tmp = matchResult.beginOffset(0); if(tmp >= begin){//匹配位置从begin开始 idx = tmp; break; } } }catch(Exception e){ log.error("[提取异常][src:{}][regx:{}]", src, regx); e.printStackTrace(); } return idx; }
Example #4
Source File: RegularMatchPrefix.java From anyline with Apache License 2.0 | 6 votes |
/**
 * Extracts the capture groups of the prefix of {@code src} that matches {@code regx}
 * (case-insensitive).
 *
 * <p>{@code PatternMatcher.matchesPrefix(PatternMatcherInput, Pattern)} does not advance
 * the input's current offset, so the test is performed once; looping on it would never
 * terminate for a matching input.
 *
 * @param src  input string
 * @param regx regular expression
 * @return a list holding at most one item, the groups of the prefix match (index 0 is the
 *         entire match); empty when there is no match or when an error occurred
 */
public List<List<String>> fetchs(String src, String regx){
    List<List<String>> list = new ArrayList<List<String>>();
    try{
        Pattern pattern = patternCompiler.compile(regx, Perl5Compiler.CASE_INSENSITIVE_MASK);
        PatternMatcher matcher = new Perl5Matcher();
        PatternMatcherInput input = new PatternMatcherInput(src);
        if(matcher.matchesPrefix(input, pattern)){
            MatchResult matchResult = matcher.getMatch();
            // groups() already counts group 0, so the valid indexes are 0..groups-1
            int groups = matchResult.groups();
            List<String> item = new ArrayList<String>();
            for(int i=0; i<groups; i++){
                item.add(matchResult.group(i));
            }
            list.add(item);
        }
    }catch(Exception e){
        // best effort: a failure yields an empty result and is only reported in debug mode
        if(ConfigTable.isDebug() && log.isWarnEnabled()){
            e.printStackTrace();
        }
    }
    return list;
}
Example #5
Source File: RegularMatchPrefix.java From anyline with Apache License 2.0 | 6 votes |
/**
 * Extracts one capture group of the prefix of {@code src} that matches {@code regx}
 * (case-insensitive).
 *
 * <p>{@code PatternMatcher.matchesPrefix(PatternMatcherInput, Pattern)} does not advance
 * the input's current offset, so the test is performed once; looping on it would never
 * terminate for a matching input.
 *
 * @param src  input string
 * @param regx regular expression
 * @param idx  index of the group to extract (0 is the entire match)
 * @return a list holding at most one element, the requested group of the prefix match;
 *         empty when there is no match or when an error occurred
 */
public List<String> fetch(String src, String regx, int idx){
    List<String> list = new ArrayList<String>();
    try{
        Pattern pattern = patternCompiler.compile(regx, Perl5Compiler.CASE_INSENSITIVE_MASK);
        PatternMatcher matcher = new Perl5Matcher();
        PatternMatcherInput input = new PatternMatcherInput(src);
        if(matcher.matchesPrefix(input, pattern)){
            MatchResult matchResult = matcher.getMatch();
            list.add(matchResult.group(idx));
        }
    }catch(Exception e){
        // best effort: a failure yields an empty result and is only reported in debug mode
        if(ConfigTable.isDebug() && log.isWarnEnabled()){
            e.printStackTrace();
        }
    }
    return list;
}
Example #6
Source File: RegularContain.java From anyline with Apache License 2.0 | 6 votes |
/**
 * Collects the capture groups of every case-insensitive occurrence of {@code regx}
 * found in {@code src}.
 *
 * @param src  input string
 * @param regx regular expression
 * @return one item per occurrence, each listing that occurrence's groups (index 0 is the
 *         entire match); whatever was collected so far if an error occurred
 */
public List<List<String>> fetchs(String src, String regx){
    List<List<String>> rows = new ArrayList<List<String>>();
    try{
        Pattern compiled = patternCompiler.compile(regx, Perl5Compiler.CASE_INSENSITIVE_MASK);
        PatternMatcher perl5 = new Perl5Matcher();
        PatternMatcherInput cursor = new PatternMatcherInput(src);
        while(perl5.contains(cursor, compiled)){
            MatchResult hit = perl5.getMatch();
            int count = hit.groups();
            List<String> row = new ArrayList<String>();
            int g = 0;
            while(g < count){
                row.add(hit.group(g));
                g++;
            }
            rows.add(row);
        }
    }catch(Exception e){
        log.error("[提取异常][src:{}][reg:{}]", src, regx);
        // the stack trace is only printed in debug mode
        if(ConfigTable.isDebug() && log.isWarnEnabled()){
            e.printStackTrace();
        }
    }
    return rows;
}
Example #7
Source File: RegularContain.java From anyline with Apache License 2.0 | 6 votes |
/**
 * Collects one capture group from every case-insensitive occurrence of {@code regx}
 * found in {@code src}.
 *
 * @param src  input string
 * @param regx regular expression
 * @param idx  index of the group to extract (0 is the entire match)
 * @return the requested group of each occurrence, in the order found
 * @throws Exception any failure raised while compiling or matching; it is logged and
 *                   then rethrown
 */
public List<String> fetch(String src, String regx, int idx) throws Exception{
    List<String> found = new ArrayList<String>();
    try{
        Pattern compiled = patternCompiler.compile(regx, Perl5Compiler.CASE_INSENSITIVE_MASK);
        PatternMatcher perl5 = new Perl5Matcher();
        PatternMatcherInput cursor = new PatternMatcherInput(src);
        while(perl5.contains(cursor, compiled)){
            found.add(perl5.getMatch().group(idx));
        }
    }catch(Exception e){
        log.error("[提取异常][src:{}][reg:{}]", src, regx);
        // the stack trace is only printed in debug mode
        if(ConfigTable.isDebug() && log.isWarnEnabled()){
            e.printStackTrace();
        }
        throw e;
    }
    return found;
}
Example #8
Source File: RegxpContain.java From anyline with Apache License 2.0 | 6 votes |
/**
 * Collects the capture groups of every case-insensitive occurrence of {@code regx}
 * found in {@code src}.
 *
 * @param src  input string
 * @param regx regular expression
 * @return one item per occurrence, each listing that occurrence's groups (index 0 is the
 *         entire match); whatever was collected so far if an error occurred
 */
public List<List<String>> fetchs(String src, String regx){
    List<List<String>> rows = new ArrayList<List<String>>();
    try{
        Pattern compiled = patternCompiler.compile(regx, Perl5Compiler.CASE_INSENSITIVE_MASK);
        PatternMatcher perl5 = new Perl5Matcher();
        PatternMatcherInput cursor = new PatternMatcherInput(src);
        while(perl5.contains(cursor, compiled)){
            MatchResult hit = perl5.getMatch();
            int count = hit.groups();
            List<String> row = new ArrayList<String>();
            int g = 0;
            while(g < count){
                row.add(hit.group(g));
                g++;
            }
            rows.add(row);
        }
    }catch(Exception e){
        // errors are logged and swallowed; the partial result is returned
        log.error("[提取异常][src:{}][reg:{}]", src, regx);
        e.printStackTrace();
    }
    return rows;
}
Example #9
Source File: RegxpContain.java From anyline with Apache License 2.0 | 6 votes |
/**
 * Collects one capture group from every case-insensitive occurrence of {@code regx}
 * found in {@code src}.
 *
 * @param src  input string
 * @param regx regular expression
 * @param idx  index of the group to extract (0 is the entire match)
 * @return the requested group of each occurrence, in the order found
 * @throws Exception any failure raised while compiling or matching; it is logged and
 *                   then rethrown
 */
public List<String> fetch(String src, String regx, int idx) throws Exception{
    List<String> found = new ArrayList<String>();
    try{
        Pattern compiled = patternCompiler.compile(regx, Perl5Compiler.CASE_INSENSITIVE_MASK);
        PatternMatcher perl5 = new Perl5Matcher();
        PatternMatcherInput cursor = new PatternMatcherInput(src);
        while(perl5.contains(cursor, compiled)){
            found.add(perl5.getMatch().group(idx));
        }
    }catch(Exception e){
        log.error("[提取异常][src:{}][reg:{}]", src, regx);
        e.printStackTrace();
        throw e;
    }
    return found;
}
Example #10
Source File: string.java From openbd-core with GNU General Public License v3.0 | 5 votes |
/**
 * Tests whether {@code str} as a whole matches the regular expression {@code re}.
 * The expression is compiled with {@code Perl5Compiler.SINGLELINE_MASK}.
 *
 * @param str the string to test
 * @param re  the regular expression
 * @return {@code true} if the entire string matches
 * @throws MalformedPatternException if {@code re} cannot be compiled
 */
public static boolean regexMatches(String str, String re) throws MalformedPatternException {
    // the input is wrapped before compiling, as in the original statement order
    PatternMatcherInput subject = new PatternMatcherInput(str);
    Pattern compiled = new Perl5Compiler().compile(re, Perl5Compiler.SINGLELINE_MASK);
    return new Perl5Matcher().matches(subject, compiled);
}
Example #11
Source File: OutlinkExtractor.java From anthelion with Apache License 2.0 | 4 votes |
/** * Extracts <code>Outlink</code> from given plain text and adds anchor * to the extracted <code>Outlink</code>s * * @param plainText the plain text from wich URLs should be extracted. * @param anchor the anchor of the url * * @return Array of <code>Outlink</code>s within found in plainText */ public static Outlink[] getOutlinks(final String plainText, String anchor, Configuration conf) { long start = System.currentTimeMillis(); final List<Outlink> outlinks = new ArrayList<Outlink>(); try { final PatternCompiler cp = new Perl5Compiler(); final Pattern pattern = cp.compile(URL_PATTERN, Perl5Compiler.CASE_INSENSITIVE_MASK | Perl5Compiler.READ_ONLY_MASK | Perl5Compiler.MULTILINE_MASK); final PatternMatcher matcher = new Perl5Matcher(); final PatternMatcherInput input = new PatternMatcherInput(plainText); MatchResult result; String url; //loop the matches while (matcher.contains(input, pattern)) { // if this is taking too long, stop matching // (SHOULD really check cpu time used so that heavily loaded systems // do not unnecessarily hit this limit.) if (System.currentTimeMillis() - start >= 60000L) { if (LOG.isWarnEnabled()) { LOG.warn("Time limit exceeded for getOutLinks"); } break; } result = matcher.getMatch(); url = result.group(0); try { outlinks.add(new Outlink(url, anchor)); } catch (MalformedURLException mue) { LOG.warn("Invalid url: '" + url + "', skipping."); } } } catch (Exception ex) { // if the matcher fails (perhaps a malformed URL) we just log it and move on if (LOG.isErrorEnabled()) { LOG.error("getOutlinks", ex); } } final Outlink[] retval; //create array of the Outlinks if (outlinks != null && outlinks.size() > 0) { retval = outlinks.toArray(new Outlink[0]); } else { retval = new Outlink[0]; } return retval; }
Example #12
Source File: cfCACHE.java From openbd-core with GNU General Public License v3.0 | 4 votes |
private void expireFiles( File directory, String expireURL, String virtualServer ) throws cfmRunTimeException { File[] listOfFiles = directory.listFiles(cfCacheFileFilter); boolean deleteAll = (expireURL.equals("*") || expireURL.length() == 0); //use of this var is the fix for NA bug #3308 boolean ignoreHost = (! deleteAll && expireURL.startsWith("*")); File thisFile; String firstline; Perl5Compiler compiler = new Perl5Compiler(); Perl5Matcher matcher = new Perl5Matcher(); Pattern pattern = null; if ( !deleteAll ){ try { if(!ignoreHost) { /* The string in the cache file is always <servername>/<uri>; we need to add in the servername and adjust the expireURL accordingly */ if ( expireURL.startsWith("/") ) expireURL = virtualServer + expireURL; else expireURL = virtualServer + "/" + expireURL; } pattern = compiler.compile( escapeExpireUrl( expireURL ) ); } catch (MalformedPatternException e) { throw new cfmRunTimeException( catchDataFactory.extendedException( "errorCode.runtimeError", "cfcache.expireUrl", new String[]{expireURL}, e.getMessage()) ); } } for ( int x=0; x < listOfFiles.length; x++ ){ thisFile = listOfFiles[x]; if ( deleteAll ) { boolean success = false; int tries = 0; for ( ; (tries < 10) && (!success); tries++ ) { if ( deleteCachedFile( thisFile ) ) success = true; } if ( !success ) { throw newRunTimeException( "Failed to delete cache file: " + thisFile ); } } else{ firstline = getURIFromFile( thisFile ); if ( firstline != null ){ if( pattern != null && matcher.contains( new PatternMatcherInput( firstline ), pattern ) ) deleteCachedFile( thisFile ); } } } }
Example #13
Source File: reMatch.java From openbd-core with GNU General Public License v3.0 | 4 votes |
/**
 * Returns an array of every substring of the "string" argument that matches the
 * "regular" argument. When the "unique" argument is true, each distinct match is
 * reported only once (first occurrence wins).
 *
 * @param _session  the current session (not read here)
 * @param argStruct named arguments: "regular", "string" and optional "unique"
 * @return a cfArrayData holding one cfStringData per reported match
 * @throws cfmRunTimeException if the regular expression cannot be compiled
 */
public cfData execute(cfSession _session, cfArgStructData argStruct ) throws cfmRunTimeException {
    String regexp = getNamedStringParam(argStruct, "regular", "");
    String strToSearch = getNamedStringParam(argStruct, "string", "");
    boolean bUnique = getNamedBooleanParam(argStruct, "unique",false);

    // only allocated when duplicates have to be filtered out
    HashSet<String> seen = bUnique ? new HashSet<String>() : null;

    /* Setup the RegEx */
    int mask = Perl5Compiler.SINGLELINE_MASK;
    if (!caseSensitiveMatch) {
        mask |= Perl5Compiler.CASE_INSENSITIVE_MASK;
    }

    PatternCompiler compiler = new Perl5Compiler();
    Pattern pattern;
    try {
        pattern = compiler.compile(regexp, mask);
    } catch (MalformedPatternException e) {
        cfCatchData catchD = new cfCatchData();
        catchD.setType("Function");
        catchD.setMessage("REMatch - invalid parameter");
        catchD.setDetail("Invalid regular expression ( " + regexp + " )");
        throw new cfmRunTimeException(catchD);
    }

    /* Perform the search */
    cfArrayData array = cfArrayData.createArray(1);
    PatternMatcher matcher = new Perl5Matcher();
    PatternMatcherInput input = new PatternMatcherInput( strToSearch );

    while ( matcher.contains(input, pattern) ) {
        String matched = matcher.getMatch().toString();
        // Set.add reports whether the value was new, which doubles as the duplicate check
        if ( !bUnique || seen.add( matched ) ) {
            array.addElement( new cfStringData( matched ) );
        }
    }

    return array;
}
Example #14
Source File: OutlinkExtractor.java From nutch-htmlunit with Apache License 2.0 | 4 votes |
/** * Extracts <code>Outlink</code> from given plain text and adds anchor * to the extracted <code>Outlink</code>s * * @param plainText the plain text from wich URLs should be extracted. * @param anchor the anchor of the url * * @return Array of <code>Outlink</code>s within found in plainText */ public static Outlink[] getOutlinks(final String plainText, String anchor, Configuration conf) { long start = System.currentTimeMillis(); final List<Outlink> outlinks = new ArrayList<Outlink>(); try { final PatternCompiler cp = new Perl5Compiler(); final Pattern pattern = cp.compile(URL_PATTERN, Perl5Compiler.CASE_INSENSITIVE_MASK | Perl5Compiler.READ_ONLY_MASK | Perl5Compiler.MULTILINE_MASK); final PatternMatcher matcher = new Perl5Matcher(); final PatternMatcherInput input = new PatternMatcherInput(plainText); MatchResult result; String url; //loop the matches while (matcher.contains(input, pattern)) { // if this is taking too long, stop matching // (SHOULD really check cpu time used so that heavily loaded systems // do not unnecessarily hit this limit.) if (System.currentTimeMillis() - start >= 60000L) { if (LOG.isWarnEnabled()) { LOG.warn("Time limit exceeded for getOutLinks"); } break; } result = matcher.getMatch(); url = result.group(0); try { outlinks.add(new Outlink(url, anchor)); } catch (MalformedURLException mue) { LOG.warn("Invalid url: '" + url + "', skipping."); } } } catch (Exception ex) { // if the matcher fails (perhaps a malformed URL) we just log it and move on if (LOG.isErrorEnabled()) { LOG.error("getOutlinks", ex); } } final Outlink[] retval; //create array of the Outlinks if (outlinks != null && outlinks.size() > 0) { retval = outlinks.toArray(new Outlink[0]); } else { retval = new Outlink[0]; } return retval; }