java.util.regex.MatchResult#start

Source File: HeidelTimeOpenNLP.java From newsleak with GNU Affero General Public License v3.0

6 votes

/**
 * Verifies every part-of-speech constraint of a rule against the current match.
 * <p>
 * The constraint string is scanned for entries of the form {@code group(N):POS:}. For each
 * entry, the span of capture group {@code N} of {@code m}, shifted by the begin offset of the
 * sentence, is handed to {@code getPosFromMatchResult}; the tag that comes back must match
 * {@code POS} in full, where {@code POS} is interpreted as a regular expression.
 *
 * @param s
 *            sentence the match belongs to; its begin offset is added to the group offsets
 * @param posConstraint
 *            constraint specification holding zero or more {@code group(N):POS:} entries
 * @param m
 *            match whose capture groups the constraint entries refer to
 * @param jcas
 *            CAS object passed through to the part-of-speech lookup
 * @return {@code false} as soon as one entry is violated, {@code true} when all entries hold
 *         (also when the specification contains no entry at all)
 */
public boolean checkPosConstraint(Sentence s, String posConstraint, MatchResult m, JCas jcas) {
	final Pattern entryPattern = Pattern.compile("group\\(([0-9]+)\\):(.*?):");
	final int sentenceBegin = s.getBegin();

	for (MatchResult entry : Toolbox.findMatches(entryPattern, posConstraint)) {
		final int group = Integer.parseInt(entry.group(1));
		final int spanBegin = sentenceBegin + m.start(group);
		final int spanEnd = sentenceBegin + m.end(group);
		final String expectedPos = entry.group(2);
		final String actualPos = getPosFromMatchResult(spanBegin, spanEnd, s, jcas);

		// The first violated entry decides the outcome; remaining entries are not inspected.
		if (!actualPos.matches(expectedPos)) {
			return false;
		}
		Logger.printDetail("POS CONSTRAINT IS VALID: pos should be " + expectedPos + " and is " + actualPos);
	}
	return true;
}

Source File: CallbackMatcher.java From EDDI with Apache License 2.0

6 votes

/**
 * Replaces occurrences of this matcher's pattern with strings supplied by a callback.
 * <p>
 * Matching runs against the unmodified input while the replacements are applied to a
 * separate buffer, so match positions are translated by the accumulated length difference
 * of all replacements made so far.
 *
 * @param charSequence text to search
 * @param callback     invoked once per match; returning {@code null} leaves that match untouched
 * @return the text with every non-null replacement applied
 * @throws CallbackMatcherException declared for the callback contract
 */
public String replaceMatches(CharSequence charSequence, Callback callback) throws CallbackMatcherException {
    final StringBuilder output = new StringBuilder(charSequence);
    final Matcher matcher = this.pattern.matcher(charSequence);
    // How far positions in 'output' have drifted from positions in the original input.
    int shift = 0;

    while (matcher.find()) {
        final MatchResult hit = matcher.toMatchResult();
        final String substitute = callback.foundMatch(hit);
        if (substitute != null) {
            final int from = hit.start();
            final int to = hit.end();

            output.replace(from + shift, to + shift, substitute);
            shift += substitute.length() - (to - from);
        }
    }

    return output.toString();
}

Source File: FormatterPreviewUtils.java From APICloud-Studio with GNU General Public License v3.0

6 votes

/**
 * Fills placeholders in a template with values from an array.<br>
 * Every match of {@code SUBSTITUTION_PATTERN} is stripped of its first and last character and the
 * remainder is parsed as an index into {@code substitutions} (the <code>{0}...{n}</code> form).
 * Matches whose index lies outside the array are left in the text unchanged.<br>
 * (Note - this method is used instead of NLS.bind() because the latter does not cope well with
 * code blocks in the content.)
 * 
 * @param content
 *            the template text
 * @param substitutions
 *            replacement values, addressed by placeholder index
 * @return A string, substituted with the array's content.
 */
private static String substitute(String content, String[] substitutions)
{
	final StringBuilder result = new StringBuilder(content);
	final Matcher placeholders = SUBSTITUTION_PATTERN.matcher(content);
	// Accumulated length difference between the template and the partially filled result.
	int shift = 0;
	while (placeholders.find())
	{
		final int from = placeholders.start();
		final int to = placeholders.end();
		final int slot = Integer.parseInt(content.substring(from + 1, to - 1));
		if (slot < 0 || slot >= substitutions.length)
		{
			// No value available for this placeholder - keep it as is.
			continue;
		}
		final String value = substitutions[slot];
		result.replace(shift + from, shift + to, value);
		shift += value.length() - (to - from);
	}
	return result.toString();
}

Source File: RegExpPrototype.java From es6draft with MIT License

6 votes

/**
 * Searches {@code s} from index 0 using {@code rx} and reports where the first match begins.
 * <p>
 * Rather than writing and restoring "lastIndex", this variant only performs the writability
 * checks at the points where a write would otherwise have happened.
 *
 * @param cx
 *            the execution context
 * @param rx
 *            the regular expression object
 * @param s
 *            the string to search
 * @return the start index of the match, or {@code -1} if there is none
 */
private static Object RegExpSearch(ExecutionContext cx, RegExpObject rx, String s) {
    // Directly throw TypeErrors instead of saving and restoring the "lastIndex" property.
    final Object lastIndexBefore = rx.getLastIndex().getValue();
    final boolean startedAtZero = SameValue(lastIndexBefore, 0);
    if (!startedAtZero) {
        RegExpThrowIfLastIndexNonWritable(cx, rx);
    }
    /* steps 1-3 (not applicable) */
    /* steps 4-7 */
    final boolean isSticky = rx.isSet(RegExpObject.Flags.Sticky);
    final boolean isGlobal = rx.isSet(RegExpObject.Flags.Global);
    final MatchResult match = matchResultOrNull(cx, rx, s, 0, isSticky, true);
    if (startedAtZero && (isGlobal || isSticky)) {
        // Emulate the lastIndex update from RegExpBuiltinExec.
        RegExpThrowIfLastIndexNonWritable(cx, rx);
    }
    /* steps 8-9 */
    return match == null ? -1 : match.start();
}

Source File: FindReplaceDialog.java From nextreports-designer with Apache License 2.0

6 votes

/**
 * Repeats the search at the same start index as the previous one and checks whether the
 * match found there differs from the last recorded match (for example because it was
 * extended or reduced).
 * <p>
 * Nothing is searched when the pattern text equals the regex recorded for the last match.
 * A failure to read the document text is only printed; matching then runs on whatever the
 * shared segment currently holds.
 *
 * @param pattern the pattern to apply
 * @param start   document position at which the searched text begins
 * @return {@code true} if a match starting exactly at {@code start} was found whose text
 *         differs from the last match (state is updated in that case), {@code false} otherwise
 */
private boolean foundExtendedMatch(Pattern pattern, int start) {
    if (pattern.pattern().equals(lastRegex)) {
        return false;
    }

    final int remaining = target.getDocument().getLength() - start;
    try {
        target.getDocument().getText(start, remaining, segment);
    } catch (BadLocationException e) {
        e.printStackTrace();
    }

    final MatchResult candidate = getMatchResult(pattern.matcher(segment.toString()), true);
    // Only a match anchored at the very beginning of the segment counts as "same position".
    if (candidate == null || candidate.start() != 0) {
        return false;
    }
    if (lastMatchResult.group().equals(candidate.group())) {
        return false;
    }

    updateStateAfterFound(candidate, start);
    return true;
}

Source File: DustAnnotator.java From Intellij-Dust with MIT License

6 votes

/**
 * Annotates Dust elements: open tags are passed to {@code checkMatchingCloseTag}, and inside
 * comment elements every {@code TODO} up to the end of its line receives an info annotation
 * with the TODO text attributes.
 *
 * @param element the PSI element being visited
 * @param holder  receiver of the created annotations
 */
@Override
public void annotate(@NotNull final PsiElement element, @NotNull AnnotationHolder holder) {
  if (element instanceof DustOpenTag) {
    checkMatchingCloseTag((DustOpenTag) element, holder);
  }

  if (element.getNode().getElementType() != DustTypes.COMMENT) {
    return;
  }

  final String rawComment = element.getText();
  if (rawComment.length() < 8) {
    return;
  }

  // The last two characters are excluded from the search
  // (presumably the closing comment delimiter - confirm against the lexer).
  final String searchable = rawComment.substring(0, rawComment.length() - 2);
  final Matcher todo = Pattern.compile("TODO[^\n]*").matcher(searchable);

  // Match positions are relative to the element text; shift them to file offsets.
  final int base = element.getTextRange().getStartOffset();
  while (todo.find()) {
    final TextRange range = new TextRange(base + todo.start(), base + todo.end());
    holder.createInfoAnnotation(range, null).setTextAttributes(DustSyntaxHighlighter.TODO);
  }
}

Source File: ElasticGazetteerService.java From CogStack-Pipeline with Apache License 2.0

5 votes

/**
 * Masks every matched region of a document with {@code X} characters.
 * <p>
 * Each region is overwritten by as many {@code X} as it is long, so positions of later
 * regions are not affected by earlier replacements.
 *
 * @param results  matches whose start/end offsets refer to {@code document}
 * @param document the text to mask
 * @return a copy of {@code document} with all matched regions replaced by {@code X}
 */
private String replaceStrings(List<MatchResult> results, String document) {
    final StringBuilder masked = new StringBuilder(document);
    for (MatchResult hit : results) {
        final int from = hit.start();
        final int to = hit.end();

        final StringBuilder mask = new StringBuilder();
        int remaining = to - from;
        while (remaining-- > 0) {
            mask.append("X");
        }
        masked.replace(from, to, mask.toString());
    }
    return masked.toString();
}

Source File: CommentScanner.java From ipst with Mozilla Public License 2.0

5 votes

/**
 * Handles the beginning of a line that continues a string or block comment opened on an
 * earlier line.
 * <p>
 * When the scanner is inside a string, the closing quote is looked up; otherwise, when it is
 * inside a block comment, the block comment end is looked up. If the terminator is found, the
 * text in front of it is reported to the event handler, the corresponding flag is cleared and
 * the position after the terminator is returned. If it is not found the flags stay as they are.
 *
 * @param line            the line being scanned
 * @param quote           matcher for quote characters on {@code line}
 * @param blockCommentEnd matcher for block comment ends on {@code line}
 * @return index at which regular scanning of the line should start; {@code 0} when nothing was skipped
 */
int skipStart(String line, Matcher quote, Matcher blockCommentEnd) {
    final MatchResult terminator;
    if (isInsideString) {
        terminator = find(quote, 0);
    } else if (isInsideBlockComment) {
        terminator = find(blockCommentEnd, 0);
    } else {
        terminator = null;
    }

    if (terminator == null) {
        return 0;
    }

    if (isInsideBlockComment) {
        // Report the tail of the block comment (if any) and leave block comment mode
        if (terminator.start() > 0) {
            eventHandler.onBlockComment(line.substring(0, terminator.start()), false, true);
        }
        isInsideBlockComment = false;
    } else if (isInsideString) {
        // Report the tail of the quoted string (if any) and leave string mode
        if (terminator.start() > 0) {
            eventHandler.onQuoted(line.substring(0, quoteStart(terminator)), false, true);
        }
        isInsideString = false;
    }
    return terminator.end();
}

Source File: HeidelTimeOpenNLP.java From newsleak with GNU Affero General Public License v3.0

5 votes

/**
 * Inspects the characters directly around a match to decide whether the match stands on its
 * own inside the sentence text.
 * <p>
 * A match is rejected when
 * <ul>
 * <li>it is preceded by a digit followed by a dot (e.g. "1999" in "53453.1999"),</li>
 * <li>it is directly preceded by a word character, {@code $} or {@code +},</li>
 * <li>it is directly followed by a word character or {@code °}, or</li>
 * <li>it is followed by a dot or comma and then a digit.</li>
 * </ul>
 * 
 * @param r
 *            MatchResult whose offsets are relative to the covered text of {@code s}
 * @param s
 *            Respective sentence
 * @return whether or not the MatchResult is a clean one
 */
public static Boolean checkInfrontBehind(MatchResult r, Sentence s) {
	final int from = r.start();
	final int to = r.end();
	final String text = s.getCoveredText();
	boolean clean = true;

	// digit + dot in front of the expression, e.g. "1999" in 53453.1999
	if (from > 1 && text.substring(from - 2, from).matches("\\d\\.")) {
		clean = false;
	}

	// a character or symbol ($+) directly in front of the expression
	if (from > 0) {
		final String before = text.substring(from - 1, from);
		if (before.matches("[\\w\\$\\+]") && !before.matches("\\(")) {
			clean = false;
		}
	}

	if (to < text.length()) {
		// a character or degree sign directly behind the expression
		final String after = text.substring(to, to + 1);
		if (after.matches("[°\\w]") && !after.matches("\\)")) {
			clean = false;
		}
		// decimal separator plus digit behind the expression
		if (to + 1 < text.length() && text.substring(to, to + 2).matches("[\\.,]\\d")) {
			clean = false;
		}
	}
	return clean;
}

Source File: RegExpPrototype.java From es6draft with MIT License

5 votes

/**
 * 21.2.5.9 RegExp.prototype[ @@search ] ( string )
 * <p>
 * Objects accepted by {@code isDefaultRegExpObjectForExec} are handled by {@code RegExpSearch};
 * all other objects go through the generic property-based steps below, which reset "lastIndex"
 * to 0 before matching and restore the previous value afterwards.
 * 
 * @param cx
 *            the execution context
 * @param thisValue
 *            the function this-value
 * @param string
 *            the string
 * @return the string index of the first match, or {@code -1} if there is no match
 */
@Function(name = "[Symbol.search]", symbol = BuiltinSymbol.search, arity = 1)
public static Object search(ExecutionContext cx, Object thisValue, Object string) {
    /* step 2 */
    if (!Type.isObject(thisValue)) {
        throw newTypeError(cx, Messages.Key.NotObjectType);
    }
    /* step 1 */
    ScriptObject rx = Type.objectValue(thisValue);
    /* step 3 */
    String s = ToFlatString(cx, string);
    /* steps 4-9 (fast path) */
    if (isDefaultRegExpObjectForExec(cx, rx)) {
        return RegExpSearch(cx, (RegExpObject) rx, s);
    }
    /* step 4 */
    // Remember the caller-visible "lastIndex" so it can be put back after matching.
    Object previousLastIndex = Get(cx, rx, "lastIndex");
    /* step 5 */
    // Only write "lastIndex" when it is not already 0.
    if (!SameValue(previousLastIndex, 0)) {
        Set(cx, rx, "lastIndex", 0, true);
    }
    /* step 6 */
    MatchResult result = matchResultOrNull(cx, rx, s, true);
    /* step 7 */
    // Restore "lastIndex" only if matching actually changed it.
    Object currentLastIndex = Get(cx, rx, "lastIndex");
    if (!SameValue(currentLastIndex, previousLastIndex)) {
        Set(cx, rx, "lastIndex", previousLastIndex, true);
    }
    /* step 8 */
    if (result == null) {
        return -1;
    }
    /* step 9 */
    if (result instanceof ScriptObjectMatchResult) {
        // Extract wrapped script object to ensure no ToInteger conversion takes place
        ScriptObject object = ((ScriptObjectMatchResult) result).object;
        return Get(cx, object, "index");
    }
    return result.start();
}

Source File: FindReplaceDialog.java From nextreports-designer with Apache License 2.0

5 votes

/**
 * Selects the given match in the target component and records it as the most recent result.
 *
 * @param matchResult the match to select
 * @param offset      value added to the match's start and end to obtain the positions
 *                    passed to {@code target.select}
 * @return the selection start, i.e. {@code matchResult.start() + offset}
 */
private int updateStateAfterFound(MatchResult matchResult, int offset) {
    int end = matchResult.end() + offset;
    int found = matchResult.start() + offset;

    target.select(found, end);
    target.getCaret().setSelectionVisible(true);

    // update state variables
    lastFoundIndex = found;
    lastMatchResult = matchResult;
    // A MatchResult is not necessarily a Matcher: on current JDKs Matcher.toMatchResult()
    // returns an immutable snapshot, and casting it unconditionally fails with a
    // ClassCastException. The regex can only be recovered from a real Matcher; otherwise
    // forget the previous regex instead of keeping a stale one.
    if (matchResult instanceof Matcher) {
        lastRegex = ((Matcher) matchResult).pattern().pattern();
    } else {
        lastRegex = null;
    }

    return found;
}

Source File: StreamRegexMatcher.java From windup with Eclipse Public License 1.0

5 votes

/**
 * Publishes a {@code StreamRegexMatchedEvent} for the given match, including the line and
 * column at which the match begins, and then delegates to the superclass implementation.
 * <p>
 * The position is derived from the line/column reported by the modification factory plus the
 * line breaks contained in the text between {@code firstCharIndex} and the start of the match.
 */
@Override
public MatchProcessorResult process(StringBuilder characterBuffer, int firstModifiableCharacterInBuffer, MatchResult matchResult)
{
    final long baseLine = lineColumnAwareModificationFactory.getCurrentLine();
    final long baseColumn = lineColumnAwareModificationFactory.getCurrentColumn();
    final String skipped = characterBuffer.substring(firstCharIndex, matchResult.start());

    // Count the line breaks in front of the match and remember where the last line begins.
    int lineBreaks = 0;
    int lastLineStart = 0;
    final Matcher breakMatcher = Pattern.compile("\r\n|\r|\n").matcher(skipped);
    while (breakMatcher.find())
    {
        lineBreaks++;
        lastLineStart = breakMatcher.end();
    }

    final long line = baseLine + lineBreaks;
    // Same line: continue counting from the known column.
    // New line: the column is the length of the last line in 'skipped'.
    final long column = (lineBreaks == 0)
            ? baseColumn + skipped.length()
            : skipped.length() - lastLineStart;

    listener.regexMatched(new StreamRegexMatchedEvent(matchResult.group(), line, column));
    return super.process(characterBuffer, firstModifiableCharacterInBuffer, matchResult);
}

Source File: CommentScanner.java From ipst with Mozilla Public License 2.0

4 votes

/**
 * Scans one line and reports its pieces (plain text, quoted strings, block comments, line
 * comments) to the event handler.
 * <p>
 * Comment characters are allowed inside a constant string and quoted strings are allowed
 * inside a comment. Block comments and quoted strings that span several lines are reported
 * as one piece per line.
 *
 * @param line the line to scan
 */
void scan(String line) {
    Matcher quote = quoteRegex.matcher(line);
    Matcher lineComment = lineCommentRegex.matcher(line);
    Matcher blockCommentStart = blockCommentStartRegex.matcher(line);
    Matcher blockCommentEnd = blockCommentEndRegex.matcher(line);

    // Determine where the line really starts: when a string or block comment is still open
    // from a previous line, scanning resumes behind its terminator (if present on this line).
    int pos = skipStart(line, quote, blockCommentEnd);

    // Still inside the block comment or string after skipping: the whole line belongs to it.
    if (isInsideBlockComment) {
        eventHandler.onBlockComment(line, false, false);
        return;
    }
    if (isInsideString) {
        eventHandler.onQuoted(line, false, false);
        return;
    }

    while (pos < line.length()) {
        // Next string start, block comment start and line comment start from the current position
        final MatchResult nextQuote = find(quote, pos);
        final MatchResult nextBlock = find(blockCommentStart, pos);
        final MatchResult nextLineComment = find(lineComment, pos);

        // Decide which one comes first; a tie selects none of them
        final boolean quoteFirst = startsBeforeBoth(nextQuote, nextBlock, nextLineComment);
        final boolean blockFirst = startsBeforeBoth(nextBlock, nextQuote, nextLineComment);
        final boolean lineCommentFirst = startsBeforeBoth(nextLineComment, nextQuote, nextBlock);

        if (quoteFirst) {
            // Text between the current position and the quote
            if (nextQuote.start() > 0) {
                eventHandler.onText(line.substring(pos, quoteStart(nextQuote)));
            }
            pos = skipQuoted(line, quote, nextQuote);
        } else if (blockFirst) {
            // Text between the current position and the block comment
            if (nextBlock.start() > 0) {
                eventHandler.onText(line.substring(pos, nextBlock.start()));
            }
            pos = skipBlockComment(line, blockCommentEnd, nextBlock);
        } else if (lineCommentFirst) {
            if (nextLineComment.start() > 0) {
                eventHandler.onText(line.substring(pos, nextLineComment.start()));
            }
            if (nextLineComment.end() < line.length()) {
                eventHandler.onLineComment(line.substring(nextLineComment.end()));
            }
            return;
        } else {
            // Nothing special ahead: the remainder of the line is plain text
            eventHandler.onText(line.substring(pos));
            return;
        }
    }
}

/**
 * Tells whether {@code candidate} exists and starts strictly before each of the two other
 * matches; a missing ({@code null}) other match never blocks the candidate.
 */
private boolean startsBeforeBoth(MatchResult candidate, MatchResult first, MatchResult second) {
    if (candidate == null) {
        return false;
    }
    final boolean beforeFirst = first == null || candidate.start() < first.start();
    final boolean beforeSecond = second == null || candidate.start() < second.start();
    return beforeFirst && beforeSecond;
}

Source File: CommentScanner.java From ipst with Mozilla Public License 2.0

4 votes

/**
 * Returns the position of the quote character within a quote match.
 *
 * @param q a match produced by the quote pattern
 * @return {@code 0} if the match starts at the beginning of the line, otherwise the match
 *         start plus one
 */
int quoteStart(MatchResult q) {
    final int matchStart = q.start();
    if (matchStart <= 0) {
        return 0;
    }
    // A match that does not start at index 0 must consist of a non-quote character followed
    // by the quote, so the quote itself sits one position further.
    return matchStart + 1;
}

Source File: HeidelTimeOpenNLP.java From newsleak with GNU Affero General Public License v3.0

4 votes

/**
 * Checks whether a match lines up with token boundaries of its sentence.
 * <p>
 * The match is accepted right away if it is as long as the sentence, or if it has a blank
 * directly in front of and directly behind it. Otherwise the tokens of the sentence are
 * visited: the begin is fine if it coincides with a token begin or is preceded by one of
 * ".", "/", "–", "-"; the end is fine if it coincides with a token end or is followed by one
 * of those symbols. Both conditions may be met on different tokens.
 * 
 * @param r
 *            MatchResult whose offsets are relative to the covered text of {@code s}
 * @param s
 *            respective Sentence
 * @param jcas
 *            current CAS object
 * @return whether or not the MatchResult is a clean one
 */
public static Boolean checkTokenBoundaries(MatchResult r, Sentence s, JCas jcas) {
	// whole expression is marked as a sentence
	if ((r.end() - r.start()) == (s.getEnd() - s.getBegin())) {
		return true;
	}

	final String text = s.getCoveredText();

	// blanks on both sides: no need to consult the tokens
	final boolean blankBefore = (r.start() > 0) && text.substring(r.start() - 1, r.start()).equals(" ");
	if (blankBefore && (r.end() < text.length()) && text.substring(r.end(), r.end() + 1).equals(" ")) {
		return true;
	}

	// other token boundaries than white-spaces
	boolean beginOK = false;
	boolean endOK = false;
	FSIterator iterToken = jcas.getAnnotationIndex(Token.type).subiterator(s);
	while (iterToken.hasNext()) {
		Token t = (Token) iterToken.next();

		// Begin: token begin, or a symbol the tokenizer does not split off
		// (".", "/", "-", "–"), e.g., "...12 August-24 Augsut..."
		if ((r.start() + s.getBegin()) == t.getBegin()) {
			beginOK = true;
		} else if ((r.start() > 0) && isUnsplitSymbol(text.substring(r.start() - 1, r.start()))) {
			beginOK = true;
		}

		// End: token end, or a symbol the tokenizer does not split off,
		// e.g., "... in 1990. New Sentence ..."
		if ((r.end() + s.getBegin()) == t.getEnd()) {
			endOK = true;
		} else if ((r.end() < text.length()) && isUnsplitSymbol(text.substring(r.end(), r.end() + 1))) {
			endOK = true;
		}

		if (beginOK && endOK) {
			return true;
		}
	}
	return false;
}

/**
 * Tells whether the given one-character string is one of the symbols the tokenizer leaves
 * attached to numbers (".", "/", "–", "-").
 */
private static boolean isUnsplitSymbol(String symbol) {
	return symbol.equals(".") || symbol.equals("/") || symbol.equals("–") || symbol.equals("-");
}

Source File: PercentToBraceConverter.java From Pydev with Eclipse Public License 1.0

4 votes

/**
 * <p>Builds a <tt>PercentConversion</tt> from a single specifier match.</p>
 * 
 * <p>Each instance describes exactly one specifier of the format string and does not
 * change after construction; a format string with several specifiers therefore needs
 * several <tt>PercentConversion</tt> instances.</p>
 * 
 * <p>The matched text and its span are stored, and the token groups "Key", "Width",
 * "Precision", "Flags" and "Conversion" are read from the match. A missing key is replaced
 * by the converter's next index (or by the empty string for a literal <tt>%%</tt>); a
 * <tt>*</tt> width or precision is replaced by a <tt>{n}</tt> reference built from the
 * converter's next index.</p>
 * 
 * @param aConverter - the enclosing {@link PercentToBraceConverter} instance
 * @param aMatch - the {@link java.util.regex.MatchResult MatchResult} that holds 
 *                 information about the matched specifier token.
 * @throws IllegalArgumentException
 *          if <tt>aConverter</tt> or <tt>aMatch</tt> is <tt>null</tt>
 *          
 * @throws IllegalStateException 
 *          if <tt>aMatch</tt> is passed before a successful match could be made
 *          it is said to have inconsistent state.
 */
public PercentConversion(PercentToBraceConverter aConverter, MatchResult aMatch)
        throws IllegalArgumentException, IllegalStateException {

    if (aConverter == null) {
        throw new IllegalArgumentException("Converter can't be null!");
    }
    if (aMatch == null) {
        throw new IllegalArgumentException("Match can't be null!");
    }

    source = aMatch.group(0);
    span = new int[] { aMatch.start(), aMatch.end() };

    final Map<String, String> tokens = extractTokenGroups(aMatch);

    // Indices are drawn from the converter in the order key, width, precision.
    final String keySpec = tokens.get("Key");
    if (keySpec != null) {
        key = keySpec;
    } else if ("%%".equals(source)) {
        key = "";
    } else {
        key = aConverter.nextIndex();
    }

    // TODO: {} representation is hard-wired, could generalize this if needed
    final String widthSpec = tokens.get("Width");
    width = "*".equals(widthSpec) ? String.format("{%s}", aConverter.nextIndex()) : widthSpec;

    final String precisionSpec = tokens.get("Precision");
    precision = "*".equals(precisionSpec) ? String.format("{%s}", aConverter.nextIndex()) : precisionSpec;

    flags = tokens.get("Flags");
    conversion = tokens.get("Conversion");
}

Java Code Examples for java.util.regex.MatchResult#start()