java.text.BreakIterator Java Examples
The following examples show how to use
java.text.BreakIterator.
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example #1
Source File: SelectionImpl.java From RichTextFX with BSD 2-Clause "Simplified" License | 6 votes |
/**
 * Selects a word-iterator range located relative to the given position.
 * Does nothing when the area is empty.
 *
 * @param wordPositionInArea position within the area's text used to locate the range
 */
@Override
public void selectWord(int wordPositionInArea) {
    if (area.getLength() == 0) {
        return;
    }

    BreakIterator words = BreakIterator.getWordInstance(getArea().getLocale());
    words.setText(area.getText());

    // Move to the boundary before the position, then advance by one boundary.
    words.preceding(wordPositionInArea);
    words.next();
    final int selectionStart = words.current();

    // Move to the boundary after the position, then advance by one more boundary.
    words.following(wordPositionInArea);
    words.next();
    final int selectionEnd = words.current();

    selectRange(selectionStart, selectionEnd);
}
Example #2
Source File: BreakIteratorTest.java From dragonwell8_jdk with GNU General Public License v2.0 | 6 votes |
/**
 * Walks the iterator backward from last() using previous(), collecting the
 * text between consecutive boundaries and reporting anomalies via errln().
 *
 * @param bi   the iterator under test
 * @param text the text whose length and substrings are checked against the iterator
 * @return the collected fragments, ordered from the start of the text to its end
 */
private Vector testLastAndPrevious(BreakIterator bi, String text) {
    Vector<String> fragments = new Vector<String>();
    int pos = bi.last();
    int prevPos = pos;

    if (pos != text.length()) {
        errln("last() returned " + pos + " instead of " + text.length());
    }

    while (pos != BreakIterator.DONE) {
        pos = bi.previous();
        if (pos == BreakIterator.DONE) {
            // Running out of boundaries is only legitimate at offset 0.
            if (prevPos != 0) {
                errln("previous() returned DONE prematurely: offset was " + prevPos + " instead of 0");
            }
        } else {
            if (pos >= prevPos) {
                errln("previous() failed to move backward: previous() on position " + prevPos + " yielded " + pos);
            }
            // Prepend, because fragments are discovered back-to-front.
            fragments.add(0, text.substring(pos, prevPos));
        }
        prevPos = pos;
    }
    return fragments;
}
Example #3
Source File: Chapter3.java From Natural-Language-Processing-with-Java-Second-Edition with MIT License | 6 votes |
/**
 * Prints every sentence of {@code paragraph} as {@code begin-end [sentence]},
 * one per line, using a sentence BreakIterator for the US English locale.
 */
public static void usingBreakIterator() {
    // Pass the locale explicitly; previously it was created but never used,
    // so the JVM default locale silently decided the sentence rules.
    Locale currentLocale = Locale.US;
    BreakIterator sentenceIterator = BreakIterator.getSentenceInstance(currentLocale);
    sentenceIterator.setText(paragraph);

    int begin = sentenceIterator.first();
    // Fetch the end boundary before printing so that no dangling "N-" prefix
    // is emitted once the iterator reports DONE.
    for (int end = sentenceIterator.next();
            end != BreakIterator.DONE;
            begin = end, end = sentenceIterator.next()) {
        System.out.println(begin + "-" + end + " [" + paragraph.substring(begin, end) + "]");
    }
}
Example #4
Source File: BreakIteratorTest.java From dragonwell8_jdk with GNU General Public License v2.0 | 6 votes |
private void testPreceding(BreakIterator bi, String text, int[] boundaries) { logln("testPreceding():"); int p = 0; int i = 0; try { for (i = 0; i <= text.length(); i++) { // change to <= when new BI code goes in int b = bi.preceding(i); logln("bi.preceding(" + i + ") -> " + b); if (b != boundaries[p]) errln("Wrong result from preceding() for " + i + ": expected " + boundaries[p] + ", got " + b); if (i == boundaries[p + 1]) ++p; } } catch (IllegalArgumentException illargExp) { errln("IllegalArgumentException caught from preceding() for offset: " + i); } }
Example #5
Source File: BreakIteratorTest.java From dragonwell8_jdk with GNU General Public License v2.0 | 6 votes |
private void testIsBoundary(BreakIterator bi, String text, int[] boundaries) { logln("testIsBoundary():"); int p = 1; boolean isB; for (int i = 0; i <= text.length(); i++) { // change to <= when new BI code goes in isB = bi.isBoundary(i); logln("bi.isBoundary(" + i + ") -> " + isB); if (i == boundaries[p]) { if (!isB) errln("Wrong result from isBoundary() for " + i + ": expected true, got false"); ++p; } else { if (isB) errln("Wrong result from isBoundary() for " + i + ": expected false, got true"); } } }
Example #6
Source File: TestSplittingBreakIterator.java From lucene-solr with Apache License 2.0 | 6 votes |
private void testBreakIterator(BreakIterator bi, String text, String boundaries) { bi.setText(text); //Test first & last testFirstAndLast(bi, text, boundaries); //Test if expected boundaries are consistent with reading them from next() in a loop: assertEquals(boundaries, readBoundariesToString(bi, text)); //Test following() and preceding(): // get each index, randomized in case their is a sequencing bug: List<Integer> indexes = randomIntsBetweenInclusive(text.length() + 1); testFollowing(bi, text, boundaries, indexes); testPreceding(bi, text, boundaries, indexes); //Test previous(): testPrevious(bi, text, boundaries); }
Example #7
Source File: V8BreakIterator.java From HtmlUnit-Android with Apache License 2.0 | 6 votes |
/**
 * Returns the type of the break.
 * This implementation classifies the text between the previous boundary and
 * the current one and only ever produces {@code letter}, {@code number} or
 * {@code none}.
 * @return {@code none}, {@code number}, {@code letter}, {@code kana}, {@code ideo} or {@code unknown}
 */
@JsxFunction
public String breakType() {
    if (typeAlwaysNone_) {
        return "none";
    }

    final int tokenEnd = current();
    final int tokenStart = breakIterator_.previous();
    // previous() moved the underlying iterator; step it again afterwards
    // (presumably to restore the position - confirm against first()/next()).
    if (tokenStart == BreakIterator.DONE) {
        first();
    }
    else {
        next();
    }

    if (tokenEnd == BreakIterator.DONE || tokenStart == BreakIterator.DONE) {
        return "none";
    }

    final String token = text_.substring(tokenStart, tokenEnd);
    if (token.matches(".*[a-zA-Z]+.*")) {
        return "letter";
    }
    if (token.matches("[0-9]+")) {
        return "number";
    }
    return "none";
}
Example #8
Source File: GranularityIterator.java From talkback with Apache License 2.0 | 6 votes |
/**
 * Finds the range that ends at the boundary at or before {@code offset}.
 *
 * @param offset offset into the iterator text; values past the end are clamped to the text length
 * @return the result of {@code getRange(start, end)}, or {@code null} when the text is
 *     empty, {@code offset} is not positive, or no boundary is found
 */
@Override
public @Nullable int[] preceding(int offset) {
    final int textLength = getIteratorText().length();
    if (textLength <= 0 || offset <= 0) {
        return null;
    }

    int rangeEnd = Math.min(offset, textLength);
    // Back up until the end sits exactly on a boundary.
    while (!breakIterator.isBoundary(rangeEnd)) {
        rangeEnd = breakIterator.preceding(rangeEnd);
        if (rangeEnd == BreakIterator.DONE) {
            return null;
        }
    }

    final int rangeStart = breakIterator.preceding(rangeEnd);
    return (rangeStart == BreakIterator.DONE) ? null : getRange(rangeStart, rangeEnd);
}
Example #9
Source File: BreakIteratorTest.java From openjdk-jdk9 with GNU General Public License v2.0 | 6 votes |
private void testPreceding(BreakIterator bi, String text, int[] boundaries) { logln("testPreceding():"); int p = 0; int i = 0; try { for (i = 0; i <= text.length(); i++) { // change to <= when new BI code goes in int b = bi.preceding(i); logln("bi.preceding(" + i + ") -> " + b); if (b != boundaries[p]) errln("Wrong result from preceding() for " + i + ": expected " + boundaries[p] + ", got " + b); if (i == boundaries[p + 1]) ++p; } } catch (IllegalArgumentException illargExp) { errln("IllegalArgumentException caught from preceding() for offset: " + i); } }
Example #10
Source File: TextComponent.java From openjdk-8 with GNU General Public License v2.0 | 6 votes |
/** * Needed to unify forward and backward searching. * The method assumes that s is the text assigned to words. */ private int findWordLimit(int index, BreakIterator words, boolean direction, String s) { // Fix for 4256660 and 4256661. // Words iterator is different from character and sentence iterators // in that end of one word is not necessarily start of another word. // Please see java.text.BreakIterator JavaDoc. The code below is // based on nextWordStartAfter example from BreakIterator.java. int last = (direction == NEXT) ? words.following(index) : words.preceding(index); int current = (direction == NEXT) ? words.next() : words.previous(); while (current != BreakIterator.DONE) { for (int p = Math.min(last, current); p < Math.max(last, current); p++) { if (Character.isLetter(s.charAt(p))) { return last; } } last = current; current = (direction == NEXT) ? words.next() : words.previous(); } return BreakIterator.DONE; }
Example #11
Source File: SplitSentence.java From ignite-book-code-samples with GNU General Public License v3.0 | 6 votes |
@Override public void execute(Tuple tuple, BasicOutputCollector collector) { //Get the sentence content from the tuple String sentence = tuple.getString(0); //An iterator to get each word BreakIterator boundary=BreakIterator.getWordInstance(); //Give the iterator the sentence boundary.setText(sentence); //Find the beginning first word int start=boundary.first(); //Iterate over each word and emit it to the output stream for (int end = boundary.next(); end != BreakIterator.DONE; start=end, end=boundary.next()) { //get the word String word=sentence.substring(start,end); //If a word is whitespace characters, replace it with empty word=word.replaceAll("\\s+",""); //if it's an actual word, emit it if (!word.equals("")) { collector.emit(new Values(word)); } } }
Example #12
Source File: Bug4533872.java From dragonwell8_jdk with GNU General Public License v2.0 | 6 votes |
/**
 * For each input string, jumps forward with next(n) to the last expected word
 * and checks that the text up to the following boundary matches it.
 */
void TestNext() {
    iter = BreakIterator.getWordInstance(Locale.US);

    for (int i = 0; i < given.length; i++) {
        iter.setText(given[i]);
        start = iter.first();

        // Index of the last expected word for this input.
        final int lastWord = expected[i].length - 1;
        start = iter.next(lastWord);
        end = iter.next();

        if (!expected[i][lastWord].equals(given[i].substring(start, end))) {
            errln("Word break failure: printEachForward() expected:<"
                    + expected[i][lastWord] + ">, got:<"
                    + given[i].substring(start, end)
                    + "> start=" + start + " end=" + end);
        }
    }
}
Example #13
Source File: BreakIteratorTest.java From TencentKona-8 with GNU General Public License v2.0 | 5 votes |
/**
 * Creates the character, word, line and sentence iterators used by the
 * tests, each obtained from the no-argument BreakIterator factory method.
 */
public BreakIteratorTest() {
    this.characterBreak = BreakIterator.getCharacterInstance();
    this.wordBreak = BreakIterator.getWordInstance();
    this.lineBreak = BreakIterator.getLineInstance();
    this.sentenceBreak = BreakIterator.getSentenceInstance();
}
Example #14
Source File: TestOpenNLPSentenceBreakIterator.java From lucene-solr with Apache License 2.0 | 5 votes |
/**
 * Checks sentence detection on a slice taken from the middle of a padded
 * buffer: the first sample sentence surrounded by PADDING on both sides.
 */
public void testSliceMiddle() throws Exception {
    NLPSentenceDetectorOp detector = OpenNLPOpsFactory.getSentenceDetector(sentenceModelFile);
    BreakIterator bi = new OpenNLPSentenceBreakIterator(detector);

    final String padded = PADDING + SENTENCES[0] + PADDING;
    final int sliceStart = PADDING.length();
    final int sliceLength = SENTENCES[0].length();
    bi.setText(getCharArrayIterator(padded, sliceStart, sliceLength));

    test1Sentence(bi, SENTENCES[0]);
}
Example #15
Source File: Bug4533872.java From openjdk-jdk8u with GNU General Public License v2.0 | 5 votes |
void TestPrintAt_2() { iter = BreakIterator.getWordInstance(Locale.US); int[][] index = { {2, 9, 10, 15, 17}, {1, 9, 10, 13, 16, 18, 20}, {4, 9, 10, 13, 16, 18, 20}, {6, 7, 10, 11, 15}, }; for (int i = 0; i < given.length; i++) { iter.setText(given[i]); // Check preceding(0)'s return value - should equals BreakIterator.DONE. if (iter.preceding(0) != BreakIterator.DONE) { errln("Word break failure: printAt_2() expected:-1(BreakIterator.DONE), got:" + iter.preceding(0)); } for (int j = 0; j < index[i].length; j++) { start = iter.preceding(index[i][j]); end = iter.next(); if (!expected[i][j].equals(given[i].substring(start, end))) { errln("Word break failure: printAt_2() expected:<" + expected[i][j] + ">, got:<" + given[i].substring(start, end) + "> start=" + start + " end=" + end); } } // Check next()'s return value - should equals BreakIterator.DONE. end = iter.last(); start = iter.next(); if (start != BreakIterator.DONE) { errln("Word break failure: printAt_2() expected:-1(BreakIterator.DONE), got:" + start); } } }
Example #16
Source File: SubWordActions.java From Pydev with Eclipse Public License 1.0 | 5 votes |
@Override public void run() { // Check whether we are in a java code partition and the preference is enabled final IPreferenceStore store = getPreferenceStore(); if (store.getString(SubWordPreferences.WORD_NAVIGATION_STYLE) .equals(SubWordPreferences.WORD_NAVIGATION_STYLE_NATIVE)) { super.run(); return; } final ISourceViewer viewer = getSourceViewer(); final IDocument document = viewer.getDocument(); try { fIterator.setText((CharacterIterator) new DocumentCharacterIterator(document)); int position = widgetOffset2ModelOffset(viewer, viewer.getTextWidget().getCaretOffset()); if (position == -1) { return; } int next = findNextPosition(position); if (isBlockSelectionModeEnabled() && document.getLineOfOffset(next) != document.getLineOfOffset(position)) { super.run(); // may navigate into virtual white space } else if (next != BreakIterator.DONE) { setCaretPosition(next); getTextWidget().showSelection(); fireSelectionChanged(); } } catch (BadLocationException x) { // ignore } }
Example #17
Source File: BreakIteratorTest.java From jdk8u_jdk with GNU General Public License v2.0 | 5 votes |
/**
 * Concatenates the expected fragments into one text, iterates over it forward
 * and backward, compares the results, and then runs the following(),
 * preceding(), isBoundary() and multiple-selection checks.
 *
 * @param bi             the iterator under test; its text is set here
 * @param expectedResult the expected fragments (Strings), in text order
 */
private void generalIteratorTest(BreakIterator bi, Vector expectedResult) {
    final int fragmentCount = expectedResult.size();

    StringBuilder joined = new StringBuilder();
    for (int i = 0; i < fragmentCount; i++) {
        joined.append((String) expectedResult.elementAt(i));
    }
    String text = joined.toString();

    bi.setText(text);

    Vector nextResults = testFirstAndNext(bi, text);
    Vector previousResults = testLastAndPrevious(bi, text);

    logln("comparing forward and backward...");
    int errorsBefore = getErrorCount();
    compareFragmentLists("forward iteration", "backward iteration", nextResults,
                    previousResults);
    // Only compare against the expectation when both directions agreed.
    if (getErrorCount() == errorsBefore) {
        logln("comparing expected and actual...");
        compareFragmentLists("expected result", "actual result", expectedResult,
                        nextResults);
    }

    // Layout: DONE, 0, cumulative fragment end offsets..., DONE.
    int[] boundaries = new int[fragmentCount + 3];
    boundaries[0] = BreakIterator.DONE;
    boundaries[1] = 0;
    int runningOffset = 0;
    for (int i = 0; i < fragmentCount; i++) {
        runningOffset += ((String) expectedResult.elementAt(i)).length();
        boundaries[i + 2] = runningOffset;
    }
    boundaries[boundaries.length - 1] = BreakIterator.DONE;

    testFollowing(bi, text, boundaries);
    testPreceding(bi, text, boundaries);
    testIsBoundary(bi, text, boundaries);

    doMultipleSelectionTest(bi, text);
}
Example #18
Source File: Bug4912404.java From jdk8u_jdk with GNU General Public License v2.0 | 5 votes |
/**
 * Regression check: a word BreakIterator must not consider itself equal to
 * {@code null}.
 *
 * @param args unused
 * @throws RuntimeException if {@code equals(null)} returns true
 */
public static void main(String[] args) {
    BreakIterator wordIterator = BreakIterator.getWordInstance();
    wordIterator.setText("abc");

    // Calling equals(null) is the behaviour under test.
    boolean equalsNull = wordIterator.equals(null);
    if (equalsNull) {
        throw new RuntimeException("BreakIterator.equals(null) should return false.");
    }
}
Example #19
Source File: AccessibleHTML.java From jdk8u-jdk with GNU General Public License v2.0 | 5 votes |
/**
 * Returns the Segment at <code>index</code> representing either
 * the word or sentence as identified by <code>part</code>, or
 * null if a valid word/sentence can't be found. The offset
 * will point to the start of the word/sentence in the array, and
 * the modelOffset will point to the location of the word/sentence
 * in the model.
 *
 * @param part  <code>AccessibleText.WORD</code> or <code>AccessibleText.SENTENCE</code>;
 *              any other value yields null
 * @param index model index used to look up the paragraph text
 */
private IndexedSegment getSegmentAt(int part, int index)
        throws BadLocationException {
    IndexedSegment seg = getParagraphElementText(index);
    if (seg == null) {
        return null;
    }

    final BreakIterator iterator;
    if (part == AccessibleText.WORD) {
        iterator = BreakIterator.getWordInstance(getLocale());
    } else if (part == AccessibleText.SENTENCE) {
        iterator = BreakIterator.getSentenceInstance(getLocale());
    } else {
        return null;
    }

    seg.first();
    iterator.setText(seg);

    // Translate the model index into an index within the segment's array.
    int end = iterator.following(index - seg.modelOffset + seg.offset);
    if (end == BreakIterator.DONE || end > seg.offset + seg.count) {
        return null;
    }

    int begin = iterator.previous();
    if (begin == BreakIterator.DONE || begin >= seg.offset + seg.count) {
        return null;
    }

    // Narrow the segment to [begin, end) and shift modelOffset by the same amount.
    seg.modelOffset = seg.modelOffset + begin - seg.offset;
    seg.offset = begin;
    seg.count = end - begin;
    return seg;
}
Example #20
Source File: ConditionalSpecialCasing.java From openjdk-jdk9 with GNU General Public License v2.0 | 5 votes |
/** * Implements the "Final_Cased" condition * * Specification: Within the closest word boundaries containing C, there is a cased * letter before C, and there is no cased letter after C. * * Regular Expression: * Before C: [{cased==true}][{wordBoundary!=true}]* * After C: !([{wordBoundary!=true}]*[{cased}]) */ private static boolean isFinalCased(String src, int index, Locale locale) { BreakIterator wordBoundary = BreakIterator.getWordInstance(locale); wordBoundary.setText(src); int ch; // Look for a preceding 'cased' letter for (int i = index; (i >= 0) && !wordBoundary.isBoundary(i); i -= Character.charCount(ch)) { ch = src.codePointBefore(i); if (isCased(ch)) { int len = src.length(); // Check that there is no 'cased' letter after the index for (i = index + Character.charCount(src.codePointAt(index)); (i < len) && !wordBoundary.isBoundary(i); i += Character.charCount(ch)) { ch = src.codePointAt(i); if (isCased(ch)) { return false; } } return true; } } return false; }
Example #21
Source File: CommandExecutionUtils.java From APICloud-Studio with GNU General Public License v3.0 | 5 votes |
/**
 * Tries to find the word at the given offset.
 *
 * @param line
 *            the line
 * @param offset
 *            the offset
 * @return the region between the word boundaries around the offset; it has
 *         length 0 when both boundaries coincide
 */
protected static IRegion findWordRegion(String line, int offset) {
    BreakIterator words = BreakIterator.getWordInstance();
    words.setText(line);

    // Fall back to the line start/end when no boundary exists on that side.
    final int boundaryBefore = words.preceding(offset);
    int start = (boundaryBefore == BreakIterator.DONE) ? 0 : boundaryBefore;

    final int boundaryAfter = words.following(offset);
    int end = (boundaryAfter == BreakIterator.DONE) ? line.length() : boundaryAfter;

    // The offset itself is a boundary: keep the longer of the two neighbours,
    // preferring the left one on a tie.
    if (words.isBoundary(offset)) {
        if (end - offset > offset - start) {
            start = offset;
        } else {
            end = offset;
        }
    }

    return new Region(start, end - start);
}
Example #22
Source File: Bug4533872.java From openjdk-jdk8u with GNU General Public License v2.0 | 5 votes |
void TestPrintEachBackward() { iter = BreakIterator.getWordInstance(Locale.US); for (int i = 0; i < given.length; i++) { iter.setText(given[i]); end = iter.last(); // Check current()'s return value - should be same as last()'s. current = iter.current(); if (end != current) { errln("Word break failure: printEachBackward() Unexpected current value: current()=" + current + ", expected(=last())=" + end); } int j; for (start = iter.previous(), j = expected[i].length-1; start != BreakIterator.DONE; end = start, start = iter.previous(), j--) { // Check current()'s return value - should be same as previous()'s. current = iter.current(); if (start != current) { errln("Word break failure: printEachBackward() Unexpected current value: current()=" + current + ", expected(=previous())=" + start); } if (!expected[i][j].equals(given[i].substring(start, end))) { errln("Word break failure: printEachBackward() expected:<" + expected[i][j] + ">, got:<" + given[i].substring(start, end) + "> start=" + start + " end=" + end); } } } }
Example #23
Source File: BreakIteratorProviderImpl.java From dragonwell8_jdk with GNU General Public License v2.0 | 5 votes |
/**
 * Creates the break iterator configured for the given locale and type.
 *
 * @param locale         locale whose resources are consulted; must not be null
 * @param type           index into the "BreakIteratorClasses" resource array
 * @param dataName       resource key of the rule data file
 * @param dictionaryName resource key of the dictionary file (read only for
 *                       dictionary-based iterators)
 * @return a new iterator instance
 * @throws NullPointerException if {@code locale} is null
 * @throws InternalError if the data cannot be loaded or the configured class name is unknown
 */
private BreakIterator getBreakInstance(Locale locale,
                                       int type,
                                       String dataName,
                                       String dictionaryName) {
    if (locale == null) {
        throw new NullPointerException();
    }

    LocaleResources resources = LocaleProviderAdapter.forJRE().getLocaleResources(locale);
    String[] classNames = (String[]) resources.getBreakIteratorInfo("BreakIteratorClasses");
    String dataFile = (String) resources.getBreakIteratorInfo(dataName);

    try {
        final String className = classNames[type];
        switch (className) {
        case "RuleBasedBreakIterator":
            return new RuleBasedBreakIterator(dataFile);

        case "DictionaryBasedBreakIterator":
            String dictionaryFile = (String) resources.getBreakIteratorInfo(dictionaryName);
            return new DictionaryBasedBreakIterator(dataFile, dictionaryFile);

        default:
            // Caught below and rethrown as InternalError.
            throw new IllegalArgumentException("Invalid break iterator class \"" +
                            className + "\"");
        }
    } catch (IOException | MissingResourceException | IllegalArgumentException e) {
        throw new InternalError(e.toString(), e);
    }
}
Example #24
Source File: DocLocale.java From TencentKona-8 with GNU General Public License v2.0 | 5 votes |
/**
 * Constructor.
 * Resolves the locale via getLocale(), makes it the JVM default when it is
 * available (otherwise calls {@code docenv.exit()}), and creates the collator
 * and sentence breaker for it.
 *
 * @param docenv           environment; stored, and asked to exit when no locale is found
 * @param localeName       stored as the requested locale name
 * @param useBreakIterator stored flag
 */
DocLocale(DocEnv docenv, String localeName, boolean useBreakIterator) {
    this.docenv = docenv;
    this.localeName = localeName;
    this.useBreakIterator = useBreakIterator;

    this.locale = getLocale();
    if (this.locale != null) {
        Locale.setDefault(this.locale); // NOTE: updating global state
    } else {
        docenv.exit();
    }

    this.collator = Collator.getInstance(this.locale);
    this.sentenceBreaker = BreakIterator.getSentenceInstance(this.locale);
}
Example #25
Source File: DocSplitterFallbackImpl.java From relex with Apache License 2.0 | 5 votes |
/**
 * Creates a splitter with an empty buffer and a US-locale sentence iterator
 * set to empty text, with {@code start} at that iterator's first boundary.
 */
public DocSplitterFallbackImpl() {
    this.buffer = "";
    this.bdry = BreakIterator.getSentenceInstance(Locale.US);
    this.bdry.setText("");
    this.start = this.bdry.first();
}
Example #26
Source File: CustomPostingsHighlighter.java From Elasticsearch with Apache License 2.0 | 5 votes |
@Override protected Passage[] getEmptyHighlight(String fieldName, BreakIterator bi, int maxPassages) { if (returnNonHighlightedSnippets) { //we want to return the first sentence of the first snippet only return super.getEmptyHighlight(fieldName, bi, 1); } return EMPTY_PASSAGE; }
Example #27
Source File: DocLocale.java From openjdk-8-source with GNU General Public License v2.0 | 5 votes |
/**
 * Constructor.
 * Stores the arguments, resolves the locale through getLocale(), installs it
 * as the JVM default when present (calling {@code docenv.exit()} otherwise),
 * then builds the collator and sentence breaker from it.
 *
 * @param docenv           environment; stored, and asked to exit when no locale is found
 * @param localeName       stored as the requested locale name
 * @param useBreakIterator stored flag
 */
DocLocale(DocEnv docenv, String localeName, boolean useBreakIterator) {
    this.docenv = docenv;
    this.localeName = localeName;
    this.useBreakIterator = useBreakIterator;

    this.locale = getLocale();
    if (this.locale != null) {
        Locale.setDefault(this.locale); // NOTE: updating global state
    } else {
        docenv.exit();
    }

    this.collator = Collator.getInstance(this.locale);
    this.sentenceBreaker = BreakIterator.getSentenceInstance(this.locale);
}
Example #28
Source File: AccessibleHTML.java From jdk8u-jdk with GNU General Public License v2.0 | 5 votes |
/**
 * Returns the Segment at <code>index</code> representing either
 * the word or sentence as identified by <code>part</code>, or
 * null if a valid word/sentence can't be found. The offset
 * will point to the start of the word/sentence in the array, and
 * the modelOffset will point to the location of the word/sentence
 * in the model.
 *
 * @param part  <code>AccessibleText.WORD</code> or <code>AccessibleText.SENTENCE</code>;
 *              any other value yields null
 * @param index model index used to look up the paragraph text
 */
private IndexedSegment getSegmentAt(int part, int index)
        throws BadLocationException {
    IndexedSegment seg = getParagraphElementText(index);
    if (seg == null) {
        return null;
    }

    final BreakIterator iterator;
    if (part == AccessibleText.WORD) {
        iterator = BreakIterator.getWordInstance(getLocale());
    } else if (part == AccessibleText.SENTENCE) {
        iterator = BreakIterator.getSentenceInstance(getLocale());
    } else {
        return null;
    }

    seg.first();
    iterator.setText(seg);

    // Convert the model index to an index within the segment's array.
    int end = iterator.following(index - seg.modelOffset + seg.offset);
    if (end == BreakIterator.DONE || end > seg.offset + seg.count) {
        return null;
    }

    int begin = iterator.previous();
    if (begin == BreakIterator.DONE || begin >= seg.offset + seg.count) {
        return null;
    }

    // Shrink the segment to [begin, end) and move modelOffset accordingly.
    seg.modelOffset = seg.modelOffset + begin - seg.offset;
    seg.offset = begin;
    seg.count = end - begin;
    return seg;
}
Example #29
Source File: RuleBasedBreakIterator.java From jdk8u-dev-jdk with GNU General Public License v2.0 | 5 votes |
/** * Sets the iterator to refer to the first boundary position following * the specified position. * @offset The position from which to begin searching for a break position. * @return The position of the first break after the current position. */ @Override public int following(int offset) { CharacterIterator text = getText(); checkOffset(offset, text); // Set our internal iteration position (temporarily) // to the position passed in. If this is the _beginning_ position, // then we can just use next() to get our return value text.setIndex(offset); if (offset == text.getBeginIndex()) { cachedLastKnownBreak = handleNext(); return cachedLastKnownBreak; } // otherwise, we have to sync up first. Use handlePrevious() to back // us up to a known break position before the specified position (if // we can determine that the specified position is a break position, // we don't back up at all). This may or may not be the last break // position at or before our starting position. Advance forward // from here until we've passed the starting position. The position // we stop on will be the first break position after the specified one. int result = cachedLastKnownBreak; if (result >= offset || result <= BreakIterator.DONE) { result = handlePrevious(); } else { //it might be better to check if handlePrevious() give us closer //safe value but handlePrevious() is slow too //So, this has to be done carefully text.setIndex(result); } while (result != BreakIterator.DONE && result <= offset) { result = handleNext(); } cachedLastKnownBreak = result; return result; }
Example #30
Source File: RuleBasedBreakIterator.java From openjdk-jdk8u with GNU General Public License v2.0 | 5 votes |
/** * Set the iterator to analyze a new piece of text. This function resets * the current iteration position to the beginning of the text. * @param newText An iterator over the text to analyze. */ @Override public void setText(CharacterIterator newText) { // Test iterator to see if we need to wrap it in a SafeCharIterator. // The correct behavior for CharacterIterators is to allow the // position to be set to the endpoint of the iterator. Many // CharacterIterators do not uphold this, so this is a workaround // to permit them to use this class. int end = newText.getEndIndex(); boolean goodIterator; try { newText.setIndex(end); // some buggy iterators throw an exception here goodIterator = newText.getIndex() == end; } catch(IllegalArgumentException e) { goodIterator = false; } if (goodIterator) { text = newText; } else { text = new SafeCharIterator(newText); } text.first(); cachedLastKnownBreak = BreakIterator.DONE; }