Java Code Examples for java.text.BreakIterator#setText()
The following examples show how to use
java.text.BreakIterator#setText().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: MyFormatter.java From triplea with GNU General Public License v3.0 | 6 votes |
/** * Adds HTML line breaks and indentation to a string so it wraps for things like long tooltips. * * <pre> * string part 1 * string part 2 * ... * string part X * </pre> */ public static String addHtmlBreaksAndIndents( final String target, final int firstLineMaxLength, final int maxLength) { final StringBuilder sb = new StringBuilder(); final BreakIterator breakIterator = BreakIterator.getLineInstance(); breakIterator.setText(target); int start = breakIterator.first(); int end = breakIterator.next(); int lineLength = 0; int currentMaxLength = firstLineMaxLength; while (end != BreakIterator.DONE) { final String word = target.substring(start, end); lineLength = lineLength + word.length(); if (lineLength >= currentMaxLength) { sb.append("<br /> "); lineLength = word.length() + 5; // Add 5 for the indent currentMaxLength = maxLength; } sb.append(word); start = end; end = breakIterator.next(); } return sb.toString(); }
Example 2
Source File: Chapter2.java From Natural-Language-Processing-with-Java-Second-Edition with MIT License | 6 votes |
/**
 * Demonstrates word segmentation with {@link BreakIterator}.
 *
 * <p>Prints one line per segment of the sample sentence in the form
 * {@code begin-end [segment text]}. The iterator is created for the US English locale.
 * Nothing is printed after the last segment, so the output never ends with a dangling
 * start offset.
 */
private static void usingTheBreakIterator() {
    Locale currentLocale = Locale.US;
    BreakIterator wordIterator = BreakIterator.getWordInstance(currentLocale);
    String text = "Let's pause, and then reflect.";
    wordIterator.setText(text);

    int begin = wordIterator.first();
    // Only print once both ends of a segment are known; DONE marks the end of the text.
    for (int end = wordIterator.next();
            end != BreakIterator.DONE;
            begin = end, end = wordIterator.next()) {
        System.out.println(begin + "-" + end + " [" + text.substring(begin, end) + "]");
    }
}
Example 3
Source File: BreakIteratorTest.java From dragonwell8_jdk with GNU General Public License v2.0 | 6 votes |
/**
 * Regression test for bug 4153072.
 *
 * <p>A word iterator is set over a {@link StringCharacterIterator} whose range starts at
 * index 3 of the sample string. {@code isBoundary} is then probed with offsets -1 through 3:
 * an error is reported if an offset below the begin index does not raise
 * {@link IllegalArgumentException}, or if the begin index itself does raise one.
 */
public void TestBug4153072() {
    final String text = "...Hello, World!...";
    final int rangeStart = 3;
    final int rangeEnd = text.length() - 3;

    BreakIterator wordBreaks = BreakIterator.getWordInstance();
    wordBreaks.setText(new StringCharacterIterator(text, rangeStart, rangeEnd, rangeStart));

    int offset = -1;
    while (offset <= rangeStart) {
        final boolean insideRange = offset >= rangeStart;
        try {
            wordBreaks.isBoundary(offset);
            if (!insideRange) {
                errln("Didn't get exception with offset = " + offset
                        + " and begin index = " + rangeStart);
            }
        } catch (IllegalArgumentException e) {
            if (insideRange) {
                errln("Got exception with offset = " + offset
                        + " and begin index = " + rangeStart);
            }
        }
        ++offset;
    }
}
Example 4
Source File: BreakIteratorTest.java From openjdk-jdk8u with GNU General Public License v2.0 | 6 votes |
/**
 * Regression test for bug 4153072.
 *
 * <p>A word iterator is set over a {@link StringCharacterIterator} whose range starts at
 * index 3 of the sample string. {@code isBoundary} is then probed with offsets -1 through 3:
 * an error is reported if an offset below the begin index does not raise
 * {@link IllegalArgumentException}, or if the begin index itself does raise one.
 */
public void TestBug4153072() {
    final String text = "...Hello, World!...";
    final int rangeStart = 3;
    final int rangeEnd = text.length() - 3;

    BreakIterator wordBreaks = BreakIterator.getWordInstance();
    wordBreaks.setText(new StringCharacterIterator(text, rangeStart, rangeEnd, rangeStart));

    int offset = -1;
    while (offset <= rangeStart) {
        final boolean insideRange = offset >= rangeStart;
        try {
            wordBreaks.isBoundary(offset);
            if (!insideRange) {
                errln("Didn't get exception with offset = " + offset
                        + " and begin index = " + rangeStart);
            }
        } catch (IllegalArgumentException e) {
            if (insideRange) {
                errln("Got exception with offset = " + offset
                        + " and begin index = " + rangeStart);
            }
        }
        ++offset;
    }
}
Example 5
Source File: CapitalizeWordsInSentence.java From levelup-java-examples with Apache License 2.0 | 5 votes |
/**
 * Finds the start of the next word-iterator segment after {@code pos} that contains a letter.
 *
 * <p>Scanning begins at the first word boundary following {@code pos}. Each segment between
 * consecutive boundaries is inspected, and the start offset of the first segment holding at
 * least one letter is returned.
 *
 * @param pos offset after which to search
 * @param text the text to scan
 * @return the start offset of the matching segment, or {@link BreakIterator#DONE} if no later
 *     segment contains a letter
 */
private static int nextWordStartAfter(int pos, String text) {
    BreakIterator words = BreakIterator.getWordInstance();
    words.setText(text);

    int segmentStart = words.following(pos);
    int segmentEnd = words.next();
    while (segmentEnd != BreakIterator.DONE) {
        int cursor = segmentStart;
        while (cursor < segmentEnd) {
            if (Character.isLetter(text.codePointAt(cursor))) {
                return segmentStart;
            }
            cursor++;
        }
        segmentStart = segmentEnd;
        segmentEnd = words.next();
    }
    return BreakIterator.DONE;
}
Example 6
Source File: AccessibleHTML.java From openjdk-8 with GNU General Public License v2.0 | 5 votes |
/**
 * Returns the segment around <code>index</code> for the unit selected by
 * <code>part</code> (a word or a sentence), or null if no such unit can be
 * found or <code>part</code> is neither of the two.
 *
 * <p>On success the paragraph segment is narrowed in place: its offset
 * becomes the start of the unit in the array, its count becomes the unit's
 * length, and its modelOffset is shifted by the same amount as the offset.
 */
private IndexedSegment getSegmentAt(int part, int index) throws BadLocationException {
    IndexedSegment seg = getParagraphElementText(index);
    if (seg == null) {
        return null;
    }

    BreakIterator iterator;
    if (part == AccessibleText.WORD) {
        iterator = BreakIterator.getWordInstance(getLocale());
    } else if (part == AccessibleText.SENTENCE) {
        iterator = BreakIterator.getSentenceInstance(getLocale());
    } else {
        return null;
    }

    seg.first();
    iterator.setText(seg);

    // Translate the model index into an array offset before asking for the unit's end.
    int end = iterator.following(index - seg.modelOffset + seg.offset);
    if (end == BreakIterator.DONE || end > seg.offset + seg.count) {
        return null;
    }

    int begin = iterator.previous();
    if (begin == BreakIterator.DONE || begin >= seg.offset + seg.count) {
        return null;
    }

    // modelOffset must be updated before offset, since it uses the old offset.
    seg.modelOffset = seg.modelOffset + begin - seg.offset;
    seg.offset = begin;
    seg.count = end - begin;
    return seg;
}
Example 7
Source File: AccessibleHTML.java From jdk8u-dev-jdk with GNU General Public License v2.0 | 5 votes |
/**
 * Returns the segment around <code>index</code> for the unit selected by
 * <code>part</code> (a word or a sentence), or null if no such unit can be
 * found or <code>part</code> is neither of the two.
 *
 * <p>On success the paragraph segment is narrowed in place: its offset
 * becomes the start of the unit in the array, its count becomes the unit's
 * length, and its modelOffset is shifted by the same amount as the offset.
 */
private IndexedSegment getSegmentAt(int part, int index) throws BadLocationException {
    IndexedSegment seg = getParagraphElementText(index);
    if (seg == null) {
        return null;
    }

    BreakIterator iterator;
    if (part == AccessibleText.WORD) {
        iterator = BreakIterator.getWordInstance(getLocale());
    } else if (part == AccessibleText.SENTENCE) {
        iterator = BreakIterator.getSentenceInstance(getLocale());
    } else {
        return null;
    }

    seg.first();
    iterator.setText(seg);

    // Translate the model index into an array offset before asking for the unit's end.
    int end = iterator.following(index - seg.modelOffset + seg.offset);
    if (end == BreakIterator.DONE || end > seg.offset + seg.count) {
        return null;
    }

    int begin = iterator.previous();
    if (begin == BreakIterator.DONE || begin >= seg.offset + seg.count) {
        return null;
    }

    // modelOffset must be updated before offset, since it uses the old offset.
    seg.modelOffset = seg.modelOffset + begin - seg.offset;
    seg.offset = begin;
    seg.count = end - begin;
    return seg;
}
Example 8
Source File: CaretSelectionBindImpl.java From RichTextFX with BSD 2-Clause "Simplified" License | 5 votes |
/**
 * Moves the end of this selection forward by {@code numOfBreaks} breaks of the given iterator.
 *
 * <p>Does nothing when the area is empty. Otherwise the iterator is loaded with the area's
 * text and the new end position is computed starting from the current start position.
 *
 * @param numOfBreaks number of breaks to advance
 * @param breakIterator iterator defining the break positions; its text is replaced
 */
@Override
public void updateEndByBreaksForward(int numOfBreaks, BreakIterator breakIterator) {
    if (getAreaLength() != 0) {
        breakIterator.setText(getArea().getText());
        updateEndTo(
                calculatePositionViaBreakingForwards(
                        numOfBreaks, breakIterator, getStartPosition()));
    }
}
Example 9
Source File: ConditionalSpecialCasing.java From jdk8u-jdk with GNU General Public License v2.0 | 5 votes |
/** * Implements the "Final_Cased" condition * * Specification: Within the closest word boundaries containing C, there is a cased * letter before C, and there is no cased letter after C. * * Regular Expression: * Before C: [{cased==true}][{wordBoundary!=true}]* * After C: !([{wordBoundary!=true}]*[{cased}]) */ private static boolean isFinalCased(String src, int index, Locale locale) { BreakIterator wordBoundary = BreakIterator.getWordInstance(locale); wordBoundary.setText(src); int ch; // Look for a preceding 'cased' letter for (int i = index; (i >= 0) && !wordBoundary.isBoundary(i); i -= Character.charCount(ch)) { ch = src.codePointBefore(i); if (isCased(ch)) { int len = src.length(); // Check that there is no 'cased' letter after the index for (i = index + Character.charCount(src.codePointAt(index)); (i < len) && !wordBoundary.isBoundary(i); i += Character.charCount(ch)) { ch = src.codePointAt(i); if (isCased(ch)) { return false; } } return true; } } return false; }
Example 10
Source File: Bug4912404.java From dragonwell8_jdk with GNU General Public License v2.0 | 5 votes |
/**
 * Checks that {@code BreakIterator.equals(null)} returns false for a word iterator that has
 * had text set on it.
 *
 * @param args unused
 * @throws RuntimeException if the iterator reports itself equal to {@code null}
 */
public static void main(String[] args) {
    BreakIterator wordIterator = BreakIterator.getWordInstance();
    wordIterator.setText("abc");

    boolean equalsNull = wordIterator.equals(null);
    if (!equalsNull) {
        return;
    }
    throw new RuntimeException("BreakIterator.equals(null) should return false.");
}
Example 11
Source File: AccessibleHTML.java From dragonwell8_jdk with GNU General Public License v2.0 | 5 votes |
/**
 * Returns the segment around <code>index</code> for the unit selected by
 * <code>part</code> (a word or a sentence), or null if no such unit can be
 * found or <code>part</code> is neither of the two.
 *
 * <p>On success the paragraph segment is narrowed in place: its offset
 * becomes the start of the unit in the array, its count becomes the unit's
 * length, and its modelOffset is shifted by the same amount as the offset.
 */
private IndexedSegment getSegmentAt(int part, int index) throws BadLocationException {
    IndexedSegment seg = getParagraphElementText(index);
    if (seg == null) {
        return null;
    }

    BreakIterator iterator;
    if (part == AccessibleText.WORD) {
        iterator = BreakIterator.getWordInstance(getLocale());
    } else if (part == AccessibleText.SENTENCE) {
        iterator = BreakIterator.getSentenceInstance(getLocale());
    } else {
        return null;
    }

    seg.first();
    iterator.setText(seg);

    // Translate the model index into an array offset before asking for the unit's end.
    int end = iterator.following(index - seg.modelOffset + seg.offset);
    if (end == BreakIterator.DONE || end > seg.offset + seg.count) {
        return null;
    }

    int begin = iterator.previous();
    if (begin == BreakIterator.DONE || begin >= seg.offset + seg.count) {
        return null;
    }

    // modelOffset must be updated before offset, since it uses the old offset.
    seg.modelOffset = seg.modelOffset + begin - seg.offset;
    seg.offset = begin;
    seg.count = end - begin;
    return seg;
}
Example 12
Source File: AbstractWordAwareDoubleClickStrategy.java From xtext-eclipse with Eclipse Public License 2.0 | 5 votes |
/**
 * Determines the region to select around {@code offset}, using the boundaries of the
 * iterator returned by {@code createBreakIterator()}.
 *
 * <p>Returns null when the offset is at the end of its line, when the resulting region would
 * be empty, or when the document reports a bad location. If the iterator has no boundary
 * before or after the offset, the line start or line end is used instead. When the offset
 * itself is a boundary, the longer of the two neighbouring spans is kept (the span before
 * the offset wins a tie).
 *
 * @param document the document to search
 * @param offset the offset inside the document
 * @return the region found, or null
 */
@Override
protected IRegion findWord(IDocument document, int offset) {
    try {
        IRegion line = document.getLineInformationOfOffset(offset);
        if (offset == line.getOffset() + line.getLength()) {
            return null;
        }

        BreakIterator breakIter = createBreakIterator();
        breakIter.setText(new DocumentCharacterIterator(document));

        int start = breakIter.preceding(offset);
        if (start == BreakIterator.DONE) {
            start = line.getOffset();
        }
        int end = breakIter.following(offset);
        if (end == BreakIterator.DONE) {
            end = line.getOffset() + line.getLength();
        }

        if (breakIter.isBoundary(offset)) {
            boolean spanAfterIsLonger = end - offset > offset - start;
            if (spanAfterIsLonger) {
                start = offset;
            } else {
                end = offset;
            }
        }

        return end == start ? null : new Region(start, end - start);
    } catch (BadLocationException e) {
        return null;
    }
}
Example 13
Source File: ConditionalSpecialCasing.java From jdk-1.7-annotated with Apache License 2.0 | 5 votes |
/** * Implements the "Final_Cased" condition * * Specification: Within the closest word boundaries containing C, there is a cased * letter before C, and there is no cased letter after C. * * Regular Expression: * Before C: [{cased==true}][{wordBoundary!=true}]* * After C: !([{wordBoundary!=true}]*[{cased}]) */ private static boolean isFinalCased(String src, int index, Locale locale) { BreakIterator wordBoundary = BreakIterator.getWordInstance(locale); wordBoundary.setText(src); int ch; // Look for a preceding 'cased' letter for (int i = index; (i >= 0) && !wordBoundary.isBoundary(i); i -= Character.charCount(ch)) { ch = src.codePointBefore(i); if (isCased(ch)) { int len = src.length(); // Check that there is no 'cased' letter after the index for (i = index + Character.charCount(src.codePointAt(index)); (i < len) && !wordBoundary.isBoundary(i); i += Character.charCount(ch)) { ch = src.codePointAt(i); if (isCased(ch)) { return false; } } return true; } } return false; }
Example 14
Source File: AccessibleHTML.java From jdk1.8-source-analysis with Apache License 2.0 | 5 votes |
/**
 * Returns the segment around <code>index</code> for the unit selected by
 * <code>part</code> (a word or a sentence), or null if no such unit can be
 * found or <code>part</code> is neither of the two.
 *
 * <p>On success the paragraph segment is narrowed in place: its offset
 * becomes the start of the unit in the array, its count becomes the unit's
 * length, and its modelOffset is shifted by the same amount as the offset.
 */
private IndexedSegment getSegmentAt(int part, int index) throws BadLocationException {
    IndexedSegment seg = getParagraphElementText(index);
    if (seg == null) {
        return null;
    }

    BreakIterator iterator;
    if (part == AccessibleText.WORD) {
        iterator = BreakIterator.getWordInstance(getLocale());
    } else if (part == AccessibleText.SENTENCE) {
        iterator = BreakIterator.getSentenceInstance(getLocale());
    } else {
        return null;
    }

    seg.first();
    iterator.setText(seg);

    // Translate the model index into an array offset before asking for the unit's end.
    int end = iterator.following(index - seg.modelOffset + seg.offset);
    if (end == BreakIterator.DONE || end > seg.offset + seg.count) {
        return null;
    }

    int begin = iterator.previous();
    if (begin == BreakIterator.DONE || begin >= seg.offset + seg.count) {
        return null;
    }

    // modelOffset must be updated before offset, since it uses the old offset.
    seg.modelOffset = seg.modelOffset + begin - seg.offset;
    seg.offset = begin;
    seg.count = end - begin;
    return seg;
}
Example 15
Source File: TestOpenNLPSentenceBreakIterator.java From lucene-solr with Apache License 2.0 | 5 votes |
/**
 * Runs the single-sentence check on an iterator whose text is a slice of a longer buffer:
 * the buffer is the first sample sentence followed by padding, and the slice ends where the
 * sentence ends.
 */
public void testSliceEnd() throws Exception {
    BreakIterator bi =
            new OpenNLPSentenceBreakIterator(
                    OpenNLPOpsFactory.getSentenceDetector(sentenceModelFile));
    // Slice covers [0, sentence length) of the padded buffer.
    bi.setText(getCharArrayIterator(SENTENCES[0] + PADDING, 0, SENTENCES[0].length()));
    test1Sentence(bi, SENTENCES[0]);
}
Example 16
Source File: LogWriterImpl.java From gemfirexd-oss with Apache License 2.0 | 4 votes |
static void formatText(PrintWriter writer, String target, int initialLength) { BreakIterator boundary = BreakIterator.getLineInstance(); boundary.setText(target); int start = boundary.first(); int end = boundary.next(); int lineLength = initialLength; while (end != BreakIterator.DONE) { // Look at the end and only accept whitespace breaks char endChar = target.charAt(end-1); while (!Character.isWhitespace(endChar)) { int lastEnd = end; end = boundary.next(); if (end == BreakIterator.DONE) { // give up. We are at the end of the string end = lastEnd; break; } endChar = target.charAt(end-1); } int wordEnd = end; if (endChar == '\n') { // trim off the \n since println will do it for us wordEnd--; if (wordEnd > 0 && target.charAt(wordEnd-1) == '\r') { wordEnd--; } } else if (endChar == '\t') { // figure tabs use 8 characters lineLength += 7; } String word = target.substring(start, wordEnd); lineLength += word.length(); writer.print(word); if (endChar == '\n' || endChar == '\r') { // force end of line writer.println(); writer.print(" "); lineLength = 2; } start = end; end = boundary.next(); } if (lineLength != 0) { writer.println(); } }
Example 17
Source File: WordCountService.java From mojito with Apache License 2.0 | 4 votes |
/**
 * Gets the number of words in the string, assuming the string is in English.
 *
 * <p>The string is split with an English word {@link BreakIterator}; a segment counts as a
 * word when its first code point is a letter or a digit. The first character is read as a
 * full code point, so words that start with a supplementary-plane letter or digit (encoded
 * as a surrogate pair) are counted too.
 *
 * <p>This implementation doesn't know about placeholders: they are counted as words.
 * Something more clever (for example an Okapi step) could exclude them later.
 *
 * @param string the text whose words are counted
 * @return the number of words
 */
public int getEnglishWordCount(String string) {
    BreakIterator wordBreakIterator = BreakIterator.getWordInstance(Locale.ENGLISH);
    wordBreakIterator.setText(string);

    int wordCount = 0;
    int start = wordBreakIterator.first();
    for (int end = wordBreakIterator.next();
            end != BreakIterator.DONE;
            start = end, end = wordBreakIterator.next()) {
        // codePointAt (not charAt) so a leading surrogate pair is classified as one character.
        if (Character.isLetterOrDigit(string.codePointAt(start))) {
            wordCount++;
        }
    }
    return wordCount;
}
Example 18
Source File: GlyphView.java From dragonwell8_jdk with GNU General Public License v2.0 | 4 votes |
/** * Returns a location to break at in the passed in region, or * BreakIterator.DONE if there isn't a good location to break at * in the specified region. */ private int getBreakSpot(int p0, int p1) { if (breakSpots == null) { // Re-calculate breakpoints for the whole view int start = getStartOffset(); int end = getEndOffset(); int[] bs = new int[end + 1 - start]; int ix = 0; // Breaker should work on the parent element because there may be // a valid breakpoint at the end edge of the view (space, etc.) Element parent = getElement().getParentElement(); int pstart = (parent == null ? start : parent.getStartOffset()); int pend = (parent == null ? end : parent.getEndOffset()); Segment s = getText(pstart, pend); s.first(); BreakIterator breaker = getBreaker(); breaker.setText(s); // Backward search should start from end+1 unless there's NO end+1 int startFrom = end + (pend > end ? 1 : 0); for (;;) { startFrom = breaker.preceding(s.offset + (startFrom - pstart)) + (pstart - s.offset); if (startFrom > start) { // The break spot is within the view bs[ix++] = startFrom; } else { break; } } SegmentCache.releaseSharedSegment(s); breakSpots = new int[ix]; System.arraycopy(bs, 0, breakSpots, 0, ix); } int breakSpot = BreakIterator.DONE; for (int i = 0; i < breakSpots.length; i++) { int bsp = breakSpots[i]; if (bsp <= p1) { if (bsp > p0) { breakSpot = bsp; } break; } } return breakSpot; }
Example 19
Source File: GlyphView.java From openjdk-jdk9 with GNU General Public License v2.0 | 4 votes |
/** * Returns a location to break at in the passed in region, or * BreakIterator.DONE if there isn't a good location to break at * in the specified region. */ private int getBreakSpot(int p0, int p1) { if (breakSpots == null) { // Re-calculate breakpoints for the whole view int start = getStartOffset(); int end = getEndOffset(); int[] bs = new int[end + 1 - start]; int ix = 0; // Breaker should work on the parent element because there may be // a valid breakpoint at the end edge of the view (space, etc.) Element parent = getElement().getParentElement(); int pstart = (parent == null ? start : parent.getStartOffset()); int pend = (parent == null ? end : parent.getEndOffset()); Segment s = getText(pstart, pend); s.first(); BreakIterator breaker = getBreaker(); breaker.setText(s); // Backward search should start from end+1 unless there's NO end+1 int startFrom = end + (pend > end ? 1 : 0); for (;;) { startFrom = breaker.preceding(s.offset + (startFrom - pstart)) + (pstart - s.offset); if (startFrom > start) { // The break spot is within the view bs[ix++] = startFrom; } else { break; } } SegmentCache.releaseSharedSegment(s); breakSpots = new int[ix]; System.arraycopy(bs, 0, breakSpots, 0, ix); } int breakSpot = BreakIterator.DONE; for (int i = 0; i < breakSpots.length; i++) { int bsp = breakSpots[i]; if (bsp <= p1) { if (bsp > p0) { breakSpot = bsp; } break; } } return breakSpot; }
Example 20
Source File: GlyphView.java From jdk8u-dev-jdk with GNU General Public License v2.0 | 4 votes |
/** * Returns a location to break at in the passed in region, or * BreakIterator.DONE if there isn't a good location to break at * in the specified region. */ private int getBreakSpot(int p0, int p1) { if (breakSpots == null) { // Re-calculate breakpoints for the whole view int start = getStartOffset(); int end = getEndOffset(); int[] bs = new int[end + 1 - start]; int ix = 0; // Breaker should work on the parent element because there may be // a valid breakpoint at the end edge of the view (space, etc.) Element parent = getElement().getParentElement(); int pstart = (parent == null ? start : parent.getStartOffset()); int pend = (parent == null ? end : parent.getEndOffset()); Segment s = getText(pstart, pend); s.first(); BreakIterator breaker = getBreaker(); breaker.setText(s); // Backward search should start from end+1 unless there's NO end+1 int startFrom = end + (pend > end ? 1 : 0); for (;;) { startFrom = breaker.preceding(s.offset + (startFrom - pstart)) + (pstart - s.offset); if (startFrom > start) { // The break spot is within the view bs[ix++] = startFrom; } else { break; } } SegmentCache.releaseSharedSegment(s); breakSpots = new int[ix]; System.arraycopy(bs, 0, breakSpots, 0, ix); } int breakSpot = BreakIterator.DONE; for (int i = 0; i < breakSpots.length; i++) { int bsp = breakSpots[i]; if (bsp <= p1) { if (bsp > p0) { breakSpot = bsp; } break; } } return breakSpot; }