Java Code Examples for java.text.Normalizer#normalize()
The following examples show how to use
java.text.Normalizer#normalize().
You can vote up the ones you like or vote down the ones you don't like,
and go to the original project or source file by following the links above each example. You may check out the related API usage on the sidebar.
Example 1
Source File: ParsedIRI.java From rdf4j with BSD 3-Clause "New" or "Revised" License | 6 votes |
private String normalizePctEncoding(String encoded) { int cidx = Arrays.binarySearch(common_pct, encoded); if (cidx >= 0 && isMember(unreserved, common[cidx])) { return new String(Character.toChars(common[cidx])); // quickly decode unreserved encodings } else if (cidx >= 0) { return encoded; // pass through reserved encodings } String decoded = pctDecode(encoded); String ns = Normalizer.normalize(decoded, Normalizer.Form.NFC); StringBuilder sb = new StringBuilder(ns.length()); for (int c = 0, n = ns.codePointCount(0, ns.length()); c < n; c++) { int chr = ns.codePointAt(ns.offsetByCodePoints(0, c)); if (isMember(unreserved, chr)) { sb.appendCodePoint(chr); } else if (n == 1) { return toUpperCase(encoded); } else { sb.append(pctEncode(chr)); } } return sb.toString(); }
Example 2
Source File: PubAnnotationConvertUtil.java From bioasq with Apache License 2.0 | 6 votes |
static String normalizeText(String text) { String ret = new String(text); // replace spaces, double-quotes, percentage, ® with spaces ret = ret.replaceAll("[\\s\"%®]", " "); // replace vbar with 'I' for "Deiodinase type || (DIO2)" ret = ret.replaceAll("\\|", "I"); // replace multiplication mark '×' with 'x' ret = ret.replaceAll("×", "x"); // sharp-s to beta ret = ret.replaceAll("ß", "β"); // replace '·' with '.' ret = ret.replaceAll("·", "."); // remove '±' with '+' ret = ret.replaceAll("±", "+"); // remove ending whitespaces ret = ret.replaceAll("\\s+$", ""); // remove non ASCII characters // ret = nonAscii.replaceFrom(ret, ' '); // replace diacritical marks plus symbols that look alike, see // http://stackoverflow.com/questions/20674577/how-to-compare-unicode-characters-that-look-alike ret = Normalizer.normalize(ret, Normalizer.Form.NFKD); ret = diacriticalMarksPattern.matcher(ret).replaceAll(""); return ret; }
Example 3
Source File: MCRPath.java From mycore with GNU General Public License v3.0 | 6 votes |
/** * removes redundant slashes and checks for invalid characters * @param uncleanPath path to check * @return normalized path * @throws InvalidPathException if <code>uncleanPath</code> contains invalid characters */ static String normalizeAndCheck(final String uncleanPath) { String unicodeNormalizedUncleanPath = Normalizer.normalize(uncleanPath, Normalizer.Form.NFC); char prevChar = 0; final boolean afterSeparator = false; for (int i = 0; i < unicodeNormalizedUncleanPath.length(); i++) { final char c = unicodeNormalizedUncleanPath.charAt(i); checkCharacter(unicodeNormalizedUncleanPath, c, afterSeparator); if (c == SEPARATOR && prevChar == SEPARATOR) { return normalize(unicodeNormalizedUncleanPath, unicodeNormalizedUncleanPath.length(), i - 1); } prevChar = c; } if (prevChar == SEPARATOR) { //remove final slash return normalize(unicodeNormalizedUncleanPath, unicodeNormalizedUncleanPath.length(), unicodeNormalizedUncleanPath.length() - 1); } return unicodeNormalizedUncleanPath; }
Example 4
Source File: StringUtil.java From Kepler with GNU Lesser General Public License v3.0 | 6 votes |
/**
 * Replaces control characters in the input with spaces and optionally applies
 * Unicode NFD normalization.
 * <p>
 * Characters 1, 2, 9 (tab), 10 (line feed) and 12 (form feed) are always
 * replaced. Character 13 (carriage return) is replaced only when
 * {@code filerNewline} is set. NFD normalization is applied when the
 * {@code normalise.input.strings} configuration flag is true.
 *
 * @param input the input
 * @param filerNewline if carriage returns (ENTER) should be filtered as well
 * @return the filtered string
 */
public static String filterInput(String input, boolean filerNewline) {
    final char[] alwaysBlanked = { 1, 2, 9, 10, 12 };

    String filtered = input;
    for (char control : alwaysBlanked) {
        filtered = filtered.replace(control, ' ');
    }

    if (filerNewline) {
        filtered = filtered.replace((char) 13, ' ');
    }

    boolean normalise = GameConfiguration.getInstance().getBoolean("normalise.input.strings");
    return normalise ? Normalizer.normalize(filtered, Normalizer.Form.NFD) : filtered;
}
Example 5
Source File: SlugUtil.java From mapr-music with Apache License 2.0 | 5 votes |
/**
 * Converts the specified string to its slug representation, which can be used
 * to generate readable and SEO-friendly URLs.
 * <p>
 * Steps, in order: transliterate, replace {@code WHITESPACE} matches with
 * '-', decompose to NFD, delete {@code NONLATIN} matches, delete
 * {@code EDGESDHASHES} matches and lower-case using the English locale.
 *
 * @param input string, which will be converted.
 * @return slug representation of the string.
 */
public static String toSlug(String input) {
    String dashed = WHITESPACE.matcher(transliterator.transform(input)).replaceAll("-");
    String decomposed = Normalizer.normalize(dashed, Normalizer.Form.NFD);
    String latinOnly = NONLATIN.matcher(decomposed).replaceAll("");
    String trimmedEdges = EDGESDHASHES.matcher(latinOnly).replaceAll("");
    return trimmedEdges.toLowerCase(Locale.ENGLISH);
}
Example 6
Source File: KeyCharacterMap.java From android_9.0.0_r45 with Apache License 2.0 | 5 votes |
/** * Get the character that is produced by combining the dead key producing accent * with the key producing character c. * For example, getDeadChar('`', 'e') returns è. * getDeadChar('^', ' ') returns '^' and getDeadChar('^', '^') returns '^'. * * @param accent The accent character. eg. '`' * @param c The basic character. * @return The combined character, or 0 if the characters cannot be combined. */ public static int getDeadChar(int accent, int c) { if (c == accent || CHAR_SPACE == c) { // The same dead character typed twice or a dead character followed by a // space should both produce the non-combining version of the combining char. // In this case we don't even need to compute the combining character. return accent; } int combining = sAccentToCombining.get(accent); if (combining == 0) { return 0; } final int combination = (combining << 16) | c; int combined; synchronized (sDeadKeyCache) { combined = sDeadKeyCache.get(combination, -1); if (combined == -1) { sDeadKeyBuilder.setLength(0); sDeadKeyBuilder.append((char)c); sDeadKeyBuilder.append((char)combining); String result = Normalizer.normalize(sDeadKeyBuilder, Normalizer.Form.NFC); combined = result.codePointCount(0, result.length()) == 1 ? result.codePointAt(0) : 0; sDeadKeyCache.put(combination, combined); } } return combined; }
Example 7
Source File: Bip39.java From AndroidWallet with GNU General Public License v3.0 | 5 votes |
@SuppressWarnings("NewApi") private MasterSeed(byte[] bip39RawEntropy, String bip39Passphrase, byte[] bip32MasterSeed) { _bip39RawEntropy = bip39RawEntropy; _bip39Passphrase = Normalizer.normalize(bip39Passphrase, Normalizer.Form.NFKD); _bip32MasterSeed = bip32MasterSeed; _wordListType = ENGLISH_WORD_LIST_TYPE; }
Example 8
Source File: RegularFileObject.java From openjdk-8 with GNU General Public License v2.0 | 5 votes |
@Override public boolean isNameCompatible(String cn, JavaFileObject.Kind kind) { cn.getClass(); // null check if (kind == Kind.OTHER && getKind() != kind) { return false; } String n = cn + kind.extension; if (name.equals(n)) { return true; } if (isMacOS && Normalizer.isNormalized(name, Normalizer.Form.NFD) && Normalizer.isNormalized(n, Normalizer.Form.NFC)) { // On Mac OS X it is quite possible to file name and class // name normalized in a different way - in that case we have to normalize file name // to the Normal Form Compised (NFC) String normName = Normalizer.normalize(name, Normalizer.Form.NFC); if (normName.equals(n)) { this.name = normName; return true; } } if (name.equalsIgnoreCase(n)) { try { // allow for Windows return file.getCanonicalFile().getName().equals(n); } catch (IOException e) { } } return false; }
Example 9
Source File: SmsMsg.java From XposedSmsCode with GNU General Public License v3.0 | 5 votes |
/**
 * Builds an {@code SmsMsg} from the SMS parts carried by an intent.
 * <p>
 * The sender is taken from the first part, the body from all parts; both are
 * normalized to Unicode NFC before being stored.
 *
 * @param intent the intent carrying the SMS parts
 * @return a new message with sender and body set
 */
public static SmsMsg fromIntent(Intent intent) {
    final SmsMessage[] parts = SmsMessageUtils.fromIntent(intent);

    final String rawSender = parts[0].getDisplayOriginatingAddress();
    final String rawBody = SmsMessageUtils.getMessageBody(parts);

    final String nfcSender = Normalizer.normalize(rawSender, Normalizer.Form.NFC);
    final String nfcBody = Normalizer.normalize(rawBody, Normalizer.Form.NFC);

    final SmsMsg result = new SmsMsg();
    result.setSender(nfcSender).setBody(nfcBody);
    return result;
}
Example 10
Source File: PinHasher.java From mollyim-android with GNU General Public License v3.0 | 5 votes |
/**
 * Converts a PIN into the byte sequence that is fed to the hash.
 * <p>
 * The PIN is trimmed, passed through {@code PinString.toArabic} when
 * {@code PinString.allNumeric} reports it as all numeric, normalized to
 * Unicode NFKD and finally encoded as UTF-8.
 *
 * @param pin the PIN as entered
 * @return the UTF-8 bytes of the normalized PIN
 */
public static byte[] normalize(String pin) {
  final String trimmed = pin.trim();
  final String digitsFolded = PinString.allNumeric(trimmed)
      ? PinString.toArabic(trimmed)
      : trimmed;

  return Normalizer.normalize(digitsFolded, Normalizer.Form.NFKD)
                   .getBytes(StandardCharsets.UTF_8);
}
Example 11
Source File: BowlerStudioMenu.java From BowlerStudio with GNU General Public License v3.0 | 5 votes |
/**
 * Turns free text into a slug.
 * <p>
 * {@code WHITESPACE} matches become '-', the text is decomposed to NFD,
 * {@code NONLATIN} matches are deleted and every '-' is finally replaced
 * with '_'.
 *
 * @param input the text to convert
 * @return the slug
 */
public static String slugify(String input) {
	String dashed = WHITESPACE.matcher(input).replaceAll("-");
	String decomposed = Normalizer.normalize(dashed, Form.NFD);
	String latinOnly = NONLATIN.matcher(decomposed).replaceAll("");
	return latinOnly.replace('-', '_');
}
Example 12
Source File: ReScuePattern.java From ReScue with GNU General Public License v2.0 | 5 votes |
/** * The pattern is converted to normalizedD form and then a pure group * is constructed to match canonical equivalences of the characters. */ private void normalize() { boolean inCharClass = false; int lastCodePoint = -1; // Convert pattern into normalizedD form normalizedPattern = Normalizer.normalize(pattern, Normalizer.Form.NFD); patternLength = normalizedPattern.length(); // Modify pattern to match canonical equivalences StringBuilder newPattern = new StringBuilder(patternLength); for(int i=0; i<patternLength; ) { int c = normalizedPattern.codePointAt(i); StringBuilder sequenceBuffer; if ((Character.getType(c) == Character.NON_SPACING_MARK) && (lastCodePoint != -1)) { sequenceBuffer = new StringBuilder(); sequenceBuffer.appendCodePoint(lastCodePoint); sequenceBuffer.appendCodePoint(c); while(Character.getType(c) == Character.NON_SPACING_MARK) { i += Character.charCount(c); if (i >= patternLength) break; c = normalizedPattern.codePointAt(i); sequenceBuffer.appendCodePoint(c); } String ea = produceEquivalentAlternation( sequenceBuffer.toString()); newPattern.setLength(newPattern.length()-Character.charCount(lastCodePoint)); newPattern.append("(?:").append(ea).append(")"); } else if (c == '[' && lastCodePoint != '\\') { i = normalizeCharClass(newPattern, i); } else { newPattern.appendCodePoint(c); } lastCodePoint = c; i += Character.charCount(c); } normalizedPattern = newPattern.toString(); }
Example 13
Source File: ReScuePattern.java From ReScue with GNU General Public License v2.0 | 5 votes |
/**
 * Attempts to compose input by combining the first character with the first
 * combining mark following it.
 *
 * @param input the sequence to compose
 * @return the NFC composition of the leading characters followed by the
 *         untouched remainder, or null if NFC leaves the leading characters
 *         unchanged
 */
private String composeOneStep(String input) {
    final int headLength = countChars(input, 0, 2);
    final String head = input.substring(0, headLength);
    final String composedHead = Normalizer.normalize(head, Normalizer.Form.NFC);

    return composedHead.equals(head)
            ? null
            : composedHead + input.substring(headLength);
}
Example 14
Source File: Frag.java From prayer-times-android with Apache License 2.0 | 4 votes |
/**
 * Reduces text to a lower-case, ASCII-only form.
 * <p>
 * The text is decomposed to NFD, every non-ASCII character (including the
 * combining marks split off by the decomposition) is replaced with '_', and
 * the result is lower-cased using the English locale.
 *
 * @param str the text to normalize
 * @return the lower-case ASCII representation
 */
private static String normalize(CharSequence str) {
    return Normalizer.normalize(str, Normalizer.Form.NFD)
            .replaceAll("[^\\p{ASCII}]", "_")
            .toLowerCase(Locale.ENGLISH);
}
Example 15
Source File: Utils.java From javaee8-jaxrs-sample with GNU General Public License v3.0 | 4 votes |
/**
 * Turns free text into a lower-case slug.
 * <p>
 * {@code WHITESPACE} matches become '-', the text is decomposed to NFD,
 * {@code NONLATIN} matches are deleted and the result is lower-cased using
 * the English locale.
 *
 * @param input the text to convert
 * @return the slug
 */
public static String slugify(String input) {
    String dashed = WHITESPACE.matcher(input).replaceAll("-");
    String decomposed = Normalizer.normalize(dashed, Form.NFD);
    return NONLATIN.matcher(decomposed)
            .replaceAll("")
            .toLowerCase(Locale.ENGLISH);
}
Example 16
Source File: RedisSentinelURI.java From redis-rdb-cli with Apache License 2.0 | 4 votes |
/**
 * Normalizes a string to Unicode NFC and encodes it as UTF-8.
 *
 * @param s the string to normalize
 * @return a buffer holding the UTF-8 bytes of the NFC form
 */
private static ByteBuffer normalize(String s) {
    // Charset.encode(String) is specified to behave like encode(CharBuffer.wrap(s)).
    return StandardCharsets.UTF_8.encode(Normalizer.normalize(s, Normalizer.Form.NFC));
}
Example 17
Source File: LegacyAssignmentPolicy.java From marklogic-contentpump with Apache License 2.0 | 4 votes |
/**
 * Returns the given URI in Unicode NFC normal form.
 *
 * @param uri the URI to normalize
 * @return the NFC form of {@code uri}
 */
protected static String normalize(String uri) {
    final Normalizer.Form composedForm = Normalizer.Form.NFC;
    return Normalizer.normalize(uri, composedForm);
}
Example 18
Source File: LauncherHelper.java From Bytecoder with Apache License 2.0 | 4 votes |
/** * Returns the main class for a module. The query is either a module name * or module-name/main-class. For the former then the module's main class * is obtained from the module descriptor (MainClass attribute). */ private static Class<?> loadModuleMainClass(String what) { int i = what.indexOf('/'); String mainModule; String mainClass; if (i == -1) { mainModule = what; mainClass = null; } else { mainModule = what.substring(0, i); mainClass = what.substring(i+1); } // main module is in the boot layer ModuleLayer layer = ModuleLayer.boot(); Optional<Module> om = layer.findModule(mainModule); if (!om.isPresent()) { // should not happen throw new InternalError("Module " + mainModule + " not in boot Layer"); } Module m = om.get(); // get main class if (mainClass == null) { Optional<String> omc = m.getDescriptor().mainClass(); if (!omc.isPresent()) { abort(null, "java.launcher.module.error1", mainModule); } mainClass = omc.get(); } // load the class from the module Class<?> c = null; try { c = Class.forName(m, mainClass); if (c == null && System.getProperty("os.name", "").contains("OS X") && Normalizer.isNormalized(mainClass, Normalizer.Form.NFD)) { String cn = Normalizer.normalize(mainClass, Normalizer.Form.NFC); c = Class.forName(m, cn); } } catch (LinkageError le) { abort(null, "java.launcher.module.error3", mainClass, m.getName(), le.getClass().getName() + ": " + le.getLocalizedMessage()); } if (c == null) { abort(null, "java.launcher.module.error2", mainClass, mainModule); } System.setProperty("jdk.module.main.class", c.getName()); return c; }
Example 19
Source File: DomainServiceImpl.java From graviteeio-access-management with Apache License 2.0 | 4 votes |
/**
 * Derives a context path from a domain name.
 * <p>
 * The name is decomposed to Unicode NFD, combining diacritical marks are
 * removed, the result is lower-cased and trimmed, and every run of
 * whitespace is replaced with a single '-'.
 *
 * @param domainName the human-readable domain name
 * @return the generated context path
 */
private String generateContextPath(String domainName) {
    String decomposed = Normalizer.normalize(domainName, Normalizer.Form.NFD);
    Pattern combiningMarks = Pattern.compile("\\p{InCombiningDiacriticalMarks}+");
    String unaccented = combiningMarks.matcher(decomposed).replaceAll("");

    // Lower-case with Locale.ROOT so the result does not depend on the JVM's
    // default locale (under a Turkish locale "I" would become dotless "ı").
    return unaccented.toLowerCase(java.util.Locale.ROOT).trim().replaceAll("\\s{1,}", "-");
}
Example 20
Source File: MCRXMLFunctions.java From mycore with GNU General Public License v3.0 | 2 votes |
/**
 * Returns the given String in Unicode NFC normal form.
 *
 * @param arg0 String to be normalized
 * @return the NFC form of {@code arg0}
 * @see Normalizer#normalize(CharSequence, java.text.Normalizer.Form)
 */
public static String normalizeUnicode(String arg0) {
    final String composed = Normalizer.normalize(arg0, Normalizer.Form.NFC);
    return composed;
}