001/* 002 * Licensed to the Apache Software Foundation (ASF) under one or more 003 * contributor license agreements. See the NOTICE file distributed with 004 * this work for additional information regarding copyright ownership. 005 * The ASF licenses this file to You under the Apache License, Version 2.0 006 * (the "License"); you may not use this file except in compliance with 007 * the License. You may obtain a copy of the License at 008 * 009 * http://www.apache.org/licenses/LICENSE-2.0 010 * 011 * Unless required by applicable law or agreed to in writing, software 012 * distributed under the License is distributed on an "AS IS" BASIS, 013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 014 * See the License for the specific language governing permissions and 015 * limitations under the License. 016 */ 017package org.apache.commons.text; 018 019import java.util.HashSet; 020import java.util.Set; 021import java.util.regex.Matcher; 022import java.util.regex.Pattern; 023 024import org.apache.commons.lang3.ArrayUtils; 025import org.apache.commons.lang3.StringUtils; 026import org.apache.commons.lang3.Validate; 027 028/** 029 * Operations on Strings that contain words. 030 * 031 * <p> 032 * This class tries to handle {@code null} input gracefully. An exception will not be thrown for a 033 * {@code null} input. Each method documents its behavior in more detail. 034 * </p> 035 * 036 * @since 1.1 037 */ 038public class WordUtils { 039 040 /** 041 * Abbreviates the words nicely. 042 * 043 * <p> 044 * This method searches for the first space after the lower limit and abbreviates 045 * the String there. It will also append any String passed as a parameter 046 * to the end of the String. The upper limit can be specified to forcibly 047 * abbreviate a String. 048 * </p> 049 * 050 * @param str the string to be abbreviated. If null is passed, null is returned. 051 * If the empty String is passed, the empty string is returned. 052 * @param lower the lower limit; negative value is treated as zero. 053 * @param upper the upper limit; specify -1 if no limit is desired. 054 * The upper limit cannot be lower than the lower limit. 055 * @param appendToEnd String to be appended to the end of the abbreviated string. 056 * This is appended ONLY if the string was indeed abbreviated. 057 * The append does not count towards the lower or upper limits. 058 * @return The abbreviated String. 059 * 060 * <pre> 061 * WordUtils.abbreviate("Now is the time for all good men", 0, 40, null)); = "Now" 062 * WordUtils.abbreviate("Now is the time for all good men", 10, 40, null)); = "Now is the" 063 * WordUtils.abbreviate("Now is the time for all good men", 20, 40, null)); = "Now is the time for all" 064 * WordUtils.abbreviate("Now is the time for all good men", 0, 40, "")); = "Now" 065 * WordUtils.abbreviate("Now is the time for all good men", 10, 40, "")); = "Now is the" 066 * WordUtils.abbreviate("Now is the time for all good men", 20, 40, "")); = "Now is the time for all" 067 * WordUtils.abbreviate("Now is the time for all good men", 0, 40, " ...")); = "Now ..." 068 * WordUtils.abbreviate("Now is the time for all good men", 10, 40, " ...")); = "Now is the ..." 069 * WordUtils.abbreviate("Now is the time for all good men", 20, 40, " ...")); = "Now is the time for all ..." 070 * WordUtils.abbreviate("Now is the time for all good men", 0, -1, "")); = "Now" 071 * WordUtils.abbreviate("Now is the time for all good men", 10, -1, "")); = "Now is the" 072 * WordUtils.abbreviate("Now is the time for all good men", 20, -1, "")); = "Now is the time for all" 073 * WordUtils.abbreviate("Now is the time for all good men", 50, -1, "")); = "Now is the time for all good men" 074 * WordUtils.abbreviate("Now is the time for all good men", 1000, -1, "")); = "Now is the time for all good men" 075 * WordUtils.abbreviate("Now is the time for all good men", 9, -10, null)); = IllegalArgumentException 076 * WordUtils.abbreviate("Now is the time for all good men", 10, 5, null)); = IllegalArgumentException 077 * </pre> 078 */ 079 public static String abbreviate(final String str, int lower, int upper, final String appendToEnd) { 080 Validate.isTrue(upper >= -1, "upper value cannot be less than -1"); 081 Validate.isTrue(upper >= lower || upper == -1, "upper value is less than lower value"); 082 if (StringUtils.isEmpty(str)) { 083 return str; 084 } 085 086 // if the lower value is greater than the length of the string, 087 // set to the length of the string 088 if (lower > str.length()) { 089 lower = str.length(); 090 } 091 092 // if the upper value is -1 (i.e. no limit) or is greater 093 // than the length of the string, set to the length of the string 094 if (upper == -1 || upper > str.length()) { 095 upper = str.length(); 096 } 097 098 final StringBuilder result = new StringBuilder(); 099 final int index = StringUtils.indexOf(str, " ", lower); 100 if (index == -1) { 101 result.append(str, 0, upper); 102 // only if abbreviation has occurred do we append the appendToEnd value 103 if (upper != str.length()) { 104 result.append(StringUtils.defaultString(appendToEnd)); 105 } 106 } else { 107 result.append(str, 0, Math.min(index, upper)); 108 result.append(StringUtils.defaultString(appendToEnd)); 109 } 110 111 return result.toString(); 112 } 113 114 /** 115 * Capitalizes all the whitespace separated words in a String. 116 * Only the first character of each word is changed. To convert the 117 * rest of each word to lowercase at the same time, 118 * use {@link #capitalizeFully(String)}. 119 * 120 * <p>Whitespace is defined by {@link Character#isWhitespace(char)}. 121 * A {@code null} input String returns {@code null}. 122 * Capitalization uses the Unicode title case, normally equivalent to 123 * upper case.</p> 124 * 125 * <pre> 126 * WordUtils.capitalize(null) = null 127 * WordUtils.capitalize("") = "" 128 * WordUtils.capitalize("i am FINE") = "I Am FINE" 129 * </pre> 130 * 131 * @param str the String to capitalize, may be null 132 * @return capitalized String, {@code null} if null String input 133 * @see #uncapitalize(String) 134 * @see #capitalizeFully(String) 135 */ 136 public static String capitalize(final String str) { 137 return capitalize(str, null); 138 } 139 140 /** 141 * Capitalizes all the delimiter separated words in a String. 142 * Only the first character of each word is changed. To convert the 143 * rest of each word to lowercase at the same time, 144 * use {@link #capitalizeFully(String, char[])}. 145 * 146 * <p>The delimiters represent a set of characters understood to separate words. 147 * The first string character and the first non-delimiter character after a 148 * delimiter will be capitalized.</p> 149 * 150 * <p>A {@code null} input String returns {@code null}. 151 * Capitalization uses the Unicode title case, normally equivalent to 152 * upper case.</p> 153 * 154 * <pre> 155 * WordUtils.capitalize(null, *) = null 156 * WordUtils.capitalize("", *) = "" 157 * WordUtils.capitalize(*, new char[0]) = * 158 * WordUtils.capitalize("i am fine", null) = "I Am Fine" 159 * WordUtils.capitalize("i aM.fine", {'.'}) = "I aM.Fine" 160 * WordUtils.capitalize("i am fine", new char[]{}) = "I am fine" 161 * </pre> 162 * 163 * @param str the String to capitalize, may be null 164 * @param delimiters set of characters to determine capitalization, null means whitespace 165 * @return capitalized String, {@code null} if null String input 166 * @see #uncapitalize(String) 167 * @see #capitalizeFully(String) 168 */ 169 public static String capitalize(final String str, final char... delimiters) { 170 if (StringUtils.isEmpty(str)) { 171 return str; 172 } 173 final Set<Integer> delimiterSet = generateDelimiterSet(delimiters); 174 final int strLen = str.length(); 175 final int[] newCodePoints = new int[strLen]; 176 int outOffset = 0; 177 178 boolean capitalizeNext = true; 179 for (int index = 0; index < strLen;) { 180 final int codePoint = str.codePointAt(index); 181 182 if (delimiterSet.contains(codePoint)) { 183 capitalizeNext = true; 184 newCodePoints[outOffset++] = codePoint; 185 index += Character.charCount(codePoint); 186 } else if (capitalizeNext) { 187 final int titleCaseCodePoint = Character.toTitleCase(codePoint); 188 newCodePoints[outOffset++] = titleCaseCodePoint; 189 index += Character.charCount(titleCaseCodePoint); 190 capitalizeNext = false; 191 } else { 192 newCodePoints[outOffset++] = codePoint; 193 index += Character.charCount(codePoint); 194 } 195 } 196 return new String(newCodePoints, 0, outOffset); 197 } 198 199 /** 200 * Converts all the whitespace separated words in a String into capitalized words, 201 * that is each word is made up of a titlecase character and then a series of 202 * lowercase characters. 203 * 204 * <p>Whitespace is defined by {@link Character#isWhitespace(char)}. 205 * A {@code null} input String returns {@code null}. 206 * Capitalization uses the Unicode title case, normally equivalent to 207 * upper case.</p> 208 * 209 * <pre> 210 * WordUtils.capitalizeFully(null) = null 211 * WordUtils.capitalizeFully("") = "" 212 * WordUtils.capitalizeFully("i am FINE") = "I Am Fine" 213 * </pre> 214 * 215 * @param str the String to capitalize, may be null 216 * @return capitalized String, {@code null} if null String input 217 */ 218 public static String capitalizeFully(final String str) { 219 return capitalizeFully(str, null); 220 } 221 222 /** 223 * Converts all the delimiter separated words in a String into capitalized words, 224 * that is each word is made up of a titlecase character and then a series of 225 * lowercase characters. 226 * 227 * <p>The delimiters represent a set of characters understood to separate words. 228 * The first string character and the first non-delimiter character after a 229 * delimiter will be capitalized.</p> 230 * 231 * <p>A {@code null} input String returns {@code null}. 232 * Capitalization uses the Unicode title case, normally equivalent to 233 * upper case.</p> 234 * 235 * <pre> 236 * WordUtils.capitalizeFully(null, *) = null 237 * WordUtils.capitalizeFully("", *) = "" 238 * WordUtils.capitalizeFully(*, null) = * 239 * WordUtils.capitalizeFully(*, new char[0]) = * 240 * WordUtils.capitalizeFully("i aM.fine", {'.'}) = "I am.Fine" 241 * </pre> 242 * 243 * @param str the String to capitalize, may be null 244 * @param delimiters set of characters to determine capitalization, null means whitespace 245 * @return capitalized String, {@code null} if null String input 246 */ 247 public static String capitalizeFully(String str, final char... delimiters) { 248 if (StringUtils.isEmpty(str)) { 249 return str; 250 } 251 str = str.toLowerCase(); 252 return capitalize(str, delimiters); 253 } 254 255 /** 256 * Checks if the String contains all words in the given array. 257 * 258 * <p> 259 * A {@code null} String will return {@code false}. A {@code null}, zero 260 * length search array or if one element of array is null will return {@code false}. 261 * </p> 262 * 263 * <pre> 264 * WordUtils.containsAllWords(null, *) = false 265 * WordUtils.containsAllWords("", *) = false 266 * WordUtils.containsAllWords(*, null) = false 267 * WordUtils.containsAllWords(*, []) = false 268 * WordUtils.containsAllWords("abcd", "ab", "cd") = false 269 * WordUtils.containsAllWords("abc def", "def", "abc") = true 270 * </pre> 271 * 272 * @param word The CharSequence to check, may be null 273 * @param words The array of String words to search for, may be null 274 * @return {@code true} if all search words are found, {@code false} otherwise 275 */ 276 public static boolean containsAllWords(final CharSequence word, final CharSequence... words) { 277 if (StringUtils.isEmpty(word) || ArrayUtils.isEmpty(words)) { 278 return false; 279 } 280 for (final CharSequence w : words) { 281 if (StringUtils.isBlank(w)) { 282 return false; 283 } 284 final Pattern p = Pattern.compile(".*\\b" + w + "\\b.*"); 285 if (!p.matcher(word).matches()) { 286 return false; 287 } 288 } 289 return true; 290 } 291 292 /** 293 * Converts an array of delimiters to a hash set of code points. Code point of space(32) is added as the default 294 * value if delimiters is null. The generated hash set provides O(1) lookup time. 295 * 296 * @param delimiters set of characters to determine capitalization, null means whitespace 297 * @return Set<Integer> 298 */ 299 private static Set<Integer> generateDelimiterSet(final char[] delimiters) { 300 final Set<Integer> delimiterHashSet = new HashSet<>(); 301 if (delimiters == null || delimiters.length == 0) { 302 if (delimiters == null) { 303 delimiterHashSet.add(Character.codePointAt(new char[] {' '}, 0)); 304 } 305 306 return delimiterHashSet; 307 } 308 309 for (int index = 0; index < delimiters.length; index++) { 310 delimiterHashSet.add(Character.codePointAt(delimiters, index)); 311 } 312 return delimiterHashSet; 313 } 314 315 /** 316 * Extracts the initial characters from each word in the String. 317 * 318 * <p>All first characters after whitespace are returned as a new string. 319 * Their case is not changed.</p> 320 * 321 * <p>Whitespace is defined by {@link Character#isWhitespace(char)}. 322 * A {@code null} input String returns {@code null}.</p> 323 * 324 * <pre> 325 * WordUtils.initials(null) = null 326 * WordUtils.initials("") = "" 327 * WordUtils.initials("Ben John Lee") = "BJL" 328 * WordUtils.initials("Ben J.Lee") = "BJ" 329 * </pre> 330 * 331 * @param str the String to get initials from, may be null 332 * @return String of initial letters, {@code null} if null String input 333 * @see #initials(String,char[]) 334 */ 335 public static String initials(final String str) { 336 return initials(str, null); 337 } 338 339 /** 340 * Extracts the initial characters from each word in the String. 341 * 342 * <p>All first characters after the defined delimiters are returned as a new string. 343 * Their case is not changed.</p> 344 * 345 * <p>If the delimiters array is null, then Whitespace is used. 346 * Whitespace is defined by {@link Character#isWhitespace(char)}. 347 * A {@code null} input String returns {@code null}. 348 * An empty delimiter array returns an empty String.</p> 349 * 350 * <pre> 351 * WordUtils.initials(null, *) = null 352 * WordUtils.initials("", *) = "" 353 * WordUtils.initials("Ben John Lee", null) = "BJL" 354 * WordUtils.initials("Ben J.Lee", null) = "BJ" 355 * WordUtils.initials("Ben J.Lee", [' ','.']) = "BJL" 356 * WordUtils.initials(*, new char[0]) = "" 357 * </pre> 358 * 359 * @param str the String to get initials from, may be null 360 * @param delimiters set of characters to determine words, null means whitespace 361 * @return String of initial characters, {@code null} if null String input 362 * @see #initials(String) 363 */ 364 public static String initials(final String str, final char... delimiters) { 365 if (StringUtils.isEmpty(str)) { 366 return str; 367 } 368 if (delimiters != null && delimiters.length == 0) { 369 return StringUtils.EMPTY; 370 } 371 final Set<Integer> delimiterSet = generateDelimiterSet(delimiters); 372 final int strLen = str.length(); 373 final int[] newCodePoints = new int[strLen / 2 + 1]; 374 int count = 0; 375 boolean lastWasGap = true; 376 for (int i = 0; i < strLen;) { 377 final int codePoint = str.codePointAt(i); 378 379 if (delimiterSet.contains(codePoint) || delimiters == null && Character.isWhitespace(codePoint)) { 380 lastWasGap = true; 381 } else if (lastWasGap) { 382 newCodePoints[count++] = codePoint; 383 lastWasGap = false; 384 } 385 386 i += Character.charCount(codePoint); 387 } 388 return new String(newCodePoints, 0, count); 389 } 390 391 /** 392 * Is the character a delimiter. 393 * 394 * @param ch the character to check 395 * @param delimiters the delimiters 396 * @return true if it is a delimiter 397 * @deprecated as of 1.2 and will be removed in 2.0 398 */ 399 @Deprecated 400 public static boolean isDelimiter(final char ch, final char[] delimiters) { 401 if (delimiters == null) { 402 return Character.isWhitespace(ch); 403 } 404 for (final char delimiter : delimiters) { 405 if (ch == delimiter) { 406 return true; 407 } 408 } 409 return false; 410 } 411 412 /** 413 * Is the codePoint a delimiter. 414 * 415 * @param codePoint the codePint to check 416 * @param delimiters the delimiters 417 * @return true if it is a delimiter 418 * @deprecated as of 1.2 and will be removed in 2.0 419 */ 420 @Deprecated 421 public static boolean isDelimiter(final int codePoint, final char[] delimiters) { 422 if (delimiters == null) { 423 return Character.isWhitespace(codePoint); 424 } 425 for (int index = 0; index < delimiters.length; index++) { 426 final int delimiterCodePoint = Character.codePointAt(delimiters, index); 427 if (delimiterCodePoint == codePoint) { 428 return true; 429 } 430 } 431 return false; 432 } 433 434 /** 435 * Swaps the case of a String using a word based algorithm. 436 * 437 * <ul> 438 * <li>Upper case character converts to Lower case</li> 439 * <li>Title case character converts to Lower case</li> 440 * <li>Lower case character after Whitespace or at start converts to Title case</li> 441 * <li>Other Lower case character converts to Upper case</li> 442 * </ul> 443 * 444 * <p>Whitespace is defined by {@link Character#isWhitespace(char)}. 445 * A {@code null} input String returns {@code null}.</p> 446 * 447 * <pre> 448 * StringUtils.swapCase(null) = null 449 * StringUtils.swapCase("") = "" 450 * StringUtils.swapCase("The dog has a BONE") = "tHE DOG HAS A bone" 451 * </pre> 452 * 453 * @param str the String to swap case, may be null 454 * @return The changed String, {@code null} if null String input 455 */ 456 public static String swapCase(final String str) { 457 if (StringUtils.isEmpty(str)) { 458 return str; 459 } 460 final int strLen = str.length(); 461 final int[] newCodePoints = new int[strLen]; 462 int outOffset = 0; 463 boolean whitespace = true; 464 for (int index = 0; index < strLen;) { 465 final int oldCodepoint = str.codePointAt(index); 466 final int newCodePoint; 467 if (Character.isUpperCase(oldCodepoint) || Character.isTitleCase(oldCodepoint)) { 468 newCodePoint = Character.toLowerCase(oldCodepoint); 469 whitespace = false; 470 } else if (Character.isLowerCase(oldCodepoint)) { 471 if (whitespace) { 472 newCodePoint = Character.toTitleCase(oldCodepoint); 473 whitespace = false; 474 } else { 475 newCodePoint = Character.toUpperCase(oldCodepoint); 476 } 477 } else { 478 whitespace = Character.isWhitespace(oldCodepoint); 479 newCodePoint = oldCodepoint; 480 } 481 newCodePoints[outOffset++] = newCodePoint; 482 index += Character.charCount(newCodePoint); 483 } 484 return new String(newCodePoints, 0, outOffset); 485 } 486 487 /** 488 * Uncapitalizes all the whitespace separated words in a String. 489 * Only the first character of each word is changed. 490 * 491 * <p>Whitespace is defined by {@link Character#isWhitespace(char)}. 492 * A {@code null} input String returns {@code null}.</p> 493 * 494 * <pre> 495 * WordUtils.uncapitalize(null) = null 496 * WordUtils.uncapitalize("") = "" 497 * WordUtils.uncapitalize("I Am FINE") = "i am fINE" 498 * </pre> 499 * 500 * @param str the String to uncapitalize, may be null 501 * @return uncapitalized String, {@code null} if null String input 502 * @see #capitalize(String) 503 */ 504 public static String uncapitalize(final String str) { 505 return uncapitalize(str, null); 506 } 507 508 /** 509 * Uncapitalizes all the whitespace separated words in a String. 510 * Only the first character of each word is changed. 511 * 512 * <p>The delimiters represent a set of characters understood to separate words. 513 * The first string character and the first non-delimiter character after a 514 * delimiter will be uncapitalized.</p> 515 * 516 * <p>Whitespace is defined by {@link Character#isWhitespace(char)}. 517 * A {@code null} input String returns {@code null}.</p> 518 * 519 * <pre> 520 * WordUtils.uncapitalize(null, *) = null 521 * WordUtils.uncapitalize("", *) = "" 522 * WordUtils.uncapitalize(*, null) = * 523 * WordUtils.uncapitalize(*, new char[0]) = * 524 * WordUtils.uncapitalize("I AM.FINE", {'.'}) = "i AM.fINE" 525 * WordUtils.uncapitalize("I am fine", new char[]{}) = "i am fine" 526 * </pre> 527 * 528 * @param str the String to uncapitalize, may be null 529 * @param delimiters set of characters to determine uncapitalization, null means whitespace 530 * @return uncapitalized String, {@code null} if null String input 531 * @see #capitalize(String) 532 */ 533 public static String uncapitalize(final String str, final char... delimiters) { 534 if (StringUtils.isEmpty(str)) { 535 return str; 536 } 537 final Set<Integer> delimiterSet = generateDelimiterSet(delimiters); 538 final int strLen = str.length(); 539 final int[] newCodePoints = new int[strLen]; 540 int outOffset = 0; 541 542 boolean uncapitalizeNext = true; 543 for (int index = 0; index < strLen;) { 544 final int codePoint = str.codePointAt(index); 545 546 if (delimiterSet.contains(codePoint)) { 547 uncapitalizeNext = true; 548 newCodePoints[outOffset++] = codePoint; 549 index += Character.charCount(codePoint); 550 } else if (uncapitalizeNext) { 551 final int titleCaseCodePoint = Character.toLowerCase(codePoint); 552 newCodePoints[outOffset++] = titleCaseCodePoint; 553 index += Character.charCount(titleCaseCodePoint); 554 uncapitalizeNext = false; 555 } else { 556 newCodePoints[outOffset++] = codePoint; 557 index += Character.charCount(codePoint); 558 } 559 } 560 return new String(newCodePoints, 0, outOffset); 561 } 562 563 /** 564 * Wraps a single line of text, identifying words by {@code ' '}. 565 * 566 * <p>New lines will be separated by the system property line separator. 567 * Very long words, such as URLs will <i>not</i> be wrapped.</p> 568 * 569 * <p>Leading spaces on a new line are stripped. 570 * Trailing spaces are not stripped.</p> 571 * 572 * <table border="1"> 573 * <caption>Examples</caption> 574 * <tr> 575 * <th>input</th> 576 * <th>wrapLength</th> 577 * <th>result</th> 578 * </tr> 579 * <tr> 580 * <td>null</td> 581 * <td>*</td> 582 * <td>null</td> 583 * </tr> 584 * <tr> 585 * <td>""</td> 586 * <td>*</td> 587 * <td>""</td> 588 * </tr> 589 * <tr> 590 * <td>"Here is one line of text that is going to be wrapped after 20 columns."</td> 591 * <td>20</td> 592 * <td>"Here is one line of\ntext that is going\nto be wrapped after\n20 columns."</td> 593 * </tr> 594 * <tr> 595 * <td>"Click here to jump to the commons website - https://commons.apache.org"</td> 596 * <td>20</td> 597 * <td>"Click here to jump\nto the commons\nwebsite -\nhttps://commons.apache.org"</td> 598 * </tr> 599 * <tr> 600 * <td>"Click here, https://commons.apache.org, to jump to the commons website"</td> 601 * <td>20</td> 602 * <td>"Click here,\nhttps://commons.apache.org,\nto jump to the\ncommons website"</td> 603 * </tr> 604 * </table> 605 * 606 * (assuming that '\n' is the systems line separator) 607 * 608 * @param str the String to be word wrapped, may be null 609 * @param wrapLength the column to wrap the words at, less than 1 is treated as 1 610 * @return a line with newlines inserted, {@code null} if null input 611 */ 612 public static String wrap(final String str, final int wrapLength) { 613 return wrap(str, wrapLength, null, false); 614 } 615 616 /** 617 * Wraps a single line of text, identifying words by {@code ' '}. 618 * 619 * <p>Leading spaces on a new line are stripped. 620 * Trailing spaces are not stripped.</p> 621 * 622 * <table border="1"> 623 * <caption>Examples</caption> 624 * <tr> 625 * <th>input</th> 626 * <th>wrapLength</th> 627 * <th>newLineString</th> 628 * <th>wrapLongWords</th> 629 * <th>result</th> 630 * </tr> 631 * <tr> 632 * <td>null</td> 633 * <td>*</td> 634 * <td>*</td> 635 * <td>true/false</td> 636 * <td>null</td> 637 * </tr> 638 * <tr> 639 * <td>""</td> 640 * <td>*</td> 641 * <td>*</td> 642 * <td>true/false</td> 643 * <td>""</td> 644 * </tr> 645 * <tr> 646 * <td>"Here is one line of text that is going to be wrapped after 20 columns."</td> 647 * <td>20</td> 648 * <td>"\n"</td> 649 * <td>true/false</td> 650 * <td>"Here is one line of\ntext that is going\nto be wrapped after\n20 columns."</td> 651 * </tr> 652 * <tr> 653 * <td>"Here is one line of text that is going to be wrapped after 20 columns."</td> 654 * <td>20</td> 655 * <td>"<br />"</td> 656 * <td>true/false</td> 657 * <td>"Here is one line of<br />text that is going< 658 * br />to be wrapped after<br />20 columns."</td> 659 * </tr> 660 * <tr> 661 * <td>"Here is one line of text that is going to be wrapped after 20 columns."</td> 662 * <td>20</td> 663 * <td>null</td> 664 * <td>true/false</td> 665 * <td>"Here is one line of" + systemNewLine + "text that is going" 666 * + systemNewLine + "to be wrapped after" + systemNewLine + "20 columns."</td> 667 * </tr> 668 * <tr> 669 * <td>"Click here to jump to the commons website - https://commons.apache.org"</td> 670 * <td>20</td> 671 * <td>"\n"</td> 672 * <td>false</td> 673 * <td>"Click here to jump\nto the commons\nwebsite -\nhttps://commons.apache.org"</td> 674 * </tr> 675 * <tr> 676 * <td>"Click here to jump to the commons website - https://commons.apache.org"</td> 677 * <td>20</td> 678 * <td>"\n"</td> 679 * <td>true</td> 680 * <td>"Click here to jump\nto the commons\nwebsite -\nhttp://commons.apach\ne.org"</td> 681 * </tr> 682 * </table> 683 * 684 * @param str the String to be word wrapped, may be null 685 * @param wrapLength the column to wrap the words at, less than 1 is treated as 1 686 * @param newLineStr the string to insert for a new line, 687 * {@code null} uses the system property line separator 688 * @param wrapLongWords true if long words (such as URLs) should be wrapped 689 * @return a line with newlines inserted, {@code null} if null input 690 */ 691 public static String wrap(final String str, 692 final int wrapLength, 693 final String newLineStr, 694 final boolean wrapLongWords) { 695 return wrap(str, wrapLength, newLineStr, wrapLongWords, " "); 696 } 697 698 /** 699 * Wraps a single line of text, identifying words by {@code wrapOn}. 700 * 701 * <p>Leading spaces on a new line are stripped. 702 * Trailing spaces are not stripped.</p> 703 * 704 * <table border="1"> 705 * <caption>Examples</caption> 706 * <tr> 707 * <th>input</th> 708 * <th>wrapLength</th> 709 * <th>newLineString</th> 710 * <th>wrapLongWords</th> 711 * <th>wrapOn</th> 712 * <th>result</th> 713 * </tr> 714 * <tr> 715 * <td>null</td> 716 * <td>*</td> 717 * <td>*</td> 718 * <td>true/false</td> 719 * <td>*</td> 720 * <td>null</td> 721 * </tr> 722 * <tr> 723 * <td>""</td> 724 * <td>*</td> 725 * <td>*</td> 726 * <td>true/false</td> 727 * <td>*</td> 728 * <td>""</td> 729 * </tr> 730 * <tr> 731 * <td>"Here is one line of text that is going to be wrapped after 20 columns."</td> 732 * <td>20</td> 733 * <td>"\n"</td> 734 * <td>true/false</td> 735 * <td>" "</td> 736 * <td>"Here is one line of\ntext that is going\nto be wrapped after\n20 columns."</td> 737 * </tr> 738 * <tr> 739 * <td>"Here is one line of text that is going to be wrapped after 20 columns."</td> 740 * <td>20</td> 741 * <td>"<br />"</td> 742 * <td>true/false</td> 743 * <td>" "</td> 744 * <td>"Here is one line of<br />text that is going<br /> 745 * to be wrapped after<br />20 columns."</td> 746 * </tr> 747 * <tr> 748 * <td>"Here is one line of text that is going to be wrapped after 20 columns."</td> 749 * <td>20</td> 750 * <td>null</td> 751 * <td>true/false</td> 752 * <td>" "</td> 753 * <td>"Here is one line of" + systemNewLine + "text that is going" 754 * + systemNewLine + "to be wrapped after" + systemNewLine + "20 columns."</td> 755 * </tr> 756 * <tr> 757 * <td>"Click here to jump to the commons website - https://commons.apache.org"</td> 758 * <td>20</td> 759 * <td>"\n"</td> 760 * <td>false</td> 761 * <td>" "</td> 762 * <td>"Click here to jump\nto the commons\nwebsite -\nhttps://commons.apache.org"</td> 763 * </tr> 764 * <tr> 765 * <td>"Click here to jump to the commons website - https://commons.apache.org"</td> 766 * <td>20</td> 767 * <td>"\n"</td> 768 * <td>true</td> 769 * <td>" "</td> 770 * <td>"Click here to jump\nto the commons\nwebsite -\nhttp://commons.apach\ne.org"</td> 771 * </tr> 772 * <tr> 773 * <td>"flammable/inflammable"</td> 774 * <td>20</td> 775 * <td>"\n"</td> 776 * <td>true</td> 777 * <td>"/"</td> 778 * <td>"flammable\ninflammable"</td> 779 * </tr> 780 * </table> 781 * @param str the String to be word wrapped, may be null 782 * @param wrapLength the column to wrap the words at, less than 1 is treated as 1 783 * @param newLineStr the string to insert for a new line, 784 * {@code null} uses the system property line separator 785 * @param wrapLongWords true if long words (such as URLs) should be wrapped 786 * @param wrapOn regex expression to be used as a breakable characters, 787 * if blank string is provided a space character will be used 788 * @return a line with newlines inserted, {@code null} if null input 789 */ 790 public static String wrap(final String str, 791 int wrapLength, 792 String newLineStr, 793 final boolean wrapLongWords, 794 String wrapOn) { 795 if (str == null) { 796 return null; 797 } 798 if (newLineStr == null) { 799 newLineStr = System.lineSeparator(); 800 } 801 if (wrapLength < 1) { 802 wrapLength = 1; 803 } 804 if (StringUtils.isBlank(wrapOn)) { 805 wrapOn = " "; 806 } 807 final Pattern patternToWrapOn = Pattern.compile(wrapOn); 808 final int inputLineLength = str.length(); 809 int offset = 0; 810 final StringBuilder wrappedLine = new StringBuilder(inputLineLength + 32); 811 int matcherSize = -1; 812 813 while (offset < inputLineLength) { 814 int spaceToWrapAt = -1; 815 Matcher matcher = patternToWrapOn.matcher(str.substring(offset, 816 Math.min((int) Math.min(Integer.MAX_VALUE, offset + wrapLength + 1L), inputLineLength))); 817 if (matcher.find()) { 818 if (matcher.start() == 0) { 819 matcherSize = matcher.end(); 820 if (matcherSize != 0) { 821 offset += matcher.end(); 822 continue; 823 } 824 offset += 1; 825 } 826 spaceToWrapAt = matcher.start() + offset; 827 } 828 829 // only last line without leading spaces is left 830 if (inputLineLength - offset <= wrapLength) { 831 break; 832 } 833 834 while (matcher.find()) { 835 spaceToWrapAt = matcher.start() + offset; 836 } 837 838 if (spaceToWrapAt >= offset) { 839 // normal case 840 wrappedLine.append(str, offset, spaceToWrapAt); 841 wrappedLine.append(newLineStr); 842 offset = spaceToWrapAt + 1; 843 844 } else // really long word or URL 845 if (wrapLongWords) { 846 if (matcherSize == 0) { 847 offset--; 848 } 849 // wrap really long word one line at a time 850 wrappedLine.append(str, offset, wrapLength + offset); 851 wrappedLine.append(newLineStr); 852 offset += wrapLength; 853 matcherSize = -1; 854 } else { 855 // do not wrap really long word, just extend beyond limit 856 matcher = patternToWrapOn.matcher(str.substring(offset + wrapLength)); 857 if (matcher.find()) { 858 matcherSize = matcher.end() - matcher.start(); 859 spaceToWrapAt = matcher.start() + offset + wrapLength; 860 } 861 862 if (spaceToWrapAt >= 0) { 863 if (matcherSize == 0 && offset != 0) { 864 offset--; 865 } 866 wrappedLine.append(str, offset, spaceToWrapAt); 867 wrappedLine.append(newLineStr); 868 offset = spaceToWrapAt + 1; 869 } else { 870 if (matcherSize == 0 && offset != 0) { 871 offset--; 872 } 873 wrappedLine.append(str, offset, str.length()); 874 offset = inputLineLength; 875 matcherSize = -1; 876 } 877 } 878 } 879 880 if (matcherSize == 0 && offset < inputLineLength) { 881 offset--; 882 } 883 884 // Whatever is left in line is short enough to just pass through 885 wrappedLine.append(str, offset, str.length()); 886 887 return wrappedLine.toString(); 888 } 889 890 /** 891 * {@code WordUtils} instances should NOT be constructed in 892 * standard programming. Instead, the class should be used as 893 * {@code WordUtils.wrap("foo bar", 20);}. 894 * 895 * <p>This constructor is public to permit tools that require a JavaBean 896 * instance to operate.</p> 897 */ 898 public WordUtils() { 899 } 900 }