Source code

001/*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements.  See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License.  You may obtain a copy of the License at
008 *
009 *      http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017package org.apache.commons.text;
018
019import java.util.HashSet;
020import java.util.Set;
021import java.util.regex.Matcher;
022import java.util.regex.Pattern;
023
024import org.apache.commons.lang3.ArrayUtils;
025import org.apache.commons.lang3.StringUtils;
026import org.apache.commons.lang3.Validate;
027
028/**
029 * Operations on Strings that contain words.
030 *
031 * <p>
032 * This class tries to handle {@code null} input gracefully. An exception will not be thrown for a
033 * {@code null} input. Each method documents its behavior in more detail.
034 * </p>
035 *
036 * @since 1.1
037 */
038public class WordUtils {
039
040    /**
041     * Abbreviates the words nicely.
042     *
043     * <p>
044     * This method searches for the first space after the lower limit and abbreviates
045     * the String there. It will also append any String passed as a parameter
046     * to the end of the String. The upper limit can be specified to forcibly
047     * abbreviate a String.
048     * </p>
049     *
050     * @param str         the string to be abbreviated. If null is passed, null is returned.
051     *                    If the empty String is passed, the empty string is returned.
052     * @param lower       the lower limit; negative value is treated as zero.
053     * @param upper       the upper limit; specify -1 if no limit is desired.
054     *                    The upper limit cannot be lower than the lower limit.
055     * @param appendToEnd String to be appended to the end of the abbreviated string.
056     *                    This is appended ONLY if the string was indeed abbreviated.
057     *                    The append does not count towards the lower or upper limits.
058     * @return The abbreviated String.
059     *
060     * <pre>
061     * WordUtils.abbreviate("Now is the time for all good men", 0, 40, null));     = "Now"
062     * WordUtils.abbreviate("Now is the time for all good men", 10, 40, null));    = "Now is the"
063     * WordUtils.abbreviate("Now is the time for all good men", 20, 40, null));    = "Now is the time for all"
064     * WordUtils.abbreviate("Now is the time for all good men", 0, 40, ""));       = "Now"
065     * WordUtils.abbreviate("Now is the time for all good men", 10, 40, ""));      = "Now is the"
066     * WordUtils.abbreviate("Now is the time for all good men", 20, 40, ""));      = "Now is the time for all"
067     * WordUtils.abbreviate("Now is the time for all good men", 0, 40, " ..."));   = "Now ..."
068     * WordUtils.abbreviate("Now is the time for all good men", 10, 40, " ..."));  = "Now is the ..."
069     * WordUtils.abbreviate("Now is the time for all good men", 20, 40, " ..."));  = "Now is the time for all ..."
070     * WordUtils.abbreviate("Now is the time for all good men", 0, -1, ""));       = "Now"
071     * WordUtils.abbreviate("Now is the time for all good men", 10, -1, ""));      = "Now is the"
072     * WordUtils.abbreviate("Now is the time for all good men", 20, -1, ""));      = "Now is the time for all"
073     * WordUtils.abbreviate("Now is the time for all good men", 50, -1, ""));      = "Now is the time for all good men"
074     * WordUtils.abbreviate("Now is the time for all good men", 1000, -1, ""));    = "Now is the time for all good men"
075     * WordUtils.abbreviate("Now is the time for all good men", 9, -10, null));    = IllegalArgumentException
076     * WordUtils.abbreviate("Now is the time for all good men", 10, 5, null));     = IllegalArgumentException
077     * </pre>
078     */
079    public static String abbreviate(final String str, int lower, int upper, final String appendToEnd) {
080        Validate.isTrue(upper >= -1, "upper value cannot be less than -1");
081        Validate.isTrue(upper >= lower || upper == -1, "upper value is less than lower value");
082        if (StringUtils.isEmpty(str)) {
083            return str;
084        }
085
086        // if the lower value is greater than the length of the string,
087        // set to the length of the string
088        if (lower > str.length()) {
089            lower = str.length();
090        }
091
092        // if the upper value is -1 (i.e. no limit) or is greater
093        // than the length of the string, set to the length of the string
094        if (upper == -1 || upper > str.length()) {
095            upper = str.length();
096        }
097
098        final StringBuilder result = new StringBuilder();
099        final int index = StringUtils.indexOf(str, " ", lower);
100        if (index == -1) {
101            result.append(str, 0, upper);
102            // only if abbreviation has occurred do we append the appendToEnd value
103            if (upper != str.length()) {
104                result.append(StringUtils.defaultString(appendToEnd));
105            }
106        } else {
107            result.append(str, 0, Math.min(index, upper));
108            result.append(StringUtils.defaultString(appendToEnd));
109        }
110
111        return result.toString();
112    }
113
114    /**
115     * Capitalizes all the whitespace separated words in a String.
116     * Only the first character of each word is changed. To convert the
117     * rest of each word to lowercase at the same time,
118     * use {@link #capitalizeFully(String)}.
119     *
120     * <p>Whitespace is defined by {@link Character#isWhitespace(char)}.
121     * A {@code null} input String returns {@code null}.
122     * Capitalization uses the Unicode title case, normally equivalent to
123     * upper case.</p>
124     *
125     * <pre>
126     * WordUtils.capitalize(null)        = null
127     * WordUtils.capitalize("")          = ""
128     * WordUtils.capitalize("i am FINE") = "I Am FINE"
129     * </pre>
130     *
131     * @param str  the String to capitalize, may be null
132     * @return capitalized String, {@code null} if null String input
133     * @see #uncapitalize(String)
134     * @see #capitalizeFully(String)
135     */
136    public static String capitalize(final String str) {
137        return capitalize(str, null);
138    }
139
140    /**
141     * Capitalizes all the delimiter separated words in a String.
142     * Only the first character of each word is changed. To convert the
143     * rest of each word to lowercase at the same time,
144     * use {@link #capitalizeFully(String, char[])}.
145     *
146     * <p>The delimiters represent a set of characters understood to separate words.
147     * The first string character and the first non-delimiter character after a
148     * delimiter will be capitalized.</p>
149     *
150     * <p>A {@code null} input String returns {@code null}.
151     * Capitalization uses the Unicode title case, normally equivalent to
152     * upper case.</p>
153     *
154     * <pre>
155     * WordUtils.capitalize(null, *)            = null
156     * WordUtils.capitalize("", *)              = ""
157     * WordUtils.capitalize(*, new char[0])     = *
158     * WordUtils.capitalize("i am fine", null)  = "I Am Fine"
159     * WordUtils.capitalize("i aM.fine", {'.'}) = "I aM.Fine"
160     * WordUtils.capitalize("i am fine", new char[]{}) = "I am fine"
161     * </pre>
162     *
163     * @param str  the String to capitalize, may be null
164     * @param delimiters  set of characters to determine capitalization, null means whitespace
165     * @return capitalized String, {@code null} if null String input
166     * @see #uncapitalize(String)
167     * @see #capitalizeFully(String)
168     */
169    public static String capitalize(final String str, final char... delimiters) {
170        if (StringUtils.isEmpty(str)) {
171            return str;
172        }
173        final Set<Integer> delimiterSet = generateDelimiterSet(delimiters);
174        final int strLen = str.length();
175        final int[] newCodePoints = new int[strLen];
176        int outOffset = 0;
177
178        boolean capitalizeNext = true;
179        for (int index = 0; index < strLen;) {
180            final int codePoint = str.codePointAt(index);
181
182            if (delimiterSet.contains(codePoint)) {
183                capitalizeNext = true;
184                newCodePoints[outOffset++] = codePoint;
185                index += Character.charCount(codePoint);
186            } else if (capitalizeNext) {
187                final int titleCaseCodePoint = Character.toTitleCase(codePoint);
188                newCodePoints[outOffset++] = titleCaseCodePoint;
189                index += Character.charCount(titleCaseCodePoint);
190                capitalizeNext = false;
191            } else {
192                newCodePoints[outOffset++] = codePoint;
193                index += Character.charCount(codePoint);
194            }
195        }
196        return new String(newCodePoints, 0, outOffset);
197    }
198
199    /**
200     * Converts all the whitespace separated words in a String into capitalized words,
201     * that is each word is made up of a titlecase character and then a series of
202     * lowercase characters.
203     *
204     * <p>Whitespace is defined by {@link Character#isWhitespace(char)}.
205     * A {@code null} input String returns {@code null}.
206     * Capitalization uses the Unicode title case, normally equivalent to
207     * upper case.</p>
208     *
209     * <pre>
210     * WordUtils.capitalizeFully(null)        = null
211     * WordUtils.capitalizeFully("")          = ""
212     * WordUtils.capitalizeFully("i am FINE") = "I Am Fine"
213     * </pre>
214     *
215     * @param str  the String to capitalize, may be null
216     * @return capitalized String, {@code null} if null String input
217     */
218    public static String capitalizeFully(final String str) {
219        return capitalizeFully(str, null);
220    }
221
222    /**
223     * Converts all the delimiter separated words in a String into capitalized words,
224     * that is each word is made up of a titlecase character and then a series of
225     * lowercase characters.
226     *
227     * <p>The delimiters represent a set of characters understood to separate words.
228     * The first string character and the first non-delimiter character after a
229     * delimiter will be capitalized.</p>
230     *
231     * <p>A {@code null} input String returns {@code null}.
232     * Capitalization uses the Unicode title case, normally equivalent to
233     * upper case.</p>
234     *
235     * <pre>
236     * WordUtils.capitalizeFully(null, *)            = null
237     * WordUtils.capitalizeFully("", *)              = ""
238     * WordUtils.capitalizeFully(*, null)            = *
239     * WordUtils.capitalizeFully(*, new char[0])     = *
240     * WordUtils.capitalizeFully("i aM.fine", {'.'}) = "I am.Fine"
241     * </pre>
242     *
243     * @param str  the String to capitalize, may be null
244     * @param delimiters  set of characters to determine capitalization, null means whitespace
245     * @return capitalized String, {@code null} if null String input
246     */
247    public static String capitalizeFully(String str, final char... delimiters) {
248        if (StringUtils.isEmpty(str)) {
249            return str;
250        }
251        str = str.toLowerCase();
252        return capitalize(str, delimiters);
253    }
254
255    /**
256     * Checks if the String contains all words in the given array.
257     *
258     * <p>
259     * A {@code null} String will return {@code false}. A {@code null}, zero
260     * length search array or if one element of array is null will return {@code false}.
261     * </p>
262     *
263     * <pre>
264     * WordUtils.containsAllWords(null, *)            = false
265     * WordUtils.containsAllWords("", *)              = false
266     * WordUtils.containsAllWords(*, null)            = false
267     * WordUtils.containsAllWords(*, [])              = false
268     * WordUtils.containsAllWords("abcd", "ab", "cd") = false
269     * WordUtils.containsAllWords("abc def", "def", "abc") = true
270     * </pre>
271     *
272     * @param word The CharSequence to check, may be null
273     * @param words The array of String words to search for, may be null
274     * @return {@code true} if all search words are found, {@code false} otherwise
275     */
276    public static boolean containsAllWords(final CharSequence word, final CharSequence... words) {
277        if (StringUtils.isEmpty(word) || ArrayUtils.isEmpty(words)) {
278            return false;
279        }
280        for (final CharSequence w : words) {
281            if (StringUtils.isBlank(w)) {
282                return false;
283            }
284            final Pattern p = Pattern.compile(".*\\b" + w + "\\b.*");
285            if (!p.matcher(word).matches()) {
286                return false;
287            }
288        }
289        return true;
290    }
291
292    /**
293     * Converts an array of delimiters to a hash set of code points. Code point of space(32) is added as the default
294     * value if delimiters is null. The generated hash set provides O(1) lookup time.
295     *
296     * @param delimiters set of characters to determine capitalization, null means whitespace
297     * @return Set<Integer>
298     */
299    private static Set<Integer> generateDelimiterSet(final char[] delimiters) {
300        final Set<Integer> delimiterHashSet = new HashSet<>();
301        if (delimiters == null || delimiters.length == 0) {
302            if (delimiters == null) {
303                delimiterHashSet.add(Character.codePointAt(new char[] {' '}, 0));
304            }
305
306            return delimiterHashSet;
307        }
308
309        for (int index = 0; index < delimiters.length; index++) {
310            delimiterHashSet.add(Character.codePointAt(delimiters, index));
311        }
312        return delimiterHashSet;
313    }
314
315    /**
316     * Extracts the initial characters from each word in the String.
317     *
318     * <p>All first characters after whitespace are returned as a new string.
319     * Their case is not changed.</p>
320     *
321     * <p>Whitespace is defined by {@link Character#isWhitespace(char)}.
322     * A {@code null} input String returns {@code null}.</p>
323     *
324     * <pre>
325     * WordUtils.initials(null)             = null
326     * WordUtils.initials("")               = ""
327     * WordUtils.initials("Ben John Lee")   = "BJL"
328     * WordUtils.initials("Ben J.Lee")      = "BJ"
329     * </pre>
330     *
331     * @param str  the String to get initials from, may be null
332     * @return String of initial letters, {@code null} if null String input
333     * @see #initials(String,char[])
334     */
335    public static String initials(final String str) {
336        return initials(str, null);
337    }
338
339    /**
340     * Extracts the initial characters from each word in the String.
341     *
342     * <p>All first characters after the defined delimiters are returned as a new string.
343     * Their case is not changed.</p>
344     *
345     * <p>If the delimiters array is null, then Whitespace is used.
346     * Whitespace is defined by {@link Character#isWhitespace(char)}.
347     * A {@code null} input String returns {@code null}.
348     * An empty delimiter array returns an empty String.</p>
349     *
350     * <pre>
351     * WordUtils.initials(null, *)                = null
352     * WordUtils.initials("", *)                  = ""
353     * WordUtils.initials("Ben John Lee", null)   = "BJL"
354     * WordUtils.initials("Ben J.Lee", null)      = "BJ"
355     * WordUtils.initials("Ben J.Lee", [' ','.']) = "BJL"
356     * WordUtils.initials(*, new char[0])         = ""
357     * </pre>
358     *
359     * @param str  the String to get initials from, may be null
360     * @param delimiters  set of characters to determine words, null means whitespace
361     * @return String of initial characters, {@code null} if null String input
362     * @see #initials(String)
363     */
364    public static String initials(final String str, final char... delimiters) {
365        if (StringUtils.isEmpty(str)) {
366            return str;
367        }
368        if (delimiters != null && delimiters.length == 0) {
369            return StringUtils.EMPTY;
370        }
371        final Set<Integer> delimiterSet = generateDelimiterSet(delimiters);
372        final int strLen = str.length();
373        final int[] newCodePoints = new int[strLen / 2 + 1];
374        int count = 0;
375        boolean lastWasGap = true;
376        for (int i = 0; i < strLen;) {
377            final int codePoint = str.codePointAt(i);
378
379            if (delimiterSet.contains(codePoint) || delimiters == null && Character.isWhitespace(codePoint)) {
380                lastWasGap = true;
381            } else if (lastWasGap) {
382                newCodePoints[count++] = codePoint;
383                lastWasGap = false;
384            }
385
386            i += Character.charCount(codePoint);
387        }
388        return new String(newCodePoints, 0, count);
389    }
390
391    /**
392     * Is the character a delimiter.
393     *
394     * @param ch the character to check
395     * @param delimiters the delimiters
396     * @return true if it is a delimiter
397     * @deprecated as of 1.2 and will be removed in 2.0
398     */
399    @Deprecated
400    public static boolean isDelimiter(final char ch, final char[] delimiters) {
401        if (delimiters == null) {
402            return Character.isWhitespace(ch);
403        }
404        for (final char delimiter : delimiters) {
405            if (ch == delimiter) {
406                return true;
407            }
408        }
409        return false;
410    }
411
412    /**
413     * Is the codePoint a delimiter.
414     *
415     * @param codePoint the codePint to check
416     * @param delimiters the delimiters
417     * @return true if it is a delimiter
418     * @deprecated as of 1.2 and will be removed in 2.0
419     */
420    @Deprecated
421    public static boolean isDelimiter(final int codePoint, final char[] delimiters) {
422        if (delimiters == null) {
423            return Character.isWhitespace(codePoint);
424        }
425        for (int index = 0; index < delimiters.length; index++) {
426            final int delimiterCodePoint = Character.codePointAt(delimiters, index);
427            if (delimiterCodePoint == codePoint) {
428                return true;
429            }
430        }
431        return false;
432    }
433
434    /**
435     * Swaps the case of a String using a word based algorithm.
436     *
437     * <ul>
438     *  <li>Upper case character converts to Lower case</li>
439     *  <li>Title case character converts to Lower case</li>
440     *  <li>Lower case character after Whitespace or at start converts to Title case</li>
441     *  <li>Other Lower case character converts to Upper case</li>
442     * </ul>
443     *
444     * <p>Whitespace is defined by {@link Character#isWhitespace(char)}.
445     * A {@code null} input String returns {@code null}.</p>
446     *
447     * <pre>
448     * StringUtils.swapCase(null)                 = null
449     * StringUtils.swapCase("")                   = ""
450     * StringUtils.swapCase("The dog has a BONE") = "tHE DOG HAS A bone"
451     * </pre>
452     *
453     * @param str  the String to swap case, may be null
454     * @return The changed String, {@code null} if null String input
455     */
456    public static String swapCase(final String str) {
457        if (StringUtils.isEmpty(str)) {
458            return str;
459        }
460        final int strLen = str.length();
461        final int[] newCodePoints = new int[strLen];
462        int outOffset = 0;
463        boolean whitespace = true;
464        for (int index = 0; index < strLen;) {
465            final int oldCodepoint = str.codePointAt(index);
466            final int newCodePoint;
467            if (Character.isUpperCase(oldCodepoint) || Character.isTitleCase(oldCodepoint)) {
468                newCodePoint = Character.toLowerCase(oldCodepoint);
469                whitespace = false;
470            } else if (Character.isLowerCase(oldCodepoint)) {
471                if (whitespace) {
472                    newCodePoint = Character.toTitleCase(oldCodepoint);
473                    whitespace = false;
474                } else {
475                    newCodePoint = Character.toUpperCase(oldCodepoint);
476                }
477            } else {
478                whitespace = Character.isWhitespace(oldCodepoint);
479                newCodePoint = oldCodepoint;
480            }
481            newCodePoints[outOffset++] = newCodePoint;
482            index += Character.charCount(newCodePoint);
483        }
484        return new String(newCodePoints, 0, outOffset);
485    }
486
487    /**
488     * Uncapitalizes all the whitespace separated words in a String.
489     * Only the first character of each word is changed.
490     *
491     * <p>Whitespace is defined by {@link Character#isWhitespace(char)}.
492     * A {@code null} input String returns {@code null}.</p>
493     *
494     * <pre>
495     * WordUtils.uncapitalize(null)        = null
496     * WordUtils.uncapitalize("")          = ""
497     * WordUtils.uncapitalize("I Am FINE") = "i am fINE"
498     * </pre>
499     *
500     * @param str  the String to uncapitalize, may be null
501     * @return uncapitalized String, {@code null} if null String input
502     * @see #capitalize(String)
503     */
504    public static String uncapitalize(final String str) {
505        return uncapitalize(str, null);
506    }
507
508    /**
509     * Uncapitalizes all the whitespace separated words in a String.
510     * Only the first character of each word is changed.
511     *
512     * <p>The delimiters represent a set of characters understood to separate words.
513     * The first string character and the first non-delimiter character after a
514     * delimiter will be uncapitalized.</p>
515     *
516     * <p>Whitespace is defined by {@link Character#isWhitespace(char)}.
517     * A {@code null} input String returns {@code null}.</p>
518     *
519     * <pre>
520     * WordUtils.uncapitalize(null, *)            = null
521     * WordUtils.uncapitalize("", *)              = ""
522     * WordUtils.uncapitalize(*, null)            = *
523     * WordUtils.uncapitalize(*, new char[0])     = *
524     * WordUtils.uncapitalize("I AM.FINE", {'.'}) = "i AM.fINE"
525     * WordUtils.uncapitalize("I am fine", new char[]{}) = "i am fine"
526     * </pre>
527     *
528     * @param str  the String to uncapitalize, may be null
529     * @param delimiters  set of characters to determine uncapitalization, null means whitespace
530     * @return uncapitalized String, {@code null} if null String input
531     * @see #capitalize(String)
532     */
533    public static String uncapitalize(final String str, final char... delimiters) {
534        if (StringUtils.isEmpty(str)) {
535            return str;
536        }
537        final Set<Integer> delimiterSet = generateDelimiterSet(delimiters);
538        final int strLen = str.length();
539        final int[] newCodePoints = new int[strLen];
540        int outOffset = 0;
541
542        boolean uncapitalizeNext = true;
543        for (int index = 0; index < strLen;) {
544            final int codePoint = str.codePointAt(index);
545
546            if (delimiterSet.contains(codePoint)) {
547                uncapitalizeNext = true;
548                newCodePoints[outOffset++] = codePoint;
549                index += Character.charCount(codePoint);
550            } else if (uncapitalizeNext) {
551                final int titleCaseCodePoint = Character.toLowerCase(codePoint);
552                newCodePoints[outOffset++] = titleCaseCodePoint;
553                index += Character.charCount(titleCaseCodePoint);
554                uncapitalizeNext = false;
555            } else {
556                newCodePoints[outOffset++] = codePoint;
557                index += Character.charCount(codePoint);
558            }
559        }
560        return new String(newCodePoints, 0, outOffset);
561    }
562
563    /**
564     * Wraps a single line of text, identifying words by {@code ' '}.
565     *
566     * <p>New lines will be separated by the system property line separator.
567     * Very long words, such as URLs will <i>not</i> be wrapped.</p>
568     *
569     * <p>Leading spaces on a new line are stripped.
570     * Trailing spaces are not stripped.</p>
571     *
572     * <table border="1">
573     *  <caption>Examples</caption>
574     *  <tr>
575     *   <th>input</th>
576     *   <th>wrapLength</th>
577     *   <th>result</th>
578     *  </tr>
579     *  <tr>
580     *   <td>null</td>
581     *   <td>*</td>
582     *   <td>null</td>
583     *  </tr>
584     *  <tr>
585     *   <td>""</td>
586     *   <td>*</td>
587     *   <td>""</td>
588     *  </tr>
589     *  <tr>
590     *   <td>"Here is one line of text that is going to be wrapped after 20 columns."</td>
591     *   <td>20</td>
592     *   <td>"Here is one line of\ntext that is going\nto be wrapped after\n20 columns."</td>
593     *  </tr>
594     *  <tr>
595     *   <td>"Click here to jump to the commons website - https://commons.apache.org"</td>
596     *   <td>20</td>
597     *   <td>"Click here to jump\nto the commons\nwebsite -\nhttps://commons.apache.org"</td>
598     *  </tr>
599     *  <tr>
600     *   <td>"Click here, https://commons.apache.org, to jump to the commons website"</td>
601     *   <td>20</td>
602     *   <td>"Click here,\nhttps://commons.apache.org,\nto jump to the\ncommons website"</td>
603     *  </tr>
604     * </table>
605     *
606     * (assuming that '\n' is the systems line separator)
607     *
608     * @param str  the String to be word wrapped, may be null
609     * @param wrapLength  the column to wrap the words at, less than 1 is treated as 1
610     * @return a line with newlines inserted, {@code null} if null input
611     */
612    public static String wrap(final String str, final int wrapLength) {
613        return wrap(str, wrapLength, null, false);
614    }
615
616    /**
617     * Wraps a single line of text, identifying words by {@code ' '}.
618     *
619     * <p>Leading spaces on a new line are stripped.
620     * Trailing spaces are not stripped.</p>
621     *
622     * <table border="1">
623     *  <caption>Examples</caption>
624     *  <tr>
625     *   <th>input</th>
626     *   <th>wrapLength</th>
627     *   <th>newLineString</th>
628     *   <th>wrapLongWords</th>
629     *   <th>result</th>
630     *  </tr>
631     *  <tr>
632     *   <td>null</td>
633     *   <td>*</td>
634     *   <td>*</td>
635     *   <td>true/false</td>
636     *   <td>null</td>
637     *  </tr>
638     *  <tr>
639     *   <td>""</td>
640     *   <td>*</td>
641     *   <td>*</td>
642     *   <td>true/false</td>
643     *   <td>""</td>
644     *  </tr>
645     *  <tr>
646     *   <td>"Here is one line of text that is going to be wrapped after 20 columns."</td>
647     *   <td>20</td>
648     *   <td>"\n"</td>
649     *   <td>true/false</td>
650     *   <td>"Here is one line of\ntext that is going\nto be wrapped after\n20 columns."</td>
651     *  </tr>
652     *  <tr>
653     *   <td>"Here is one line of text that is going to be wrapped after 20 columns."</td>
654     *   <td>20</td>
655     *   <td>"&lt;br /&gt;"</td>
656     *   <td>true/false</td>
     *   <td>"Here is one line of&lt;br /&gt;text that is going&lt;br /&gt;to
     *   be wrapped after&lt;br /&gt;20 columns."</td>
659     *  </tr>
660     *  <tr>
661     *   <td>"Here is one line of text that is going to be wrapped after 20 columns."</td>
662     *   <td>20</td>
663     *   <td>null</td>
664     *   <td>true/false</td>
665     *   <td>"Here is one line of" + systemNewLine + "text that is going"
666     *   + systemNewLine + "to be wrapped after" + systemNewLine + "20 columns."</td>
667     *  </tr>
668     *  <tr>
669     *   <td>"Click here to jump to the commons website - https://commons.apache.org"</td>
670     *   <td>20</td>
671     *   <td>"\n"</td>
672     *   <td>false</td>
673     *   <td>"Click here to jump\nto the commons\nwebsite -\nhttps://commons.apache.org"</td>
674     *  </tr>
675     *  <tr>
676     *   <td>"Click here to jump to the commons website - https://commons.apache.org"</td>
677     *   <td>20</td>
678     *   <td>"\n"</td>
679     *   <td>true</td>
     *   <td>"Click here to jump\nto the commons\nwebsite -\nhttps://commons.apac\nhe.org"</td>
681     *  </tr>
682     * </table>
683     *
684     * @param str  the String to be word wrapped, may be null
685     * @param wrapLength  the column to wrap the words at, less than 1 is treated as 1
686     * @param newLineStr  the string to insert for a new line,
687     *  {@code null} uses the system property line separator
688     * @param wrapLongWords  true if long words (such as URLs) should be wrapped
689     * @return a line with newlines inserted, {@code null} if null input
690     */
691    public static String wrap(final String str,
692                              final int wrapLength,
693                              final String newLineStr,
694                              final boolean wrapLongWords) {
695        return wrap(str, wrapLength, newLineStr, wrapLongWords, " ");
696    }
697
698    /**
699     * Wraps a single line of text, identifying words by {@code wrapOn}.
700     *
701     * <p>Leading spaces on a new line are stripped.
702     * Trailing spaces are not stripped.</p>
703     *
704     * <table border="1">
705     *  <caption>Examples</caption>
706     *  <tr>
707     *   <th>input</th>
708     *   <th>wrapLength</th>
709     *   <th>newLineString</th>
710     *   <th>wrapLongWords</th>
711     *   <th>wrapOn</th>
712     *   <th>result</th>
713     *  </tr>
714     *  <tr>
715     *   <td>null</td>
716     *   <td>*</td>
717     *   <td>*</td>
718     *   <td>true/false</td>
719     *   <td>*</td>
720     *   <td>null</td>
721     *  </tr>
722     *  <tr>
723     *   <td>""</td>
724     *   <td>*</td>
725     *   <td>*</td>
726     *   <td>true/false</td>
727     *   <td>*</td>
728     *   <td>""</td>
729     *  </tr>
730     *  <tr>
731     *   <td>"Here is one line of text that is going to be wrapped after 20 columns."</td>
732     *   <td>20</td>
733     *   <td>"\n"</td>
734     *   <td>true/false</td>
735     *   <td>" "</td>
736     *   <td>"Here is one line of\ntext that is going\nto be wrapped after\n20 columns."</td>
737     *  </tr>
738     *  <tr>
739     *   <td>"Here is one line of text that is going to be wrapped after 20 columns."</td>
740     *   <td>20</td>
741     *   <td>"&lt;br /&gt;"</td>
742     *   <td>true/false</td>
743     *   <td>" "</td>
     *   <td>"Here is one line of&lt;br /&gt;text that is going&lt;br /&gt;to
     *   be wrapped after&lt;br /&gt;20 columns."</td>
746     *  </tr>
747     *  <tr>
748     *   <td>"Here is one line of text that is going to be wrapped after 20 columns."</td>
749     *   <td>20</td>
750     *   <td>null</td>
751     *   <td>true/false</td>
752     *   <td>" "</td>
753     *   <td>"Here is one line of" + systemNewLine + "text that is going"
754     *   + systemNewLine + "to be wrapped after" + systemNewLine + "20 columns."</td>
755     *  </tr>
756     *  <tr>
757     *   <td>"Click here to jump to the commons website - https://commons.apache.org"</td>
758     *   <td>20</td>
759     *   <td>"\n"</td>
760     *   <td>false</td>
761     *   <td>" "</td>
762     *   <td>"Click here to jump\nto the commons\nwebsite -\nhttps://commons.apache.org"</td>
763     *  </tr>
764     *  <tr>
765     *   <td>"Click here to jump to the commons website - https://commons.apache.org"</td>
766     *   <td>20</td>
767     *   <td>"\n"</td>
768     *   <td>true</td>
769     *   <td>" "</td>
770     *   <td>"Click here to jump\nto the commons\nwebsite -\nhttp://commons.apach\ne.org"</td>
771     *  </tr>
772     *  <tr>
773     *   <td>"flammable/inflammable"</td>
774     *   <td>20</td>
775     *   <td>"\n"</td>
776     *   <td>true</td>
777     *   <td>"/"</td>
778     *   <td>"flammable\ninflammable"</td>
779     *  </tr>
780     * </table>
781     * @param str  the String to be word wrapped, may be null
782     * @param wrapLength  the column to wrap the words at, less than 1 is treated as 1
783     * @param newLineStr  the string to insert for a new line,
784     *  {@code null} uses the system property line separator
785     * @param wrapLongWords  true if long words (such as URLs) should be wrapped
786     * @param wrapOn regex expression to be used as a breakable characters,
787     *               if blank string is provided a space character will be used
788     * @return a line with newlines inserted, {@code null} if null input
789     */
790    public static String wrap(final String str,
791                              int wrapLength,
792                              String newLineStr,
793                              final boolean wrapLongWords,
794                              String wrapOn) {
795        if (str == null) {
796            return null;
797        }
798        if (newLineStr == null) {
799            newLineStr = System.lineSeparator();
800        }
801        if (wrapLength < 1) {
802            wrapLength = 1;
803        }
804        if (StringUtils.isBlank(wrapOn)) {
805            wrapOn = " ";
806        }
807        final Pattern patternToWrapOn = Pattern.compile(wrapOn);
808        final int inputLineLength = str.length();
809        int offset = 0;
810        final StringBuilder wrappedLine = new StringBuilder(inputLineLength + 32);
811        int matcherSize = -1;
812
813        while (offset < inputLineLength) {
814            int spaceToWrapAt = -1;
815            Matcher matcher = patternToWrapOn.matcher(str.substring(offset,
816                    Math.min((int) Math.min(Integer.MAX_VALUE, offset + wrapLength + 1L), inputLineLength)));
817            if (matcher.find()) {
818                if (matcher.start() == 0) {
819                    matcherSize = matcher.end();
820                    if (matcherSize != 0) {
821                        offset += matcher.end();
822                        continue;
823                    }
824                    offset += 1;
825                }
826                spaceToWrapAt = matcher.start() + offset;
827            }
828
829            // only last line without leading spaces is left
830            if (inputLineLength - offset <= wrapLength) {
831                break;
832            }
833
834            while (matcher.find()) {
835                spaceToWrapAt = matcher.start() + offset;
836            }
837
838            if (spaceToWrapAt >= offset) {
839                // normal case
840                wrappedLine.append(str, offset, spaceToWrapAt);
841                wrappedLine.append(newLineStr);
842                offset = spaceToWrapAt + 1;
843
844            } else // really long word or URL
845            if (wrapLongWords) {
846                if (matcherSize == 0) {
847                    offset--;
848                }
849                // wrap really long word one line at a time
850                wrappedLine.append(str, offset, wrapLength + offset);
851                wrappedLine.append(newLineStr);
852                offset += wrapLength;
853                matcherSize = -1;
854            } else {
855                // do not wrap really long word, just extend beyond limit
856                matcher = patternToWrapOn.matcher(str.substring(offset + wrapLength));
857                if (matcher.find()) {
858                    matcherSize = matcher.end() - matcher.start();
859                    spaceToWrapAt = matcher.start() + offset + wrapLength;
860                }
861
862                if (spaceToWrapAt >= 0) {
863                    if (matcherSize == 0 && offset != 0) {
864                        offset--;
865                    }
866                    wrappedLine.append(str, offset, spaceToWrapAt);
867                    wrappedLine.append(newLineStr);
868                    offset = spaceToWrapAt + 1;
869                } else {
870                    if (matcherSize == 0 && offset != 0) {
871                        offset--;
872                    }
873                    wrappedLine.append(str, offset, str.length());
874                    offset = inputLineLength;
875                    matcherSize = -1;
876                }
877            }
878        }
879
880        if (matcherSize == 0 && offset < inputLineLength) {
881            offset--;
882        }
883
884        // Whatever is left in line is short enough to just pass through
885        wrappedLine.append(str, offset, str.length());
886
887        return wrappedLine.toString();
888    }
889
890    /**
891     * {@code WordUtils} instances should NOT be constructed in
892     * standard programming. Instead, the class should be used as
893     * {@code WordUtils.wrap("foo bar", 20);}.
894     *
895     * <p>This constructor is public to permit tools that require a JavaBean
896     * instance to operate.</p>
897     */
898    public WordUtils() {
899    }
900 }