diff options
author | sherman <none@none> | 2013-11-13 11:26:01 -0800 |
---|---|---|
committer | sherman <none@none> | 2013-11-13 11:26:01 -0800 |
commit | 567f74c5ff29fccccc8c18ca2f9e54b59b645a8d (patch) | |
tree | 72ec124c79949ccd57ca75f93e81824dbe368b4a /src | |
parent | 0e7cac59c1b147c47ba1f2041f38f66cc6484032 (diff) |
8027645: Pattern.split() with positive lookahead
6559590: Pattern.compile(".*").split("") returns incorrect result
Summary: updated spec/impl for these two corner cases
Reviewed-by: alanb, psandoz
Diffstat (limited to 'src')
-rw-r--r-- | src/share/classes/java/lang/String.java | 10 | ||||
-rw-r--r-- | src/share/classes/java/util/regex/Pattern.java | 28 |
2 files changed, 33 insertions, 5 deletions
diff --git a/src/share/classes/java/lang/String.java b/src/share/classes/java/lang/String.java index a7bed2280..a561c5d59 100644 --- a/src/share/classes/java/lang/String.java +++ b/src/share/classes/java/lang/String.java @@ -2235,7 +2235,13 @@ public final class String * expression or is terminated by the end of the string. The substrings in * the array are in the order in which they occur in this string. If the * expression does not match any part of the input then the resulting array - * has just one element, namely this string. + * has just one element, namely this string. A zero-length input sequence + * always results in a zero-length resulting array. + * + * <p> When there is a positive-width match at the beginning of this + * string then an empty leading substring is included at the beginning + * of the resulting array. A zero-width match at the beginning, however, + * never produces such an empty leading substring. * * <p> The {@code limit} parameter controls the number of times the * pattern is applied and therefore affects the length of the resulting @@ -2325,6 +2331,8 @@ public final class String (ch < Character.MIN_HIGH_SURROGATE || ch > Character.MAX_LOW_SURROGATE)) { + if (value.length == 0) + return new String[0]; int off = 0; int next = 0; boolean limited = limit > 0; diff --git a/src/share/classes/java/util/regex/Pattern.java b/src/share/classes/java/util/regex/Pattern.java index e9a0c67f7..f1bb37c63 100644 --- a/src/share/classes/java/util/regex/Pattern.java +++ b/src/share/classes/java/util/regex/Pattern.java @@ -1142,9 +1142,15 @@ public final class Pattern * input sequence that is terminated by another subsequence that matches * this pattern or is terminated by the end of the input sequence. The * substrings in the array are in the order in which they occur in the - * input. 
If this pattern does not match any subsequence of the input then * the resulting array has just one element, namely the input sequence in - * string form. + * string form. A zero-length input sequence always results in a zero-length + * resulting array. + * + * <p> When there is a positive-width match at the beginning of the input + * sequence then an empty leading substring is included at the beginning + * of the resulting array. A zero-width match at the beginning, however, + * never produces such an empty leading substring. * * <p> The <tt>limit</tt> parameter controls the number of times the * pattern is applied and therefore affects the length of the resulting @@ -1185,7 +1191,6 @@ public final class Pattern * <td><tt>{ "b", "", ":and:f" }</tt></td></tr> * </table></blockquote> * - * * @param input * The character sequence to be split * @@ -1196,6 +1201,8 @@ public final class Pattern * around matches of this pattern */ public String[] split(CharSequence input, int limit) { + if (input.length() == 0) + return new String[0]; int index = 0; boolean matchLimited = limit > 0; ArrayList<String> matchList = new ArrayList<>(); @@ -1204,6 +1211,11 @@ public final class Pattern // Add segments before each match found while(m.find()) { if (!matchLimited || matchList.size() < limit - 1) { + if (index == 0 && index == m.start() && m.start() == m.end()) { + // no empty leading substring included for zero-width match + // at the beginning of the input char sequence. + continue; + } String match = input.subSequence(index, m.start()).toString(); matchList.add(match); index = m.end(); @@ -5762,6 +5774,13 @@ NEXT: while (i <= last) { * the resulting stream has just one element, namely the input sequence in * string form. * + * <p> A zero-length input sequence always results in an empty stream. + * + * <p> When there is a positive-width match at the beginning of the input + * sequence then an empty leading substring is included at the beginning + * of the stream. 
A zero-width match at the beginning, however, never produces + * such an empty leading substring. + * * <p> If the input sequence is mutable, it must remain constant during the * execution of the terminal stream operation. Otherwise, the result of the * terminal stream operation is undefined. @@ -5817,7 +5836,8 @@ NEXT: while (i <= last) { current = matcher.end(); if (!nextElement.isEmpty()) { return true; - } else { + } else if (current > 0) { // no empty leading substring for zero-width + // match at the beginning of the input emptyElementCount++; } } |