aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorsherman <none@none>2013-11-13 11:26:01 -0800
committersherman <none@none>2013-11-13 11:26:01 -0800
commit567f74c5ff29fccccc8c18ca2f9e54b59b645a8d (patch)
tree72ec124c79949ccd57ca75f93e81824dbe368b4a /src
parent0e7cac59c1b147c47ba1f2041f38f66cc6484032 (diff)
8027645: Pattern.split() with positive lookahead
6559590: Pattern.compile(".*").split("") returns incorrect result
Summary: updated spec/impl for these two corner cases
Reviewed-by: alanb, psandoz
Diffstat (limited to 'src')
-rw-r--r--src/share/classes/java/lang/String.java10
-rw-r--r--src/share/classes/java/util/regex/Pattern.java28
2 files changed, 33 insertions, 5 deletions
diff --git a/src/share/classes/java/lang/String.java b/src/share/classes/java/lang/String.java
index a7bed2280..a561c5d59 100644
--- a/src/share/classes/java/lang/String.java
+++ b/src/share/classes/java/lang/String.java
@@ -2235,7 +2235,13 @@ public final class String
* expression or is terminated by the end of the string. The substrings in
* the array are in the order in which they occur in this string. If the
* expression does not match any part of the input then the resulting array
- * has just one element, namely this string.
+ * has just one element, namely this string. A zero-length input sequence
+ * always results in a zero-length array.
+ *
+ * <p> When there is a positive-width match at the beginning of this
+ * string then an empty leading substring is included at the beginning
+ * of the resulting array. A zero-width match at the beginning, however,
+ * never produces such an empty leading substring.
*
* <p> The {@code limit} parameter controls the number of times the
* pattern is applied and therefore affects the length of the resulting
@@ -2325,6 +2331,8 @@ public final class String
(ch < Character.MIN_HIGH_SURROGATE ||
ch > Character.MAX_LOW_SURROGATE))
{
+ if (value.length == 0)
+ return new String[0];
int off = 0;
int next = 0;
boolean limited = limit > 0;
diff --git a/src/share/classes/java/util/regex/Pattern.java b/src/share/classes/java/util/regex/Pattern.java
index e9a0c67f7..f1bb37c63 100644
--- a/src/share/classes/java/util/regex/Pattern.java
+++ b/src/share/classes/java/util/regex/Pattern.java
@@ -1142,9 +1142,15 @@ public final class Pattern
* input sequence that is terminated by another subsequence that matches
* this pattern or is terminated by the end of the input sequence. The
* substrings in the array are in the order in which they occur in the
- * input. If this pattern does not match any subsequence of the input then
+ * input. If this pattern does not match any subsequence of the input then
* the resulting array has just one element, namely the input sequence in
- * string form.
+ * string form. A zero-length input sequence always results in a
+ * zero-length array.
+ *
+ * <p> When there is a positive-width match at the beginning of the input
+ * sequence then an empty leading substring is included at the beginning
+ * of the resulting array. A zero-width match at the beginning, however,
+ * never produces such an empty leading substring.
*
* <p> The <tt>limit</tt> parameter controls the number of times the
* pattern is applied and therefore affects the length of the resulting
@@ -1185,7 +1191,6 @@ public final class Pattern
* <td><tt>{ "b", "", ":and:f" }</tt></td></tr>
* </table></blockquote>
*
- *
* @param input
* The character sequence to be split
*
@@ -1196,6 +1201,8 @@ public final class Pattern
* around matches of this pattern
*/
public String[] split(CharSequence input, int limit) {
+ if (input.length() == 0)
+ return new String[0];
int index = 0;
boolean matchLimited = limit > 0;
ArrayList<String> matchList = new ArrayList<>();
@@ -1204,6 +1211,11 @@ public final class Pattern
// Add segments before each match found
while(m.find()) {
if (!matchLimited || matchList.size() < limit - 1) {
+ if (index == 0 && index == m.start() && m.start() == m.end()) {
+ // no empty leading substring included for zero-width match
+ // at the beginning of the input char sequence.
+ continue;
+ }
String match = input.subSequence(index, m.start()).toString();
matchList.add(match);
index = m.end();
@@ -5762,6 +5774,13 @@ NEXT: while (i <= last) {
* the resulting stream has just one element, namely the input sequence in
* string form.
*
+ * <p> A zero-length input sequence always results in an empty stream.
+ *
+ * <p> When there is a positive-width match at the beginning of the input
+ * sequence then an empty leading substring is included at the beginning
+ * of the stream. A zero-width match at the beginning, however, never
+ * produces such an empty leading substring.
+ *
* <p> If the input sequence is mutable, it must remain constant during the
* execution of the terminal stream operation. Otherwise, the result of the
* terminal stream operation is undefined.
@@ -5817,7 +5836,8 @@ NEXT: while (i <= last) {
current = matcher.end();
if (!nextElement.isEmpty()) {
return true;
- } else {
+ } else if (current > 0) { // no empty leading substring for zero-width
+ // match at the beginning of the input
emptyElementCount++;
}
}