Skip to content

Commit

Permalink
fix: preserve unquoted unicode whitespace in array literals (#1266)
Browse files Browse the repository at this point in the history
* fix: don't ignore unquoted non-ascii whitespace

The postgresql backend only quotes ascii whitespace.
#1257

add comment to existing isSpace method
change from switch to chained ORs

* test: fix comment in ArrayTest.testDirectFieldString

* fix: revert spaces in array literal

* add changelog entry

add test showing inner white spaces not dropped on unquoted strings by
backend.

Co-authored-by: Vladimir Sitnikov <sitnikov.vladimir@gmail.com>
Co-authored-by: BO8979 <BO8979@W1971362.northamerica.cerner.net>
  • Loading branch information
3 people authored and davecramer committed Aug 11, 2020
1 parent 0fe766b commit 8ad48a6
Show file tree
Hide file tree
Showing 4 changed files with 106 additions and 6 deletions.
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,8 @@ Sehrope Sarkuni reworked the XML parsing to provide a solution in commit [14b62a
- jre-6 was added back to allow us to release fixes for all artifacts in the 42.2.x branch [PR 1787](https://github.com/pgjdbc/pgjdbc/pull/1787)

### Fixed
- fix: preserve unquoted unicode whitespace in array literals [PR 1266](https://github.com/pgjdbc/pgjdbc/pull/1266)
- Fixed async copy performance regression [PR 1314](https://github.com/pgjdbc/pgjdbc/pull/1314)
- I/O error ru translation [PR 1756](https://github.com/pgjdbc/pgjdbc/pull/1756)
- Issue [1771](https://github.com/pgjdbc/pgjdbc/issues/1771) PgDatabaseMetaData.getFunctions() returns
procedures fixed in [PR 1774](https://github.com/pgjdbc/pgjdbc/pull/1774)
Expand Down
23 changes: 23 additions & 0 deletions pgjdbc/src/main/java/org/postgresql/core/Parser.java
Original file line number Diff line number Diff line change
Expand Up @@ -776,13 +776,36 @@ public static int digitAt(String s, int pos) {
}

/**
* Identifies characters which the backend scanner considers to be whitespace.
*
* <p>
* https://github.com/postgres/postgres/blob/17bb62501787c56e0518e61db13a523d47afd724/src/backend/parser/scan.l#L194-L198
* </p>
*
* @param c character
* @return true if the character is a whitespace character as defined in the backend's parser
*/
public static boolean isSpace(char c) {
return c == ' ' || c == '\t' || c == '\n' || c == '\r' || c == '\f';
}

/**
* Identifies white space characters which the backend uses to determine if a
* {@code String} value needs to be quoted in array representation.
*
* <p>
* https://github.com/postgres/postgres/blob/f2c587067a8eb9cf1c8f009262381a6576ba3dd0/src/backend/utils/adt/arrayfuncs.c#L421-L438
* </p>
*
* @param c
* Character to examine.
* @return Indication if the character is a whitespace which back end will
* escape.
*/
public static boolean isArrayWhiteSpace(char c) {
return c == ' ' || c == '\t' || c == '\n' || c == '\r' || c == '\f' || c == 0x0B;
}

/**
* @param c character
* @return true if the given character is a valid character for an operator in the backend's
Expand Down
3 changes: 2 additions & 1 deletion pgjdbc/src/main/java/org/postgresql/jdbc/ArrayDecoding.java
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@

import org.postgresql.core.BaseConnection;
import org.postgresql.core.Oid;
import org.postgresql.core.Parser;
import org.postgresql.jdbc2.ArrayAssistant;
import org.postgresql.jdbc2.ArrayAssistantRegistry;
import org.postgresql.util.GT;
Expand Down Expand Up @@ -654,7 +655,7 @@ static PgArrayList buildArrayList(String fieldString, char delim) {
insideString = !insideString;
wasInsideString = true;
continue;
} else if (!insideString && Character.isWhitespace(chars[i])) {
} else if (!insideString && Parser.isArrayWhiteSpace(chars[i])) {
// white space
continue;
} else if ((!insideString && (chars[i] == delim || chars[i] == '}')) || i == chars.length - 1) {
Expand Down
84 changes: 79 additions & 5 deletions pgjdbc/src/test/java/org/postgresql/test/jdbc2/ArrayTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@

import org.postgresql.PGConnection;
import org.postgresql.core.BaseConnection;
import org.postgresql.core.Oid;
import org.postgresql.geometric.PGbox;
import org.postgresql.geometric.PGpoint;
import org.postgresql.jdbc.PgArray;
Expand Down Expand Up @@ -87,10 +88,11 @@ public void testSetNull() throws SQLException {

@Test
public void testSetPrimitiveObjects() throws SQLException {
final String stringWithNonAsciiWhiteSpace = "a\u2001b";
PreparedStatement pstmt = conn.prepareStatement("INSERT INTO arrtest VALUES (?,?,?)");
pstmt.setObject(1, new int[]{1,2,3}, Types.ARRAY);
pstmt.setObject(1, new int[]{1, 2, 3}, Types.ARRAY);
pstmt.setObject(2, new double[]{3.1d, 1.4d}, Types.ARRAY);
pstmt.setObject(3, new String[]{"abc", "f'a", "fa\"b"}, Types.ARRAY);
pstmt.setObject(3, new String[]{stringWithNonAsciiWhiteSpace, "f'a", " \tfa\"b "}, Types.ARRAY);
pstmt.executeUpdate();
pstmt.close();

Expand Down Expand Up @@ -118,7 +120,10 @@ public void testSetPrimitiveObjects() throws SQLException {
String[] strarr = (String[]) arr.getArray(2, 2);
assertEquals(2, strarr.length);
assertEquals("f'a", strarr[0]);
assertEquals("fa\"b", strarr[1]);
assertEquals(" \tfa\"b ", strarr[1]);

strarr = (String[]) arr.getArray();
assertEquals(stringWithNonAsciiWhiteSpace, strarr[0]);

rs.close();
}
Expand Down Expand Up @@ -207,7 +212,7 @@ public void testRetrieveArrays() throws SQLException {

// you need a lot of backslashes to get a double quote in.
stmt.executeUpdate("INSERT INTO arrtest VALUES ('{1,2,3}','{3.1,1.4}', '"
+ TestUtil.escapeString(conn, "{abc,f'a,\"fa\\\"b\",def}") + "')");
+ TestUtil.escapeString(conn, "{abc,f'a,\"fa\\\"b\",def, un quot\u000B \u2001 \r}") + "')");

ResultSet rs = stmt.executeQuery("SELECT intarr, decarr, strarr FROM arrtest");
Assert.assertTrue(rs.next());
Expand All @@ -234,6 +239,10 @@ public void testRetrieveArrays() throws SQLException {
Assert.assertEquals("f'a", strarr[0]);
Assert.assertEquals("fa\"b", strarr[1]);

strarr = (String[]) arr.getArray();
assertEquals(5, strarr.length);
assertEquals("un quot\u000B \u2001", strarr[4]);

rs.close();
stmt.close();
}
Expand All @@ -242,9 +251,10 @@ public void testRetrieveArrays() throws SQLException {
public void testRetrieveResultSets() throws SQLException {
Statement stmt = conn.createStatement();

final String stringWithNonAsciiWhiteSpace = "a\u2001b";
// you need a lot of backslashes to get a double quote in.
stmt.executeUpdate("INSERT INTO arrtest VALUES ('{1,2,3}','{3.1,1.4}', '"
+ TestUtil.escapeString(conn, "{abc,f'a,\"fa\\\"b\",def}") + "')");
+ TestUtil.escapeString(conn, "{\"a\u2001b\",f'a,\"fa\\\"b\",def}") + "')");

ResultSet rs = stmt.executeQuery("SELECT intarr, decarr, strarr FROM arrtest");
Assert.assertTrue(rs.next());
Expand Down Expand Up @@ -289,6 +299,13 @@ public void testRetrieveResultSets() throws SQLException {
Assert.assertTrue(!arrrs.next());
arrrs.close();

arrrs = arr.getResultSet(1, 1);
Assert.assertTrue(arrrs.next());
Assert.assertEquals(1, arrrs.getInt(1));
Assert.assertEquals(stringWithNonAsciiWhiteSpace, arrrs.getString(2));
Assert.assertFalse(arrrs.next());
arrrs.close();

rs.close();
stmt.close();
}
Expand Down Expand Up @@ -391,6 +408,63 @@ public void testNullFieldString() throws SQLException {
Assert.assertNull(arr.toString());
}

@Test
public void testDirectFieldString() throws SQLException {
Array arr = new PgArray((BaseConnection) conn, Oid.VARCHAR_ARRAY,
"{\" lead\t\", unquot\u000B \u2001 \r, \" \fnew \n \"\t, \f\" \" }");
final String[] array = (String[]) arr.getArray();
assertEquals(4, array.length);
assertEquals(" lead\t", array[0]);
assertEquals(" \fnew \n ", array[2]);
assertEquals(" ", array[3]);

// PostgreSQL drops leading and trailing whitespace, so does the driver
assertEquals("unquot\u2001", array[1]);
}

@Test
public void testStringEscaping() throws SQLException {

final String stringArray = "{f'a,\"fa\\\"b\",def, un quot\u000B \u2001 \r, someString }";

final Statement stmt = conn.createStatement();
try {

stmt.executeUpdate("INSERT INTO arrtest VALUES (NULL, NULL, '" + TestUtil.escapeString(conn, stringArray) + "')");

final ResultSet rs = stmt.executeQuery("SELECT strarr FROM arrtest");
Assert.assertTrue(rs.next());

Array arr = rs.getArray(1);
Assert.assertEquals(Types.VARCHAR, arr.getBaseType());
String[] strarr = (String[]) arr.getArray();
assertEquals(5, strarr.length);
assertEquals("f'a", strarr[0]);
assertEquals("fa\"b", strarr[1]);
assertEquals("def", strarr[2]);
assertEquals("un quot\u000B \u2001", strarr[3]);
assertEquals("someString", strarr[4]);

rs.close();
} finally {
stmt.close();
}

final Array directArray = new PgArray((BaseConnection) conn, Oid.VARCHAR_ARRAY, stringArray);
final String[] actual = (String[]) directArray.getArray();
assertEquals(5, actual.length);
assertEquals("f'a", actual[0]);
assertEquals("fa\"b", actual[1]);
assertEquals("def", actual[2]);
assertEquals("someString", actual[4]);

// the driver strips out ascii white spaces from an unescaped string, even in
// the middle of the value. while this does not exactly match the behavior of
// the backend, it will always quote values where ascii white spaces are
// present, making this difference not worth the complexity involved addressing.
assertEquals("unquot\u2001", actual[3]);
}

@Test
public void testUnknownArrayType() throws SQLException {
Statement stmt = conn.createStatement();
Expand Down

0 comments on commit 8ad48a6

Please sign in to comment.