Skip to content

Commit

Permalink
Convert non-UTF-8 JSON
Browse files Browse the repository at this point in the history
Jackson's asynchronous parser does not support any encoding except UTF-8
(or ASCII). This commit converts non-UTF-8/ASCII encoded JSON to UTF-8.

Closes gh-24489
  • Loading branch information
poutsma committed Feb 20, 2020
1 parent 4e55262 commit 439ffe2
Show file tree
Hide file tree
Showing 3 changed files with 101 additions and 4 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -120,11 +120,29 @@ public Flux<Object> decode(Publisher<DataBuffer> input, ResolvableType elementTy
forceUseOfBigDecimal = true;
}

Flux<TokenBuffer> tokens = Jackson2Tokenizer.tokenize(Flux.from(input), this.jsonFactory, getObjectMapper(),
Flux<DataBuffer> processed = processInput(input, elementType, mimeType, hints);
Flux<TokenBuffer> tokens = Jackson2Tokenizer.tokenize(processed, this.jsonFactory, getObjectMapper(),
true, forceUseOfBigDecimal, getMaxInMemorySize());
return decodeInternal(tokens, elementType, mimeType, hints);
}

/**
 * Turn the given input publisher into the {@link Flux} of buffers that is
 * subsequently tokenized. This base implementation merely adapts the
 * publisher via {@link Flux#from(Publisher)}; subclasses may override it
 * to transform the buffers first.
 * @param input the {@code DataBuffer} input stream to process
 * @param elementType the expected type of elements in the output stream
 * @param mimeType the MIME type associated with the input stream (optional)
 * @param hints additional information about how to decode (optional)
 * @return the processed flux
 * @since 5.1.14
 */
protected Flux<DataBuffer> processInput(Publisher<DataBuffer> input, ResolvableType elementType,
        @Nullable MimeType mimeType, @Nullable Map<String, Object> hints) {

    // No transformation by default: just view the publisher as a Flux.
    Flux<DataBuffer> adapted = Flux.from(input);
    return adapted;
}

@Override
public Mono<Object> decodeToMono(Publisher<DataBuffer> input, ResolvableType elementType,
@Nullable MimeType mimeType, @Nullable Map<String, Object> hints) {
Expand All @@ -134,7 +152,8 @@ public Mono<Object> decodeToMono(Publisher<DataBuffer> input, ResolvableType ele
forceUseOfBigDecimal = true;
}

Flux<TokenBuffer> tokens = Jackson2Tokenizer.tokenize(Flux.from(input), this.jsonFactory, getObjectMapper(),
Flux<DataBuffer> processed = processInput(input, elementType, mimeType, hints);
Flux<TokenBuffer> tokens = Jackson2Tokenizer.tokenize(processed, this.jsonFactory, getObjectMapper(),
false, forceUseOfBigDecimal, getMaxInMemorySize());
return decodeInternal(tokens, elementType, mimeType, hints).singleOrEmpty();
}
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright 2002-2018 the original author or authors.
* Copyright 2002-2020 the original author or authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand All @@ -16,10 +16,24 @@

package org.springframework.http.codec.json;

import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import java.util.Arrays;
import java.util.Map;

import com.fasterxml.jackson.databind.ObjectMapper;
import org.reactivestreams.Publisher;
import reactor.core.publisher.Flux;

import org.springframework.core.ResolvableType;
import org.springframework.core.codec.StringDecoder;
import org.springframework.core.io.buffer.DataBuffer;
import org.springframework.core.io.buffer.DataBufferFactory;
import org.springframework.core.io.buffer.DefaultDataBufferFactory;
import org.springframework.http.converter.json.Jackson2ObjectMapperBuilder;
import org.springframework.lang.Nullable;
import org.springframework.util.MimeType;
import org.springframework.util.MimeTypeUtils;

/**
* Decode a byte stream into JSON and convert to Object's with Jackson 2.9,
Expand All @@ -32,6 +46,11 @@
*/
public class Jackson2JsonDecoder extends AbstractJackson2Decoder {

// Text decoder used by processInput to turn non-UTF-8 input into Strings.
// It is configured with "," and "\n" as delimiters; the trailing 'false' is
// the stripDelimiter flag of StringDecoder.textPlainOnly, i.e. delimiters
// are presumably kept in the emitted strings so no JSON text is lost.
private static final StringDecoder STRING_DECODER = StringDecoder.textPlainOnly(Arrays.asList(",", "\n"), false);

// Element type passed to STRING_DECODER.decode: plain String.
private static final ResolvableType STRING_TYPE = ResolvableType.forClass(String.class);


/**
 * Create a decoder backed by the {@code ObjectMapper} produced by
 * {@code Jackson2ObjectMapperBuilder.json().build()}.
 */
public Jackson2JsonDecoder() {
super(Jackson2ObjectMapperBuilder.json().build());
}
Expand All @@ -40,4 +59,28 @@ public Jackson2JsonDecoder(ObjectMapper mapper, MimeType... mimeTypes) {
super(mapper, mimeTypes);
}

/**
 * Re-encode the input as UTF-8 when the given MIME type declares a charset
 * other than UTF-8 or US-ASCII. If there is no MIME type, no charset, or
 * the charset is already UTF-8/US-ASCII, the input is returned as-is
 * (adapted to a {@link Flux}).
 * <p>Otherwise the buffers are decoded to Strings with the declared
 * charset and each String is wrapped into a new buffer holding its UTF-8
 * bytes.
 */
@Override
protected Flux<DataBuffer> processInput(Publisher<DataBuffer> input, ResolvableType elementType,
        @Nullable MimeType mimeType, @Nullable Map<String, Object> hints) {

    Flux<DataBuffer> unchanged = Flux.from(input);

    // Jackson asynchronous parser only supports UTF-8 (US-ASCII is accepted as-is, too)
    Charset declared = (mimeType != null ? mimeType.getCharset() : null);
    boolean needsTranscoding = declared != null
            && !StandardCharsets.UTF_8.equals(declared)
            && !StandardCharsets.US_ASCII.equals(declared);
    if (!needsTranscoding) {
        return unchanged;
    }

    // Potentially, the memory consumption of this conversion could be improved by using CharBuffers
    // instead of allocating Strings, but that would require refactoring the buffer tokenization code
    // from StringDecoder

    MimeType sourceTextType = new MimeType(MimeTypeUtils.TEXT_PLAIN, declared);
    DataBufferFactory bufferFactory = new DefaultDataBufferFactory();
    return STRING_DECODER.decode(input, STRING_TYPE, sourceTextType, null)
            .map(text -> bufferFactory.wrap(text.getBytes(StandardCharsets.UTF_8)));
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@

import java.io.IOException;
import java.math.BigDecimal;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import java.util.Collections;
import java.util.List;
Expand All @@ -34,6 +35,7 @@
import reactor.core.publisher.Mono;
import reactor.test.StepVerifier;

import org.springframework.core.ParameterizedTypeReference;
import org.springframework.core.ResolvableType;
import org.springframework.core.codec.AbstractDecoderTestCase;
import org.springframework.core.codec.CodecException;
Expand Down Expand Up @@ -218,9 +220,42 @@ public void bigDecimalFlux() {
);
}

// Decoding a stream: a UTF-16 encoded JSON object, announced as such via the
// charset parameter of the media type, must yield the expected map entry.
@Test
public void decodeNonUtf8Encoding() {
    Mono<DataBuffer> utf16Json = stringBuffer("{\"foo\":\"bar\"}", StandardCharsets.UTF_16);
    ResolvableType mapType =
            ResolvableType.forType(new ParameterizedTypeReference<Map<String, String>>() {});
    MediaType utf16JsonType = MediaType.parseMediaType("application/json; charset=utf-16");

    testDecode(utf16Json, mapType,
            step -> step.assertNext(value -> {
                        Map<String, String> decoded = (Map<String, String>) value;
                        assertEquals("bar", decoded.get("foo"));
                    })
                    .verifyComplete(),
            utf16JsonType,
            null);
}

// Decoding to a single value: a UTF-16 encoded JSON object, announced as such
// via the charset parameter of the media type, must yield the expected map entry.
@Test
public void decodeMonoNonUtf8Encoding() {
    Mono<DataBuffer> utf16Json = stringBuffer("{\"foo\":\"bar\"}", StandardCharsets.UTF_16);
    ResolvableType mapType =
            ResolvableType.forType(new ParameterizedTypeReference<Map<String, String>>() {});
    MediaType utf16JsonType = MediaType.parseMediaType("application/json; charset=utf-16");

    testDecodeToMono(utf16Json, mapType,
            step -> step.assertNext(value -> {
                        Map<String, String> decoded = (Map<String, String>) value;
                        assertEquals("bar", decoded.get("foo"));
                    })
                    .verifyComplete(),
            utf16JsonType,
            null);
}

// Convenience overload: delegates to stringBuffer(String, Charset) with UTF-8.
private Mono<DataBuffer> stringBuffer(String value) {
    Charset defaultCharset = StandardCharsets.UTF_8;
    return stringBuffer(value, defaultCharset);
}

private Mono<DataBuffer> stringBuffer(String value, Charset charset) {
return Mono.defer(() -> {
byte[] bytes = value.getBytes(StandardCharsets.UTF_8);
byte[] bytes = value.getBytes(charset);
DataBuffer buffer = this.bufferFactory.allocateBuffer(bytes.length);
buffer.write(bytes);
return Mono.just(buffer);
Expand Down

0 comments on commit 439ffe2

Please sign in to comment.