Skip to content

Commit

Permalink
Change all instances of encoded.subSequence() to use a simple start i…
Browse files Browse the repository at this point in the history
…ndex offset, which reduces the Java memory footprint for TrieParser by at at least 130 MBs. This is because the original call to subSequence() would generate a new String on the heap for each call, where as just using an offset avoids the need to generate a new String all together.

This memory enhancement was found while investigating flakiness issues for some tests which had the same root cause of: java.lang.OutOfMemoryError: GC overhead limit exceeded, mostly in the TrieParser class.

100-test runs at original code:
Memory usage
- maximum: 503MiB (527,769,600 bytes)
- average: 388MiB (407,225,077 bytes)

100-test runs with my changes:
Memory usage
- maximum: 316MiB (325,271,552 bytes)
- average: 221MiB (239,355,320 bytes)

At maximum: 187 MB saved
At average: 167 MB saved

RELNOTES=Reduced heap memory usage when parsing domain names

-------------
Created by MOE: https://github.com/google/moe
MOE_MIGRATED_REVID=188111053
  • Loading branch information
redseiko authored and ronshapiro committed Mar 7, 2018
1 parent e47fc16 commit 7f1defa
Show file tree
Hide file tree
Showing 2 changed files with 14 additions and 16 deletions.
Expand Up @@ -23,7 +23,6 @@
/** Parser for a map of reversed domain names stored as a serialized radix tree. */
@GwtCompatible
final class TrieParser {

private static final Joiner PREFIX_JOINER = Joiner.on("");

/**
Expand All @@ -35,9 +34,7 @@ static ImmutableMap<String, PublicSuffixType> parseTrie(CharSequence encoded) {
int encodedLen = encoded.length();
int idx = 0;
while (idx < encodedLen) {
idx +=
doParseTrieToBuilder(
Lists.<CharSequence>newLinkedList(), encoded.subSequence(idx, encodedLen), builder);
idx += doParseTrieToBuilder(Lists.<CharSequence>newLinkedList(), encoded, idx, builder);
}
return builder.build();
}
Expand All @@ -48,16 +45,18 @@ static ImmutableMap<String, PublicSuffixType> parseTrie(CharSequence encoded) {
* @param stack The prefixes that precede the characters represented by this node. Each entry of
* the stack is in reverse order.
* @param encoded The serialized trie.
* @param start An index in the encoded serialized trie to begin reading characters from.
* @param builder A map builder to which all entries will be added.
* @return The number of characters consumed from {@code encoded}.
*/
private static int doParseTrieToBuilder(
List<CharSequence> stack,
CharSequence encoded,
int start,
ImmutableMap.Builder<String, PublicSuffixType> builder) {

int encodedLen = encoded.length();
int idx = 0;
int idx = start;
char c = '\0';

// Read all of the characters for this node.
Expand All @@ -68,7 +67,7 @@ private static int doParseTrieToBuilder(
}
}

stack.add(0, reverse(encoded.subSequence(0, idx)));
stack.add(0, reverse(encoded.subSequence(start, idx)));

if (c == '!' || c == '?' || c == ':' || c == ',') {
// '!' represents an interior node that represents a REGISTRY entry in the map.
Expand All @@ -85,7 +84,7 @@ private static int doParseTrieToBuilder(
if (c != '?' && c != ',') {
while (idx < encodedLen) {
// Read all the children
idx += doParseTrieToBuilder(stack, encoded.subSequence(idx, encodedLen), builder);
idx += doParseTrieToBuilder(stack, encoded, idx, builder);
if (encoded.charAt(idx) == '?' || encoded.charAt(idx) == ',') {
// An extra '?' or ',' after a child node indicates the end of all children of this node.
idx++;
Expand All @@ -94,7 +93,7 @@ private static int doParseTrieToBuilder(
}
}
stack.remove(0);
return idx;
return idx - start;
}

private static CharSequence reverse(CharSequence s) {
Expand Down
15 changes: 7 additions & 8 deletions guava/src/com/google/thirdparty/publicsuffix/TrieParser.java
Expand Up @@ -23,7 +23,6 @@
/** Parser for a map of reversed domain names stored as a serialized radix tree. */
@GwtCompatible
final class TrieParser {

private static final Joiner PREFIX_JOINER = Joiner.on("");

/**
Expand All @@ -35,9 +34,7 @@ static ImmutableMap<String, PublicSuffixType> parseTrie(CharSequence encoded) {
int encodedLen = encoded.length();
int idx = 0;
while (idx < encodedLen) {
idx +=
doParseTrieToBuilder(
Lists.<CharSequence>newLinkedList(), encoded.subSequence(idx, encodedLen), builder);
idx += doParseTrieToBuilder(Lists.<CharSequence>newLinkedList(), encoded, idx, builder);
}
return builder.build();
}
Expand All @@ -48,16 +45,18 @@ static ImmutableMap<String, PublicSuffixType> parseTrie(CharSequence encoded) {
* @param stack The prefixes that precede the characters represented by this node. Each entry of
* the stack is in reverse order.
* @param encoded The serialized trie.
* @param start An index in the encoded serialized trie to begin reading characters from.
* @param builder A map builder to which all entries will be added.
* @return The number of characters consumed from {@code encoded}.
*/
private static int doParseTrieToBuilder(
List<CharSequence> stack,
CharSequence encoded,
int start,
ImmutableMap.Builder<String, PublicSuffixType> builder) {

int encodedLen = encoded.length();
int idx = 0;
int idx = start;
char c = '\0';

// Read all of the characters for this node.
Expand All @@ -68,7 +67,7 @@ private static int doParseTrieToBuilder(
}
}

stack.add(0, reverse(encoded.subSequence(0, idx)));
stack.add(0, reverse(encoded.subSequence(start, idx)));

if (c == '!' || c == '?' || c == ':' || c == ',') {
// '!' represents an interior node that represents a REGISTRY entry in the map.
Expand All @@ -85,7 +84,7 @@ private static int doParseTrieToBuilder(
if (c != '?' && c != ',') {
while (idx < encodedLen) {
// Read all the children
idx += doParseTrieToBuilder(stack, encoded.subSequence(idx, encodedLen), builder);
idx += doParseTrieToBuilder(stack, encoded, idx, builder);
if (encoded.charAt(idx) == '?' || encoded.charAt(idx) == ',') {
// An extra '?' or ',' after a child node indicates the end of all children of this node.
idx++;
Expand All @@ -94,7 +93,7 @@ private static int doParseTrieToBuilder(
}
}
stack.remove(0);
return idx;
return idx - start;
}

private static CharSequence reverse(CharSequence s) {
Expand Down

0 comments on commit 7f1defa

Please sign in to comment.