Skip to content

Commit

Permalink
perf: utilize char array over string in irc parser (#840)
Browse files (browse the repository at this point in the history)
  • Loading branch information
iProdigy committed Sep 6, 2023
1 parent 8306594 commit 5751b46
Show file tree
Hide file tree
Showing 2 changed files with 13 additions and 35 deletions.
@@ -1,10 +1,7 @@
package com.github.twitch4j.chat.parser;

import com.github.twitch4j.chat.events.channel.IRCMessageEvent;
import com.github.twitch4j.chat.util.BenchmarkFileUtils;
import com.github.twitch4j.chat.util.MessageParser;
import com.google.code.regexp.Matcher;
import com.google.code.regexp.Pattern;
import org.openjdk.jmh.annotations.Benchmark;
import org.openjdk.jmh.annotations.BenchmarkMode;
import org.openjdk.jmh.annotations.Mode;
Expand All @@ -26,13 +23,10 @@

@State(Scope.Thread)
public class MessageParserBenchmark {
private static final Pattern MESSAGE_PATTERN = Pattern.compile("^(?:@(?<tags>\\S+?)\\s)?(?<clientName>\\S+?)\\s(?<command>[A-Z0-9]+)\\s?(?:(?<login>\\S+)\\s=\\s)?(?:#(?<channel>\\S*?)\\s?)?(?<payload>[:\\-+](?<message>.+))?$");

private Map<String, String> idToName;
private Map<String, String> nameToId;

private String[] rawMessages;
private String[] rawTags;

@Setup
public void setupBenchmark() throws IOException {
Expand All @@ -45,14 +39,6 @@ public void setupBenchmark() throws IOException {
// parse raw messages
try (BufferedReader reader = new BufferedReader(new InputStreamReader(Files.newInputStream(BenchmarkFileUtils.resolveFilePath("benchmark-chat-jprochazk.txt").toPath())))) {
rawMessages = reader.lines().limit(1000).toArray(String[]::new);
rawTags = reader.lines().limit(1000).map(input -> {
Matcher matcher = MESSAGE_PATTERN.matcher(input);
if (matcher.matches()) {
return matcher.group("tags");
} else {
return "";
}
}).toArray(String[]::new);
}
}

Expand All @@ -66,23 +52,13 @@ public void parse1kMessages(Blackhole bh) {
}
}

/**
 * JMH benchmark that calls {@code IRCMessageEvent.parseTags} on the first 1000 entries of {@code rawTags}.
 * <p>
 * Configured for average-time mode with microsecond output; {@code @OperationsPerInvocation(1000)} tells JMH
 * that one invocation of this method performs 1000 operations.
 *
 * @param bh JMH blackhole that receives every parse result
 */
@Benchmark
@BenchmarkMode(Mode.AverageTime)
@OutputTimeUnit(TimeUnit.MICROSECONDS)
@OperationsPerInvocation(1000)
public void parse1kTagsOld(Blackhole bh) {
// The bound is hard-coded to match @OperationsPerInvocation(1000); rawTags must hold at least 1000 entries
// or the indexing below fails.
for (int i = 0; i < 1000; i++) {
// Pass the result to the blackhole so each parse call has a consumer.
bh.consume(IRCMessageEvent.parseTags(rawTags[i]));
}
}

/**
 * JMH benchmark that calls {@code MessageParser.parseTags} 1000 times, each with a fresh {@code HashMap}
 * (initial capacity 32) as the output map.
 * <p>
 * Configured for average-time mode with microsecond output; {@code @OperationsPerInvocation(1000)} tells JMH
 * that one invocation of this method performs 1000 operations.
 *
 * @param bh JMH blackhole that receives every parse result
 */
@Benchmark
@BenchmarkMode(Mode.AverageTime)
@OutputTimeUnit(TimeUnit.MICROSECONDS)
@OperationsPerInvocation(1000)
public void parse1kTags(Blackhole bh) {
// The bound is hard-coded to match @OperationsPerInvocation(1000).
for (int i = 0; i < 1000; i++) {
// NOTE(review): the next two statements look like the before/after forms of a single line in a diff view
// (String input taken from rawTags vs. char[] input built from rawMessages[i]); presumably only one of
// them exists in the real file -- confirm against the repository.
bh.consume(MessageParser.parseTags(rawTags[i], new HashMap<>(32)));
// This form includes the String -> char[] conversion (toCharArray) inside the measured loop.
bh.consume(MessageParser.parseTags(rawMessages[i].toCharArray(), new HashMap<>(32)));
}
}

Expand Down
22 changes: 12 additions & 10 deletions chat/src/main/java/com/github/twitch4j/chat/util/MessageParser.java
Expand Up @@ -2,6 +2,7 @@

import com.github.twitch4j.chat.events.channel.IRCMessageEvent;
import lombok.experimental.UtilityClass;
import org.apache.commons.lang3.ArrayUtils;
import org.apache.commons.lang3.StringUtils;
import org.jetbrains.annotations.ApiStatus;
import org.jetbrains.annotations.NotNull;
Expand Down Expand Up @@ -29,23 +30,25 @@ public IRCMessageEvent parse(@NotNull String rawMessage) {
@ApiStatus.Internal
public IRCMessageEvent parse(@NotNull String raw, @NotNull Map<String, String> channelIdToChannelName, @NotNull Map<String, String> channelNameToChannelId, @Nullable Collection<String> botOwnerIds) {
final int len = raw.length();
if (len == 0) return null;
final char[] chars = raw.toCharArray();
int i = 0;

// Tags
final Map<String, CharSequence> tags;
if (raw.startsWith("@")) {
if (chars[0] == '@') {
tags = new HashMap<>(32);
i = parseTags(raw, tags);
i = parseTags(chars, tags);
} else {
tags = Collections.emptyMap();
}

// Client
if (raw.charAt(i) == ':') i++;
if (chars[i] == ':') i++;
int exclamation = -1;
int space = -1;
for (int j = i; j < len; j++) {
final char c = raw.charAt(j);
final char c = chars[j];
if (c == '!') {
if (exclamation < 0)
exclamation = j;
Expand All @@ -56,11 +59,11 @@ public IRCMessageEvent parse(@NotNull String raw, @NotNull Map<String, String> c
}
if (space < 0 || space + 1 >= len) return null;
final int clientNameEnd = exclamation > 0 ? exclamation : space;
final CharSequence clientName = CharBuffer.wrap(raw, i, clientNameEnd);
final CharSequence clientName = CharBuffer.wrap(chars, i, clientNameEnd - i);
i = space + 1;

// Command
int commandEnd = raw.indexOf(' ', i);
int commandEnd = ArrayUtils.indexOf(chars, ' ', i);
if (commandEnd < 0) {
commandEnd = len;
}
Expand All @@ -76,7 +79,7 @@ public IRCMessageEvent parse(@NotNull String raw, @NotNull Map<String, String> c
if (messageStart < 0) {
messageStart = len;
}
final CharSequence channel = CharBuffer.wrap(raw, i, messageStart);
final CharSequence channel = CharBuffer.wrap(chars, i, messageStart - i);
final int chanDelim = StringUtils.indexOf(channel, " = "); // handle 353 NAMES
final CharSequence channelPart = chanDelim < 0 ? channel : channel.subSequence(chanDelim + " = ".length(), channel.length());
final String channelName = (
Expand All @@ -90,14 +93,13 @@ public IRCMessageEvent parse(@NotNull String raw, @NotNull Map<String, String> c
if (messageStart >= len) {
return new IRCMessageEvent(raw, tags, clientName, commandType, channelName, null, null, channelIdToChannelName, channelNameToChannelId, botOwnerIds);
}
final CharSequence payload = CharBuffer.wrap(raw, messageStart, len);
final CharSequence payload = CharBuffer.wrap(chars, messageStart, len - messageStart);
final String message = raw.substring(messageStart + 1);
return new IRCMessageEvent(raw, tags, clientName, commandType, channelName, payload, message, channelIdToChannelName, channelNameToChannelId, botOwnerIds);
}

@VisibleForTesting
public int parseTags(String input, Map<String, CharSequence> output) {
final char[] inputChars = input.toCharArray(); // more memory yet 20% faster (HeapCharBuffer vs StringCharBuffer)
public int parseTags(char[] inputChars, Map<String, CharSequence> output) {
final int len = inputChars.length;
int i = 0;
int delim = -1;
Expand Down

0 comments on commit 5751b46

Please sign in to comment.