Skip to content

Commit

Permalink
Use more efficient id lookups for key attributes in ElasticSearch bac…
Browse files Browse the repository at this point in the history
…kend

Elastic has [ids](https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl-ids-query.html) queries which return documents based on their ID.

Use those queries for `EQUAL/NOT_EQUAL` and `IN/NOT_IN` type operators
  • Loading branch information
asereda-gs committed Apr 26, 2020
1 parent 3e585a8 commit db4fa98
Show file tree
Hide file tree
Showing 11 changed files with 208 additions and 18 deletions.
Expand Up @@ -296,6 +296,31 @@ public void basic() {
check(person.isActive.isFalse().or().isActive.isTrue()).notEmpty();
}

/**
* Basic queries on ID
*/
@Test
void id() {
assumeFeature(Feature.QUERY);
PersonGenerator generator = new PersonGenerator();

insert(generator.next().withId("id1"));
insert(generator.next().withId("id2"));

check(person.id.is("id1")).toList(Person::id).isOf("id1");
check(person.id.is("id2")).toList(Person::id).isOf("id2");

check(person.id.in("id1", "id2")).toList(Person::id).hasContentInAnyOrder("id1", "id2");
check(person.id.in(Collections.singleton("id1"))).toList(Person::id).isOf("id1");

// negatives
check(person.id.isNot("id1")).toList(Person::id).hasContentInAnyOrder("id2");
check(person.id.isNot("id2")).toList(Person::id).hasContentInAnyOrder("id1");

check(person.id.notIn(Collections.singleton("id1"))).toList(Person::id).hasContentInAnyOrder("id2");
check(person.id.notIn(Collections.singleton("id2"))).toList(Person::id).hasContentInAnyOrder("id1");
}

@Test
public void nested() {
final ImmutablePerson john = new PersonGenerator().next().withBestFriend(ImmutableFriend.builder().hobby("ski").build());
Expand Down
Expand Up @@ -342,6 +342,28 @@ void specialChars() {
ids(string.value.is("<>|\\")).isOf("id4");
}

/**
* Basic queries on key
*/
@Test
void queryOnId() {
repository.insert(generator.get().withId("id1"));
repository.insert(generator.get().withId("id2"));

ids(string.id.is("id1")).isOf("id1");
ids(string.id.is("id2")).isOf("id2");

ids(string.id.in("id1", "id2")).hasContentInAnyOrder("id1", "id2");
ids(string.id.in(Collections.singleton("id1"))).isOf("id1");

// negatives
ids(string.id.isNot("id1")).hasContentInAnyOrder("id2");
ids(string.id.isNot("id2")).hasContentInAnyOrder("id1");
ids(string.id.notIn(Collections.singleton("id1"))).hasContentInAnyOrder("id2");
ids(string.id.notIn(Collections.singleton("id2"))).hasContentInAnyOrder("id1");
}


/**
* Return {@link TypeHolder.StringHolder#value()} after applying a criteria
*/
Expand Down
Expand Up @@ -44,6 +44,7 @@
import java.util.Map;
import java.util.Objects;
import java.util.Set;
import java.util.function.Predicate;
import java.util.stream.Collectors;

/**
Expand All @@ -67,8 +68,9 @@ class AggregateQueryBuilder {
private final ObjectMapper mapper;
private final JsonNodeFactory nodeFactory;
private final PathNaming pathNaming;
private final Predicate<Path> idPredicate;

AggregateQueryBuilder(Query query, ObjectMapper mapper, Mapping mapping, PathNaming pathNaming) {
AggregateQueryBuilder(Query query, ObjectMapper mapper, Mapping mapping, PathNaming pathNaming, Predicate<Path> idPredicate) {
this.query = Objects.requireNonNull(query, "query");
Preconditions.checkArgument(query.hasAggregations(), "no aggregations for query %s", query);
this.mapping = mapping;
Expand All @@ -80,6 +82,7 @@ class AggregateQueryBuilder {
naming = UniqueCachedNaming.of(toName);
this.mapper = mapper;
this.nodeFactory = mapper.getNodeFactory();
this.idPredicate = idPredicate;
}

ObjectNode jsonQuery() {
Expand All @@ -98,7 +101,7 @@ ObjectNode jsonQuery() {
json.put("size", 0);
json.put("stored_fields", "_none_"); // avoid fetch phase

query.filter().ifPresent(f -> json.set("query", Elasticsearch.constantScoreQuery(mapper, pathNaming).convert(f)));
query.filter().ifPresent(f -> json.set("query", Elasticsearch.constantScoreQuery(mapper, pathNaming, idPredicate).convert(f)));

// due to ES aggregation format. fields in "order by" clause should go first
// if "order by" is missing. order in "group by" is un-important
Expand Down
Expand Up @@ -48,7 +48,7 @@ public Single<Long> call() {
}

ObjectNode filter = query.filter()
.map(f -> Elasticsearch.toBuilder(f, session.pathNaming).toJson(session.objectMapper))
.map(f -> Elasticsearch.toBuilder(f, session.pathNaming, session.idPredicate).toJson(session.objectMapper))
.orElse(session.objectMapper.createObjectNode());

if (filter.size() != 0) {
Expand Down
Expand Up @@ -18,29 +18,54 @@

import com.fasterxml.jackson.databind.ObjectMapper;
import com.fasterxml.jackson.databind.node.ObjectNode;
import com.google.common.collect.Iterables;
import org.immutables.criteria.backend.KeyExtractor;
import org.immutables.criteria.backend.PathNaming;
import org.immutables.criteria.expression.Expression;
import org.immutables.criteria.expression.ExpressionConverter;
import org.immutables.criteria.expression.Path;
import org.immutables.criteria.expression.Visitors;

import java.util.Objects;
import java.util.function.Predicate;

final class Elasticsearch {

private Elasticsearch() {}

static QueryBuilders.QueryBuilder toBuilder(Expression expression, PathNaming pathNaming) {
return expression.accept(new ElasticsearchQueryVisitor(pathNaming));
static QueryBuilders.QueryBuilder toBuilder(Expression expression, PathNaming pathNaming, Predicate<Path> idPredicate) {
return expression.accept(new ElasticsearchQueryVisitor(pathNaming, idPredicate));
}

/**
* {@code query} part of the JSON
*/
static ExpressionConverter<ObjectNode> constantScoreQuery(ObjectMapper mapper, PathNaming pathNaming) {
static ExpressionConverter<ObjectNode> constantScoreQuery(ObjectMapper mapper, PathNaming pathNaming, Predicate<Path> idPredicate) {
Objects.requireNonNull(mapper, "expression");
return expression -> {
final QueryBuilders.QueryBuilder builder = toBuilder(expression, pathNaming);
final QueryBuilders.QueryBuilder builder = toBuilder(expression, pathNaming, idPredicate);
return QueryBuilders.constantScoreQuery(builder).toJson(mapper);
};
}

/**
* Predicate which checks if a path represents entity key (id). Used to generate {@code ids}
* type of queries.
*/
static Predicate<Path> idPredicate(KeyExtractor.KeyMetadata metadata) {
Objects.requireNonNull(metadata, "metadata");
Predicate<Path> alwaysFalse = p -> false;
if (!(metadata.isKeyDefined() && metadata.isExpression())) {
return alwaysFalse;
}

if (metadata.keys().size() != 1) {
return alwaysFalse;
}

Expression expression = Iterables.getOnlyElement(metadata.keys());
return Visitors.maybePath(expression)
.map(p -> ((Predicate<Path>) p::equals))
.orElse(alwaysFalse);
}
}
Expand Up @@ -37,6 +37,7 @@
import java.util.List;
import java.util.Objects;
import java.util.function.BiFunction;
import java.util.function.Predicate;
import java.util.stream.Collectors;

/**
Expand Down Expand Up @@ -75,6 +76,7 @@ static class Session implements Backend.Session {
final JsonConverter<Object> converter;
private final boolean hasId;
final PathNaming pathNaming;
final Predicate<Path> idPredicate;

private Session(Class<?> entityClass, KeyExtractor keyExtractor, ElasticsearchOps ops, PathNaming pathNaming) {
Objects.requireNonNull(entityClass, "entityClass");
Expand All @@ -86,6 +88,7 @@ private Session(Class<?> entityClass, KeyExtractor keyExtractor, ElasticsearchOp
KeyExtractor.KeyMetadata metadata = keyExtractor.metadata();
this.hasId = metadata.isKeyDefined();
this.pathNaming = pathNaming;
this.idPredicate = Elasticsearch.idPredicate(keyExtractor.metadata());
}

@Override
Expand All @@ -108,7 +111,7 @@ public Result execute(Operation query) {
private Flowable<ProjectedTuple> aggregate(StandardOperations.Select op) {
final Query query = op.query();
Preconditions.checkArgument(query.hasAggregations(), "No Aggregations");
AggregateQueryBuilder builder = new AggregateQueryBuilder(query, objectMapper, ops.mapping, pathNaming);
AggregateQueryBuilder builder = new AggregateQueryBuilder(query, objectMapper, ops.mapping, pathNaming, idPredicate);
return ops.searchRaw(builder.jsonQuery(), Collections.emptyMap())
.map(builder::processResult)
.toFlowable()
Expand All @@ -131,7 +134,7 @@ private Flowable<?> select(StandardOperations.Select op) {
}
final ObjectNode json = objectMapper.createObjectNode();

query.filter().ifPresent(f -> json.set("query", Elasticsearch.constantScoreQuery(objectMapper, pathNaming).convert(f)));
query.filter().ifPresent(f -> json.set("query", Elasticsearch.constantScoreQuery(objectMapper, pathNaming, idPredicate).convert(f)));
query.limit().ifPresent(limit -> json.put("size", limit));
query.offset().ifPresent(offset -> json.put("from", offset));
if (!query.collations().isEmpty()) {
Expand Down
Expand Up @@ -25,11 +25,14 @@
import org.immutables.criteria.expression.Operator;
import org.immutables.criteria.expression.Operators;
import org.immutables.criteria.expression.OptionalOperators;
import org.immutables.criteria.expression.Path;
import org.immutables.criteria.expression.StringOperators;
import org.immutables.criteria.expression.Visitors;

import java.util.Collections;
import java.util.List;
import java.util.Objects;
import java.util.function.Predicate;
import java.util.regex.Pattern;

/**
Expand All @@ -38,10 +41,12 @@
class ElasticsearchQueryVisitor extends AbstractExpressionVisitor<QueryBuilders.QueryBuilder> {

private final PathNaming pathNaming;
private final Predicate<Path> idPredicate;

ElasticsearchQueryVisitor(PathNaming pathNaming) {
ElasticsearchQueryVisitor(PathNaming pathNaming, Predicate<Path> idPredicate) {
super(e -> { throw new UnsupportedOperationException(); });
this.pathNaming = Objects.requireNonNull(pathNaming, "pathNaming");
this.idPredicate = Objects.requireNonNull(idPredicate, "idPredicate");
}

@Override
Expand Down Expand Up @@ -89,27 +94,41 @@ private QueryBuilders.QueryBuilder binaryCall(Call call) {
Preconditions.checkArgument(arguments.size() == 2, "Size should be 2 for %s but was %s",
call.operator(), arguments.size());
final Operator op = call.operator();
final String field = pathNaming.name(Visitors.toPath(arguments.get(0)));
final Path path = Visitors.toPath(arguments.get(0));
final String field = pathNaming.name(path);
final Object value = Visitors.toConstant(arguments.get(1)).value();

if (op == Operators.EQUAL || op == Operators.NOT_EQUAL) {
QueryBuilders.QueryBuilder term = QueryBuilders.termQuery(field, value);
QueryBuilders.QueryBuilder builder;
if (idPredicate.test(path)) {
// use more efficient ids query for keys
builder = QueryBuilders.idsQuery(Collections.singleton(value));
} else {
builder = QueryBuilders.termQuery(field, value);
}

if (op == Operators.NOT_EQUAL) {
QueryBuilders.BoolQueryBuilder bool = QueryBuilders.boolQuery().mustNot(term);
QueryBuilders.BoolQueryBuilder bool = QueryBuilders.boolQuery().mustNot(builder);
if ("".equals(value)) {
// string is not empty (should also exists)
bool = bool.should(QueryBuilders.existsQuery(field));
}
term = bool;
builder = bool;
}

return term;
return builder;
}

if (op == Operators.IN || op == Operators.NOT_IN) {
final List<Object> values = Visitors.toConstant(arguments.get(1)).values();

QueryBuilders.QueryBuilder builder = QueryBuilders.termsQuery(field, values);
QueryBuilders.QueryBuilder builder;
if (idPredicate.test(path)) {
// more efficient query by ID
builder = QueryBuilders.idsQuery(values);
} else {
builder = QueryBuilders.termsQuery(field, values);
}

if (op == Operators.NOT_IN) {
builder = QueryBuilders.boolQuery().mustNot(builder);
Expand Down
Expand Up @@ -187,6 +187,13 @@ static WildcardQueryBuilder wildcardQuery(String name, String wildcard) {
return new WildcardQueryBuilder(name, wildcard);
}

/**
* Filter on document ID ({@code _id}) values
*/
static IdsQueryBuilder idsQuery(Iterable<?> values) {
return new IdsQueryBuilder(values);
}

/**
* A query that matches on all documents.
*/
Expand Down Expand Up @@ -459,6 +466,25 @@ ObjectNode toJson(ObjectMapper mapper) {
}
}

/**
* @see <a href="https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl-ids-query.html">IDs query</a>
*/
static class IdsQueryBuilder extends QueryBuilder {
private final Iterable<?> values;

IdsQueryBuilder(Iterable<?> values) {
this.values = Objects.requireNonNull(values, "values");
}

@Override
ObjectNode toJson(ObjectMapper mapper) {
ObjectNode result = mapper.createObjectNode();
ArrayNode array = result.with("ids").withArray("values");
values.forEach(v -> array.add(toJsonValue(v, mapper)));
return result;
}
}

/**
* A query that wraps a filter and simply returns a constant score equal to the
* query boost for every document in the filter.
Expand Down
Expand Up @@ -23,15 +23,18 @@
import com.fasterxml.jackson.datatype.jdk8.Jdk8Module;
import com.fasterxml.jackson.datatype.jsr310.JavaTimeModule;
import org.immutables.criteria.Criterias;
import org.immutables.criteria.backend.KeyExtractor;
import org.immutables.criteria.backend.PathNaming;
import org.immutables.criteria.expression.Collation;
import org.immutables.criteria.expression.Path;
import org.immutables.criteria.expression.Query;
import org.immutables.criteria.matcher.Matchers;
import org.immutables.criteria.personmodel.Person;
import org.immutables.criteria.personmodel.PersonCriteria;
import org.junit.jupiter.api.Test;

import java.util.Collections;
import java.util.function.Predicate;

class AggregateQueryBuilderTest {

Expand All @@ -42,8 +45,8 @@ class AggregateQueryBuilderTest {
.registerModule(new Jdk8Module());

private final Mapping mapping = Mapping.ofElastic(PersonModel.MAPPING);

private final PathNaming pathNaming = PathNaming.defaultNaming();
private final Predicate<Path> idPredicate = Elasticsearch.idPredicate(KeyExtractor.defaultFactory().create(Person.class).metadata());

@Test
void agg1() {
Expand All @@ -61,7 +64,7 @@ void agg1() {
.withFilter(Criterias.toQuery(person.age.atLeast(30)).filter().get())
.withLimit(11);

AggregateQueryBuilder builder = new AggregateQueryBuilder(query, MAPPER, mapping, pathNaming);
AggregateQueryBuilder builder = new AggregateQueryBuilder(query, MAPPER, mapping, pathNaming, idPredicate);
ObjectNode json = builder.jsonQuery();

JsonChecker.of(json).is("{'_source':false,",
Expand Down

0 comments on commit db4fa98

Please sign in to comment.