Skip to content

Commit

Permalink
feat(schema): Update schema for Elasticsearch 5 and 6
Browse files Browse the repository at this point in the history
This change makes our Elasticsearch schema compatible with Elasticsearch
5 and 6. It shouldn't have any effect on performance or operation, but it
will completely drop compatibility for Elasticsearch 2.

The primary change is that Elasticsearch 5 introduces two types of text
fields: `text` and `keyword`, whereas Elasticsearch 2 had only one:
`string`.

Roughly, a `text` field is for true full text search and a `keyword`
field is for simple values that are primarily used for filtering or
aggregation (for example, our `source` and `layer` fields). The `string` datatype previously filled both of those roles depending on
how it was configured.

Fortunately, we had already roughly created a concept similar to the
`keyword` datatype in our schema, but called it `literal`. This has been
renamed to `keyword` to cut down on the number of terms needed.

One nice effect of this change is that it removes all deprecation
warnings printed by Elasticsearch 5. Notably, as discovered in
a comment on #337, these
warnings were quite noisy and required special handling to work around
Node.js header size restrictions. This special handling can now be
removed.

Fixes pelias/whosonfirst#457
Connects pelias/pelias#719
Connects pelias/pelias#461
  • Loading branch information
orangejulius committed Jul 5, 2019
1 parent 77c4e13 commit e797503
Show file tree
Hide file tree
Showing 14 changed files with 2,124 additions and 3,951 deletions.
77 changes: 34 additions & 43 deletions mappings/document.js
Expand Up @@ -2,16 +2,16 @@ const admin = require('./partial/admin');
const postalcode = require('./partial/postalcode');
const hash = require('./partial/hash');
const multiplier = require('./partial/multiplier');
const literal = require('./partial/literal');
const literal_with_doc_values = require('./partial/literal_with_doc_values');
const keyword = require('./partial/keyword');
const keyword_with_doc_values = require('./partial/keyword_with_doc_values');
const config = require('pelias-config').generate();

var schema = {
properties: {

// data partitioning
source: literal_with_doc_values,
layer: literal_with_doc_values,
source: keyword_with_doc_values,
layer: keyword_with_doc_values,

// place name (ngram analysis)
name: hash,
Expand All @@ -25,27 +25,27 @@ var schema = {
dynamic: 'strict',
properties: {
name: {
type: 'string',
type: 'text',
analyzer: 'keyword',
},
unit: {
type: 'string',
type: 'text',
analyzer: 'peliasUnit',
},
number: {
type: 'string',
type: 'text',
analyzer: 'peliasHousenumber',
},
street: {
type: 'string',
type: 'text',
analyzer: 'peliasStreet',
},
cross_street: {
type: 'string',
type: 'text',
analyzer: 'peliasStreet',
},
zip: {
type: 'string',
type: 'text',
analyzer: 'peliasZip',
},
}
Expand All @@ -59,77 +59,77 @@ var schema = {
// https://github.com/whosonfirst/whosonfirst-placetypes#continent
continent: admin,
continent_a: admin,
continent_id: literal,
continent_id: keyword,

// https://github.com/whosonfirst/whosonfirst-placetypes#ocean
ocean: admin,
ocean_a: admin,
ocean_id: literal,
ocean_id: keyword,

// https://github.com/whosonfirst/whosonfirst-placetypes#empire
empire: admin,
empire_a: admin,
empire_id: literal,
empire_id: keyword,

// https://github.com/whosonfirst/whosonfirst-placetypes#country
country: admin,
country_a: admin,
country_id: literal,
country_id: keyword,

// https://github.com/whosonfirst/whosonfirst-placetypes#dependency
dependency: admin,
dependency_a: admin,
dependency_id: literal,
dependency_id: keyword,

// https://github.com/whosonfirst/whosonfirst-placetypes#marinearea
marinearea: admin,
marinearea_a: admin,
marinearea_id: literal,
marinearea_id: keyword,

// https://github.com/whosonfirst/whosonfirst-placetypes#macroregion
macroregion: admin,
macroregion_a: admin,
macroregion_id: literal,
macroregion_id: keyword,

// https://github.com/whosonfirst/whosonfirst-placetypes#region
region: admin,
region_a: admin,
region_id: literal,
region_id: keyword,

// https://github.com/whosonfirst/whosonfirst-placetypes#macrocounty
macrocounty: admin,
macrocounty_a: admin,
macrocounty_id: literal,
macrocounty_id: keyword,

// https://github.com/whosonfirst/whosonfirst-placetypes#county
county: admin,
county_a: admin,
county_id: literal,
county_id: keyword,

// https://github.com/whosonfirst/whosonfirst-placetypes#locality
locality: admin,
locality_a: admin,
locality_id: literal,
locality_id: keyword,

// https://github.com/whosonfirst/whosonfirst-placetypes#borough
borough: admin,
borough_a: admin,
borough_id: literal,
borough_id: keyword,

// https://github.com/whosonfirst/whosonfirst-placetypes#localadmin
localadmin: admin,
localadmin_a: admin,
localadmin_id: literal,
localadmin_id: keyword,

// https://github.com/whosonfirst/whosonfirst-placetypes#neighbourhood
neighbourhood: admin,
neighbourhood_a: admin,
neighbourhood_id: literal,
neighbourhood_id: keyword,

// https://github.com/whosonfirst/whosonfirst-placetypes#postalcode
postalcode: postalcode,
postalcode_a: postalcode,
postalcode_id: literal
postalcode_id: keyword
}
},

Expand All @@ -139,8 +139,8 @@ var schema = {
bounding_box: require('./partial/boundingbox'),

// meta info
source_id: literal,
category: literal,
source_id: keyword,
category: keyword,
population: multiplier,
popularity: multiplier,

Expand All @@ -152,36 +152,27 @@ var schema = {
path_match: 'name.*',
match_mapping_type: 'string',
mapping: {
type: 'string',
analyzer: 'peliasIndexOneEdgeGram',
fielddata : {
format: 'disabled'
}
type: 'text',
analyzer: 'peliasIndexOneEdgeGram'
}
},
},{
phrase: {
path_match: 'phrase.*',
match_mapping_type: 'string',
mapping: {
type: 'string',
analyzer: 'peliasPhrase',
fielddata : {
format: 'disabled'
}
type: 'text',
analyzer: 'peliasPhrase'
}
}
},{
addendum: {
path_match: 'addendum.*',
match_mapping_type: 'string',
mapping: {
type: 'string',
index: 'no',
doc_values: false,
fielddata : {
format: 'disabled'
}
type: 'keyword',
index: false,
doc_values: false
}
}
}],
Expand Down
11 changes: 4 additions & 7 deletions mappings/partial/admin.json
@@ -1,14 +1,11 @@
{
"type": "string",
"type": "text",
"analyzer": "peliasAdmin",
"fields": {
"ngram": {
"type": "string",
"type": "text",
"analyzer": "peliasIndexOneEdgeGram",
"doc_values": false,
"fielddata": {
"format": "disabled"
}
"doc_values": false
}
}
}
}
4 changes: 2 additions & 2 deletions mappings/partial/boundingbox.json
@@ -1,4 +1,4 @@
{
"type": "string",
"index": "no"
"type": "keyword",
"index": false
}
4 changes: 4 additions & 0 deletions mappings/partial/keyword.json
@@ -0,0 +1,4 @@
{
"type": "keyword",
"doc_values": false
}
3 changes: 3 additions & 0 deletions mappings/partial/keyword_with_doc_values.json
@@ -0,0 +1,3 @@
{
"type": "keyword"
}
5 changes: 0 additions & 5 deletions mappings/partial/literal.json

This file was deleted.

4 changes: 0 additions & 4 deletions mappings/partial/literal_with_doc_values.json

This file was deleted.

10 changes: 3 additions & 7 deletions mappings/partial/postalcode.json
@@ -1,14 +1,10 @@
{
"type": "string",
"type": "text",
"analyzer": "peliasZip",
"fields": {
"ngram": {
"type": "string",
"analyzer": "peliasIndexOneEdgeGram",
"doc_values": false,
"fielddata": {
"format": "disabled"
}
"type": "text",
"analyzer": "peliasIndexOneEdgeGram"
}
}
}
7 changes: 2 additions & 5 deletions test/compile.js
Expand Up @@ -40,11 +40,8 @@ module.exports.tests.dynamic_templates = function(test, common) {
t.equal(template.path_match, 'name.*');
t.equal(template.match_mapping_type, 'string');
t.deepEqual(template.mapping, {
type: 'string',
analyzer: 'peliasIndexOneEdgeGram',
fielddata : {
format: "disabled"
}
type: 'text',
analyzer: 'peliasIndexOneEdgeGram'
});
t.end();
});
Expand Down

0 comments on commit e797503

Please sign in to comment.