Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: add exponential moving average to vega-lite #9225

Open
wants to merge 6 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
4 changes: 4 additions & 0 deletions build/vega-lite-schema.json
Expand Up @@ -71,6 +71,10 @@
"AggregatedFieldDef": {
"additionalProperties": false,
"properties": {
"aggregate_param": {
"description": "A parameter that can be passed to aggregation functions. The aggregation operation `\"exponential\"` requires it.",
"type": "number"
},
"as": {
"$ref": "#/definitions/FieldName",
"description": "The output field names to use for each aggregated field."
Expand Down
3 changes: 2 additions & 1 deletion site/docs/transform/aggregate.md
Expand Up @@ -86,7 +86,7 @@ An `aggregate` transform in the [`transform`](transform.html) array has the foll

### Aggregated Field Definition for Aggregate Transform

{% include table.html props="op,field,as" source="AggregatedFieldDef" %}
{% include table.html props="op,field,as,aggregate_param" source="AggregatedFieldDef" %}

Note: It is important you [`parse`](data.html#format) your data types explicitly, especially if you are likely to have `null` values in your dataset and automatic type inference will fail.

Expand Down Expand Up @@ -121,6 +121,7 @@ The supported **aggregation operations** are:
| max | The maximum field value. |
| argmin | An input data object containing the minimum field value. <br/> **Note:** When used inside encoding, `argmin` must be specified as an object. (See below for an example.) |
| argmax | An input data object containing the maximum field value. <br/> **Note:** When used inside encoding, `argmax` must be specified as an object. (See below for an example.) |
| exponential | The exponential moving average of field values. Set the required weight (a number between `0` and `1`) with [`aggregate_param`](#aggregate-op-def). <br/> **Note:** Cannot be used inside encoding. |

{:#argmax}

Expand Down
36 changes: 24 additions & 12 deletions src/compile/data/aggregate.ts
Expand Up @@ -27,7 +27,7 @@
import {isRectBasedMark} from '../../mark';
import {OFFSETTED_RECT_END_SUFFIX, OFFSETTED_RECT_START_SUFFIX} from './timeunit';

type Measures = Dict<Partial<Record<AggregateOp, Set<string>>>>;
type Measures = Dict<Partial<Record<AggregateOp, {aliases: Set<string>; aggregate_param?: number}>>>;
Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I updated our measures to hold the fieldnames (aliases) and the aggregate param (aggregate_param).

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Now aggregateParam

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sounds good. It's an internal name so it doesn't matter too much.


function addDimension(dims: Set<string>, channel: Channel, fieldDef: FieldDef<string>, model: ModelWithField) {
const channelDef2 = isUnitModel(model) ? model.encoding[getSecondaryRangeChannel(channel)] : undefined;
Expand Down Expand Up @@ -71,7 +71,9 @@
for (const op of keys(ops)) {
if (field in parentMeasures) {
// add operator to existing measure field
parentMeasures[field][op] = new Set([...(parentMeasures[field][op] ?? []), ...ops[op]]);
parentMeasures[field][op] = {
aliases: new Set([...(parentMeasures[field][op]?.aliases ?? []), ...ops[op].aliases])
};
} else {
parentMeasures[field] = {[op]: ops[op]};
}
Expand Down Expand Up @@ -121,23 +123,23 @@
if (aggregate) {
if (aggregate === 'count') {
meas['*'] ??= {};
meas['*']['count'] = new Set([vgField(fieldDef, {forAs: true})]);
meas['*']['count'] = {aliases: new Set([vgField(fieldDef, {forAs: true})])};
} else {
if (isArgminDef(aggregate) || isArgmaxDef(aggregate)) {
const op = isArgminDef(aggregate) ? 'argmin' : 'argmax';
const argField = aggregate[op];
meas[argField] ??= {};
meas[argField][op] = new Set([vgField({op, field: argField}, {forAs: true})]);
meas[argField][op] = {aliases: new Set([vgField({op, field: argField}, {forAs: true})])};
} else {
meas[field] ??= {};
meas[field][aggregate] = new Set([vgField(fieldDef, {forAs: true})]);
meas[field][aggregate] = {aliases: new Set([vgField(fieldDef, {forAs: true})])};
}

// For scale channel with domain === 'unaggregated', add min/max so we can use their union as unaggregated domain
if (isScaleChannel(channel) && model.scaleDomain(channel) === 'unaggregated') {
meas[field] ??= {};
meas[field]['min'] = new Set([vgField({field, aggregate: 'min'}, {forAs: true})]);
meas[field]['max'] = new Set([vgField({field, aggregate: 'max'}, {forAs: true})]);
meas[field]['min'] = {aliases: new Set([vgField({field, aggregate: 'min'}, {forAs: true})])};
meas[field]['max'] = {aliases: new Set([vgField({field, aggregate: 'max'}, {forAs: true})])};
}
}
} else {
Expand All @@ -157,14 +159,18 @@
const meas: Measures = {};

for (const s of t.aggregate) {
const {op, field, as} = s;
const {op, field, as, aggregate_param} = s;
if (op) {
if (op === 'count') {
meas['*'] ??= {};
meas['*']['count'] = new Set([as ? as : vgField(s, {forAs: true})]);
meas['*']['count'] = {aliases: new Set([as ? as : vgField(s, {forAs: true})])};
} else {
meas[field] ??= {};
meas[field][op] = new Set([as ? as : vgField(s, {forAs: true})]);
meas[field][op] = {aliases: new Set([as ? as : vgField(s, {forAs: true})])};

if (aggregate_param) {
meas[field][op].aggregate_param = aggregate_param;
Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I only added aggregate_param to view-level transforms, but do we want it included in encodings as well? Maybe something like:

{
  "encoding": {
    "x": {
      "aggregate": {"exponential": 0.5}
    }
  }
}

I based the example off how the code works with argmax/argmin

Copy link
Member

@domoritz domoritz Jan 30, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, that makes sense. I wonder whether we want to use a similar API for transforms.

"aggregate": [{
  "op": {"exponential": 0.5},
  "as": "exp_0.5"
}]

Current API in this pull request, for reference:

"aggregate": [{
  "op": "exponential",
  "aggregate_param": 0.5,
  "as": "exp_0.5"
}]

We often have more concise transform APIs than Vega and this would make the API more consistent between encodings and transforms.

However, it would be a bit less consistent with the API in https://vega.github.io/vega-lite/docs/aggregate.html#argmax

    "aggregate": [{
      "op": "argmax",
      "field": "US Gross",
      "as": "argmax_US_Gross"
    }]

What do you think?

I'm somewhat leaning towards "op": {"exponential": 0.5} because the parameter is so closely tied to the exponential here and aggregate_param is not as meaningful of a description as field in the argmax case.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@arvind +1s "op": {"exponential": 0.5}

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Makes sense! I'll update encoding level transforms to have "aggregate": {"exponential": 0.5}, and view level transforms to have "op": {"exponential": 0.5}.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thank you.

Btw, one thing @arvind brought up is that aggregate_param (which is the term in Vega) can be confused with the param concept in Vega-Lite (which Vega doesn't have (yet)). Just wanted to note that here.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

> Makes sense! I'll update encoding level transforms to have "aggregate": {"exponential": 0.5}, and view level transforms to have "op": {"exponential": 0.5}.

Encoding level transforms and view level transforms are now updated.

}
}
}
}
Expand Down Expand Up @@ -202,7 +208,7 @@

for (const field of keys(this.measures)) {
for (const op of keys(this.measures[field])) {
const m = this.measures[field][op];
const m = this.measures[field][op].aliases;
if (m.size === 0) {
out.add(`${op}_${field}`);
} else {
Expand All @@ -222,13 +228,15 @@
const ops: AggregateOp[] = [];
const fields: string[] = [];
const as: string[] = [];
const aggregateParams: (number | null)[] = [];

for (const field of keys(this.measures)) {
for (const op of keys(this.measures[field])) {
for (const alias of this.measures[field][op]) {
for (const alias of this.measures[field][op].aliases) {
as.push(alias);
ops.push(op);
fields.push(field === '*' ? null : replacePathInField(field));
aggregateParams.push(this.measures[field][op].aggregate_param || null);
}
}
}
Expand All @@ -241,6 +249,10 @@
as
};

if (aggregateParams.some(param => typeof param === 'number')) {
result.aggregate_params = aggregateParams;

Check failure on line 253 in src/compile/data/aggregate.ts

View workflow job for this annotation

GitHub Actions / Node

Type 'number[]' is not assignable to type 'object[]'.

Check failure on line 253 in src/compile/data/aggregate.ts

View workflow job for this annotation

GitHub Actions / Runtime, Linting, and Coverage

Type 'number[]' is not assignable to type 'object[]'.

Check failure on line 253 in src/compile/data/aggregate.ts

View workflow job for this annotation

GitHub Actions / CLI (ubuntu-latest)

Type 'number[]' is not assignable to type 'object[]'.
}

return result;
}
}
5 changes: 5 additions & 0 deletions src/transform.ts
Expand Up @@ -111,6 +111,11 @@ export interface AggregatedFieldDef {
*/
field?: FieldName;

/**
* A parameter that can be passed to aggregation functions. The aggregation operation `"exponential"` requires it.
*/
aggregate_param?: number;
Copy link
Author

@julieg18 julieg18 Jan 12, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

aggregate_params can also be used in window transforms, but the type WindowTransform is not updated.

If I'm not misunderstanding something, I can open a PR to update WindowTransform so we can add exponential moving average to window transforms in Vega-Lite, either in this PR or in a later one.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah, good catch. Yes, please send a pull request and I can merge it. We can add support for window aggregates in a follow up pull request.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done! Opened vega/vega#3874


/**
* The output field names to use for each aggregated field.
*/
Expand Down
32 changes: 27 additions & 5 deletions test/compile/data/aggregate.test.ts
Expand Up @@ -49,9 +49,9 @@ describe('compile/data/aggregate', () => {

const agg = AggregateNode.makeFromEncoding(null, model);
expect(agg.hash()).toBe(
`Aggregate {"dimensions":"Set(\\"Origin\\")","measures":{"*":{"count":"Set(\\"${internalField(
`Aggregate {"dimensions":"Set(\\"Origin\\")","measures":{"*":{"count":{"aliases":"Set(\\"${internalField(
'count'
)}\\")"},"Acceleration":{"sum":"Set(\\"sum_Acceleration\\")"}}}`
)}\\")"}},"Acceleration":{"sum":{"aliases":"Set(\\"sum_Acceleration\\")"}}}}`
);
});
});
Expand Down Expand Up @@ -309,6 +309,26 @@ describe('compile/data/aggregate', () => {
as: ['Displacement_mean', 'Displacement_max', 'Acceleration_sum']
});
});

it('should produce the correct summary component from transform array with aggregation_params', () => {
const t: AggregateTransform = {
aggregate: [
{op: 'sum', field: 'Acceleration', as: 'Acceleration_sum'},
{op: 'exponential', field: 'Displacement', as: 'Displacement_exponential', aggregate_param: 0.3}
],
groupby: ['Group']
};

const agg = AggregateNode.makeFromTransform(null, t);
expect(agg.assemble()).toEqual({
type: 'aggregate',
groupby: ['Group'],
ops: ['sum', 'exponential'],
fields: ['Acceleration', 'Displacement'],
as: ['Acceleration_sum', 'Displacement_exponential'],
aggregate_params: [null, 0.3]
});
});
});

describe('producedFields', () => {
Expand Down Expand Up @@ -336,8 +356,8 @@ describe('compile/data/aggregate', () => {
});
it('should merge AggregateNodes with same dimensions', () => {
const parent = new PlaceholderDataFlowNode(null);
const agg1 = new AggregateNode(parent, new Set(['a', 'b']), {a: {mean: new Set(['a_mean'])}});
const agg2 = new AggregateNode(parent, new Set(['a', 'b']), {b: {mean: new Set(['b_mean'])}});
const agg1 = new AggregateNode(parent, new Set(['a', 'b']), {a: {mean: {aliases: new Set(['a_mean'])}}});
const agg2 = new AggregateNode(parent, new Set(['a', 'b']), {b: {mean: {aliases: new Set(['b_mean'])}}});

expect(agg1.merge(agg2)).toBe(true);
expect(agg1.producedFields()).toEqual(new Set(['a_mean', 'b_mean']));
Expand All @@ -346,7 +366,9 @@ describe('compile/data/aggregate', () => {

describe('assemble()', () => {
it('should escape nested accesses', () => {
const agg = new AggregateNode(null, new Set(['foo.bar']), {'foo.baz': {mean: new Set(['foo_baz_mean'])}});
const agg = new AggregateNode(null, new Set(['foo.bar']), {
'foo.baz': {mean: {aliases: new Set(['foo_baz_mean'])}}
});
expect(agg.assemble()).toEqual({
as: ['foo_baz_mean'],
fields: ['foo\\.baz'],
Expand Down
2 changes: 1 addition & 1 deletion test/compile/data/assemble.test.ts
Expand Up @@ -35,7 +35,7 @@ describe('compile/data/assemble', () => {
const outputNodeRefCounts = {};
const raw = new OutputNode(null, 'rawOut', DataSourceType.Raw, outputNodeRefCounts);
raw.parent = src;
const agg = new AggregateNode(null, new Set(['a']), {b: {count: new Set(['count_*'])}});
const agg = new AggregateNode(null, new Set(['a']), {b: {count: {aliases: new Set(['count_*'])}}});
agg.parent = raw;
const main = new OutputNode(null, 'mainOut', DataSourceType.Main, outputNodeRefCounts);
main.parent = agg;
Expand Down