Skip to content

Commit

Permalink
Add joinaggregate. Fixes #4121
Browse files Browse the repository at this point in the history
  • Loading branch information
domoritz committed Feb 15, 2019
1 parent f843b44 commit e075987
Show file tree
Hide file tree
Showing 21 changed files with 593 additions and 99 deletions.
1 change: 1 addition & 0 deletions .prettierignore
@@ -0,0 +1 @@
examples/specs/*.vl.json
37 changes: 37 additions & 0 deletions examples/specs/joinaggregate_mean_difference.vl.json
@@ -0,0 +1,37 @@
{
"$schema": "https://vega.github.io/schema/vega-lite/v3.json",
"data": {"url": "data/movies.json"},
"transform": [
{"filter": "datum.IMDB_Rating != null"},
{
"joinaggregate": [{
"op": "mean",
"field": "IMDB_Rating",
"as": "AverageRating"
}]
},
{"filter": "(datum.IMDB_Rating - datum.AverageRating) > 2.5"}
],
"layer": [
{
"mark": "bar",
"encoding": {
"x": {
"field": "IMDB_Rating", "type": "quantitative",
"axis": {"title": "IMDB Rating"}
},
"y": {"field": "Title", "type": "ordinal"}
}
},
{
"mark": {"type": "rule", "color": "red"},
"encoding": {
"x": {
"aggregate": "average",
"field": "AverageRating",
"type": "quantitative"
}
}
}
]
}
56 changes: 56 additions & 0 deletions examples/specs/joinaggregate_mean_difference_by_year.vl.json
@@ -0,0 +1,56 @@
{
"$schema": "https://vega.github.io/schema/vega-lite/v3.json",
"description": "Bar graph showing the best films for the year they were produced, where best is defined by at least 2.5 points above average for that year. The red tick shows the average rating for a film in that year, and the bar is the rating that the film received.",
"data": {
"url": "data/movies.json",
"format": {
"parse": {"Release_Date": "date:'%d-%b-%y'"}
}
},
"transform": [
{"filter": "datum.IMDB_Rating != null"},
{"timeUnit": "year", "field": "Release_Date", "as": "year"},
{
"joinaggregate": [{
"op": "mean",
"field": "IMDB_Rating",
"as": "AverageYearRating"
}],
"groupby": [
"year"
]
},
{
"filter": "(datum.IMDB_Rating - datum.AverageYearRating) > 2.5"
}
],
"layer": [{
"mark": {"type": "bar", "clip": true},
"encoding": {
"x": {
"field": "IMDB_Rating",
"type": "quantitative",
"axis": {"title": "IMDB Rating"}
},
"y": {
"field": "Title",
"type": "ordinal"
}
}
},
{
"mark": "tick",
"encoding": {
"x": {
"field": "AverageYearRating",
"type": "quantitative"
},
"y": {
"field": "Title",
"type": "ordinal"
},
"color": {"value": "red"}
}
}
]
}
41 changes: 41 additions & 0 deletions examples/specs/joinaggregate_percent_of_total.vl.json
@@ -0,0 +1,41 @@
{
"$schema": "https://vega.github.io/schema/vega-lite/v3.json",
"description": "A bar graph showing what activities consume what percentage of the day.",
"data": {
"values": [
{"Activity": "Sleeping","Time": 8},
{"Activity": "Eating","Time": 2},
{"Activity": "TV","Time": 4},
{"Activity": "Work","Time": 8},
{"Activity": "Exercise","Time": 2}
]
},
"transform": [{
"joinaggregate": [{
"op": "sum",
"field": "Time",
"as": "TotalTime"
}]
},
{
"calculate": "datum.Time/datum.TotalTime * 100",
"as": "PercentOfTotal"
}],
"mark": "bar",
"encoding": {
"x": {
"field": "PercentOfTotal",
"type": "quantitative",
"axis": {
"title": "% of total Time"
}
},
"y": {
"field": "Activity",
"type": "nominal",
"scale": {
"rangeStep": 12
}
}
}
}
37 changes: 37 additions & 0 deletions examples/specs/joinaggregate_residual_graph.vl.json
@@ -0,0 +1,37 @@
{
"$schema": "https://vega.github.io/schema/vega-lite/v3.json",
"description": "A dot plot showing each movie in the database, and the difference from the average movie rating. The display is sorted by year to visualize everything in sequential order. The graph is for all Movies before 2019.",
"data": {
"url": "data/movies.json",
"format": {
"parse": {"Release_Date": "date:'%d-%b-%y'"}
}
},
"transform": [
{"filter": "datum.IMDB_Rating != null"},
{"filter": {"timeUnit": "year", "field": "Release_Date", "range": [null, 2019]}},
{
"joinaggregate": [{
"op": "mean",
"field": "IMDB_Rating",
"as": "AverageRating"
}]
},
{
"calculate": "datum.IMDB_Rating - datum.AverageRating",
"as": "RatingDelta"
}
],
"mark": "point",
"encoding": {
"x": {
"field": "Release_Date",
"type": "temporal"
},
"y": {
"field": "RatingDelta",
"type": "quantitative",
"axis": {"title": "Rating Delta"}
}
}
}
5 changes: 1 addition & 4 deletions examples/specs/window_mean_difference.vl.json
Expand Up @@ -9,10 +9,7 @@
"field": "IMDB_Rating",
"as": "AverageRating"
}],
"frame": [
null,
null
]
"frame": [null, null]
},
{"filter": "(datum.IMDB_Rating - datum.AverageRating) > 2.5"}
],
Expand Down
8 changes: 4 additions & 4 deletions examples/specs/window_mean_difference_by_year.vl.json
Expand Up @@ -12,12 +12,12 @@
{"timeUnit": "year", "field": "Release_Date", "as": "year"},
{
"window": [{
"op": "mean",
"field": "IMDB_Rating",
"as": "AverageYearRating"
"op": "mean",
"field": "IMDB_Rating",
"as": "AverageYearRating"
}],
"groupby": [
"year"
"year"
],
"frame": [null, null]
},
Expand Down
6 changes: 3 additions & 3 deletions examples/specs/window_percent_of_total.vl.json
Expand Up @@ -12,9 +12,9 @@
},
"transform": [{
"window": [{
"op": "sum",
"field": "Time",
"as": "TotalTime"
"op": "sum",
"field": "Time",
"as": "TotalTime"
}],
"frame": [null, null]
},
Expand Down
6 changes: 3 additions & 3 deletions examples/specs/window_residual_graph.vl.json
Expand Up @@ -12,9 +12,9 @@
{"filter": {"timeUnit": "year", "field": "Release_Date", "range": [null, 2019]}},
{
"window": [{
"op": "mean",
"field": "IMDB_Rating",
"as": "AverageRating"
"op": "mean",
"field": "IMDB_Rating",
"as": "AverageRating"
}],
"frame": [null, null]
},
Expand Down
78 changes: 78 additions & 0 deletions site/docs/transform/joinaggregate.md
@@ -0,0 +1,78 @@
---
layout: docs
menu: docs
title: Join Aggregate
permalink: /docs/joinaggregate.html
---

The joinaggregate transform extends the input data objects with aggregate values. Aggregation is performed and the results are then joined with the input data. This transform can be helpful for creating derived values that combine both raw data and aggregate calculations, such as percentages of group totals. This transform is a special case of the [window](window.html) transform where the `frame` is always `[null, null]`.

## Documentation Overview

{:.no_toc}

<!-- prettier-ignore -->
- TOC
{:toc}

## Join Aggregate Field Definition

{: .suppress-error}

```json
// A View Specification
{
...
"transform": [
{
// Join Aggregate Transform
"joinaggregate": [{
"op": ...,
"field": ...,
"as": ...
}],
"groupby": [
"..."
]
}
...
],
...
}
```

## Join Aggregate Transform Definition

{% include table.html props="joinaggregate,groupby" source="JoinAggregateTransform" %}

{:#field-def}

### Join Aggregate Transform Field Definition

{% include table.html props="op,field,as" source="JoinAggregateFieldDef" %}

{:#ops}

## Examples

Below are some common use cases for the join aggregate transform.

### Percent of Total

Here we use the join aggregate transform to derive the global sum so that we can calculate percentage.

<div class="vl-example" data-name="joinaggregate_percent_of_total"></div>

### Difference from Mean

One example is to show the "exemplar" movies from a movie collection. Here "exemplar" is defined by having a score of 2.5 points higher than the global average.

<div class="vl-example" data-name="joinaggregate_mean_difference"></div>

Another example is to show the "exemplar" movies based on the release year average. Here "exemplar" is defined by having a score 2.5 points higher than the annual average for its release year (instead of the global average).

<div class="vl-example" data-name="joinaggregate_mean_difference_by_year"></div>

Rather than filtering the above two examples we can also calculate a residual by deriving the mean using the join aggregate transform first.

<div class="vl-example" data-name="joinaggregate_residual_graph"></div>
44 changes: 24 additions & 20 deletions site/docs/transform/window.md
Expand Up @@ -5,7 +5,7 @@ title: Window
permalink: /docs/window.html
---

The window transform performs calculations over sorted groups of data objects. These calculations including ranking, lead/lag analysis, and aggregates such as running sums and averages. Calculated values are written back to the input data stream.
The window transform performs calculations over sorted groups of data objects. These calculations include ranking, lead/lag analysis, and aggregates such as running sums and averages. Calculated values are written back to the input data stream. If you only want to set the same aggregated value in a new field, you can use the simpler [join aggregate](joinaggregate.html) transform.

## Documentation Overview

Expand All @@ -29,7 +29,7 @@ The window transform performs calculations over sorted groups of data objects. T
"window": [{
"op": ...,
"field": ...,
"param": ...
"param": ...,
"as": ...
}],
"sort": [
Expand Down Expand Up @@ -95,6 +95,28 @@ Here we use the window transform with `frame: [null, 0]` to accumulate count in

**See also:** [layered histogram and cumulative histogram](../examples/layer_cumulative_histogram.html)

### Rank Chart

We can also use `rank` operator to calculate ranks over time.

<div class="vl-example" data-name="window_rank"></div>

### Top K

Here we use window transform to derive the total number of students along with the rank of the current student to determine the top K students and display their score.

<div class="vl-example" data-name="window_top_k"></div>

### Cumulative Running Average

Here we use window transform to visualize how the average MPG for vehicles has changed over the years.

<div class="vl-example" data-name="window_cumulative_running_average"></div>

## Additional Examples

These are examples of window transforms that can be simplified with the join aggregate transform. Please refer to the [join aggregate examples](joinaggregate.html#examples).

### Percent of Total

Here we use the window transform to derive the global sum so that we can calculate percentage.
Expand All @@ -114,21 +136,3 @@ Another example is to show the "exemplar" movies based on the release year avera
Rather than filtering the above two examples we can also calculate a residual by deriving the mean using the window transform first.

<div class="vl-example" data-name="window_residual_graph"></div>

### Rank Chart

We can also use `rank` operator to calculate ranks over time.

<div class="vl-example" data-name="window_rank"></div>

### Top K

Here we use window transform to derive the total number of students along with the rank of the current student to determine the top K students and display their score.

<div class="vl-example" data-name="window_top_k"></div>

### Cumulative Running Average

Here we use window transform to visualize how the average MPG for vehicles have changed over the years.

<div class="vl-example" data-name="window_cumulative_running_average"></div>
2 changes: 2 additions & 0 deletions src/compile/data/assemble.ts
Expand Up @@ -15,6 +15,7 @@ import {GeoJSONNode} from './geojson';
import {GeoPointNode} from './geopoint';
import {IdentifierNode} from './identifier';
import {ImputeNode} from './impute';
import {JoinAggregateTransformNode} from './joinaggregate';
import {LookupNode} from './lookup';
import {SampleTransformNode} from './sample';
import {SourceNode} from './source';
Expand Down Expand Up @@ -89,6 +90,7 @@ function makeWalkTree(data: VgData[]) {
node instanceof AggregateNode ||
node instanceof LookupNode ||
node instanceof WindowTransformNode ||
node instanceof JoinAggregateTransformNode ||
node instanceof FoldTransformNode ||
node instanceof FlattenTransformNode ||
node instanceof IdentifierNode ||
Expand Down

0 comments on commit e075987

Please sign in to comment.