Skip to content

Commit

Permalink
fix(core): bundle the genome assemblies into the JavaScript library
Browse files Browse the repository at this point in the history
Built-in genome assemblies were loaded from https://genomespy.app/. This is not a good idea
because it makes the library dependent on the availability of the server. Instead, the genome
assemblies are now bundled into the JavaScript library. Custom genomes are still supported
but the way they are specified has changed a bit.
  • Loading branch information
tuner committed Apr 15, 2024
1 parent aa34a6a commit 80d1645
Show file tree
Hide file tree
Showing 6 changed files with 320 additions and 50 deletions.
35 changes: 20 additions & 15 deletions docs/genomic-data/genomic-coordinates.md
Original file line number Diff line number Diff line change
Expand Up @@ -27,42 +27,47 @@ name of the assembly to the top level view specification:

## Supported genomes

By default, GenomeSpy loads genomes from the _genomespy.app_ website. The
following assemblies are provided: `"hg38"`, `"hg19"`, `"hg18"`, `"mm10"`,
`"mm9"`, and `"dm6"`.
GenomeSpy bundles a few common built-in genome assemblies: `"hg38"`, `"hg19"`,
`"hg18"`, `"mm10"`, `"mm9"`, and `"dm6"`.

## Custom genomes

At minimum, a custom genome needs a list of contigs and their sizes, which
can be loaded from a `"chrom.sizes"` file or provided within the specification.
Custom genome assemblies can be provided in two ways: as a `chrom.sizes` file or
within the specification.

### As files
### As a `chrom.sizes` file

The `baseUrl` property specifies the location of genomes:
The `chrom.sizes` file is a two-column text file with the chromosome names and
their sizes. You may want to use the UCSC Genome Browser's
[fetchChromSizes](http://hgdownload.soe.ucsc.edu/admin/exe/linux.x86_64/fetchChromSizes)
script to download the sizes for a genome assembly. GenomeSpy does not filter
out any alternative contigs or haplotypes, so you may want to preprocess the
file before using it.

Example:

```json
{
"genome": {
"name": "hg99",
"baseUrl": "https://your.site/genomes/"
"name": "hg19",
"url": "https://genomespy.app/data/genomes/hg19/chrom.sizes"
},
...
}
```

The directory must have the following structure:

```
hg99/hg99.chrom.sizes
```
### Within the specification

### Within the Specification
You can provide the genome assembly directly in the specification using the
`contigs` property. The contigs are an array of objects with the `name` and
`size` properties.

Example:

```json
{
"genome": {
"name": "dm6",
"contigs": [
{"name": "chr3R", "size": 32079331 },
{"name": "chr3L", "size": 28110227 },
Expand Down
94 changes: 66 additions & 28 deletions packages/core/src/genome/genome.js
Original file line number Diff line number Diff line change
@@ -1,10 +1,9 @@
import { bisect } from "d3-array";
import { tsvParseRows } from "d3-dsv";
import { loader } from "vega-loader";
import { isObject } from "vega-util";
import { formatRange } from "./locusFormat.js";

const defaultBaseUrl = "https://genomespy.app/data/genomes/";
import { getContigs } from "./genomes.js";
import { concatUrl } from "../utils/url.js";

/**
* @typedef {import("../spec/genome.js").GenomeConfig} GenomeConfig
Expand Down Expand Up @@ -33,11 +32,17 @@ export default class Genome {
* @param {GenomeConfig} config
*/
constructor(config) {
this.config = config;
this.config = { name: "custom", ...config };

if ("baseUrl" in config) {
throw new Error(
"The `baseUrl` property in genome config has been removed in GenomeSpy v0.52.0. Use `url` instead. See https://genomespy.app/docs/genomic-data/genomic-coordinates/."
);
}

if (!this.config.contigs && typeof this.config.name !== "string") {
if (!isGenomeConfig(config)) {
throw new Error(
"No name has been defined for the genome assembly!"
"Not a genome configuration: " + JSON.stringify(config)
);
}

Expand All @@ -55,8 +60,19 @@ export default class Genome {

this.totalSize = 0;

if (this.config.contigs) {
if (isInlineGenomeConfig(this.config)) {
this.setChromSizes(this.config.contigs);
} else if (isUrlGenomeConfig(this.config)) {
// Nop
} else {
const contigs = getContigs(this.config.name);
if (contigs) {
this.setChromSizes(contigs);
} else {
throw new Error(
`Unknown genome: ${this.config.name}. Please provide contigs or a URL. See https://genomespy.app/docs/genomic-data/genomic-coordinates/.`
);
}
}
}

Expand All @@ -68,28 +84,21 @@ export default class Genome {
* @param {string} baseUrl
*/
async load(baseUrl) {
if (this.config.contigs) {
if (!isUrlGenomeConfig(this.config)) {
return;
}

if (this.config.baseUrl) {
this.baseUrl = /^http(s)?/.test(this.config.baseUrl)
? this.config.baseUrl
: baseUrl + "/" + this.config.baseUrl;
} else {
this.baseUrl = defaultBaseUrl;
}

try {
this.setChromSizes(
parseChromSizes(
await loader({ baseURL: this.baseUrl }).load(
`${this.config.name}/${this.name}.chrom.sizes`
)
)
);
const fullUrl = concatUrl(baseUrl, this.config.url);
const result = await fetch(fullUrl);
if (!result.ok) {
throw new Error(`${result.status} ${result.statusText}`);
}
this.setChromSizes(parseChromSizes(await result.text()));
} catch (e) {
throw new Error(`Could not load chrom sizes: ${e.message}`);
throw new Error(
`Could not load chrom sizes: ${this.config.url}. Reason: ${e.message}`
);
}
}

Expand Down Expand Up @@ -354,10 +363,10 @@ export default class Genome {
* @param {string} chromSizesData
*/
export function parseChromSizes(chromSizesData) {
// TODO: Support other organisms too
return tsvParseRows(chromSizesData)
.filter((row) => /^chr[0-9A-Z]+$/.test(row[0]))
.map(([name, size]) => ({ name, size: parseInt(size) }));
return tsvParseRows(chromSizesData).map(([name, size]) => ({
name,
size: parseInt(size),
}));
}

/**
Expand All @@ -377,3 +386,32 @@ export function isChromosomalLocus(value) {
export function isChromosomalLocusInterval(value) {
return value.every(isChromosomalLocus);
}

/**
 * Tests whether a value looks like a genome configuration: an object that
 * carries at least one of the `name`, `url`, or `contigs` properties.
 *
 * The property checks are done inline instead of delegating to
 * `isUrlGenomeConfig` / `isInlineGenomeConfig`, because those guards call
 * back into this function. Delegating would recurse without end (and
 * overflow the stack) for any object that has no `name` property, such as
 * `{ contigs: [...] }` or `{}`.
 *
 * @param {any} value
 * @returns {value is GenomeConfig}
 */
export function isGenomeConfig(value) {
    return (
        // Guard first: the `in` operator throws a TypeError on primitives.
        isObject(value) &&
        ("name" in value || "url" in value || "contigs" in value)
    );
}

/**
 * Type guard for genome configurations that point to a `chrom.sizes` file:
 * the value must pass `isGenomeConfig` and have a `url` property.
 *
 * @param {any} value
 * @returns {value is import("../spec/genome.js").UrlGenomeConfig }
 */
export function isUrlGenomeConfig(value) {
    if (!isGenomeConfig(value)) {
        return false;
    }
    return "url" in value;
}

/**
 * Type guard for genome configurations that embed their contigs directly:
 * the value must pass `isGenomeConfig` and have a `contigs` property.
 *
 * @param {any} value
 * @returns {value is import("../spec/genome.js").InlineGenomeConfig}
 */
export function isInlineGenomeConfig(value) {
    if (!isGenomeConfig(value)) {
        return false;
    }
    return "contigs" in value;
}
44 changes: 43 additions & 1 deletion packages/core/src/genome/genome.test.js
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
import { describe, expect, test } from "vitest";
import { afterAll, describe, expect, test, vi } from "vitest";
import Genome from "./genome.js";

describe("Human genome, chromosome names prefixed with 'chr'", () => {
Expand Down Expand Up @@ -190,6 +190,48 @@ describe("C. elegans genome, chromosome names prefixed with 'chr'", () => {
});
});

describe("Load chrom.sizes file from a URL", () => {
    // The only URL the stubbed fetch answers, and the body it answers with.
    const expectedUrl = "http://example.com/chrom.sizes";
    const chromSizesBody = "chr1\t1000\nchr2\t2000\nchr3\t3000\nchrX\t4000";

    // Stub the global fetch so the suite never touches the network. Any
    // request to a URL other than the expected one fails loudly.
    const fetchSpy = vi.spyOn(global, "fetch");
    fetchSpy.mockImplementation((/** @type {string} */ url) => {
        if (url !== expectedUrl) {
            throw new Error(`Unexpected URL: ${url}`);
        }
        return Promise.resolve(
            // @ts-expect-error
            {
                ok: true,
                text: () => Promise.resolve(chromSizesBody),
            }
        );
    });

    // Put the real fetch back once this suite has finished.
    afterAll(() => {
        fetchSpy.mockRestore();
    });

    test("Throw if the deprecated baseUrl property is provided", () => {
        const construct = () =>
            new Genome({
                name: "random",
                // @ts-expect-error
                baseUrl: "http://example.com",
            });

        expect(construct).toThrow(/removed/);
    });

    test("Loads and parses a genome", async () => {
        const genome = new Genome({ name: "random", url: "chrom.sizes" });
        await genome.load("http://example.com");

        // chr2 starts after chr1 (1000 bp) and is 2000 bp long.
        expect(genome.parseInterval("chr2")).toEqual([1000, 3000]);
    });
});

describe("Parse interval strings", () => {
const chromosomes = [
{ name: "chr1", size: 1000 },
Expand Down

0 comments on commit 80d1645

Please sign in to comment.