Skip to content

Commit

Permalink
Merge pull request #955 from ppillot/v3000
Browse files Browse the repository at this point in the history
V3000 support
  • Loading branch information
ppillot committed Jan 20, 2023
2 parents 29f89f7 + db6b3b5 commit 3f0fd8e
Show file tree
Hide file tree
Showing 3 changed files with 266 additions and 25 deletions.
124 changes: 100 additions & 24 deletions src/parser/sdf-parser.ts
Original file line number Diff line number Diff line change
Expand Up @@ -43,37 +43,74 @@ class SdfParser extends StructureParser {
const ap2 = s.getAtomProxy()

let idx = 0
let lineNo = 0
let lineNo = 0 // for V2000: current line number in currently parsed Mol file
let modelIdx = 0
let modelAtomIdxStart = 0

const sdfData: {[k: string]: string[]}[] = []
let currentItem: string|boolean = false
let currentData: {[k: string]: string[]} = {}
let mItem
let mItem: RegExpMatchArray | null
s.extraData.sdf = sdfData

let atomCount, bondCount, atomStart: number, atomEnd: number, bondStart: number, bondEnd: number
let atomCount, bondCount, atomStart: number, atomEnd: number, bondStart: number, bondEnd: number, x: number, y: number, z: number, atomname: string, element: string, atomindex: number, order: number
let isV3000 = false, isAtomBlock = false, isBondBlock = false
let tokens: string[] = [], acc: string[] = []
const atomindexToStoreindex = new Map<number, number>()

function _parseChunkOfLines (_i: number, _n: number, lines: string[]) {
for (let i = _i; i < _n; ++i) {
const line = lines[ i ]

if (isV3000 && line) {
tokens = line.substring(7).split(' ')

// Entity properties may extend over multiple lines (hanging line finishes with '-')
// Tokens are accumulated to be processed at the same time for a given entity
if (acc.length) {
tokens = [...acc, ...tokens]
acc = []
}
if (tokens[tokens.length - 1] === '-') {
tokens.pop();
acc = tokens;
continue;
}
}

if (line.substr(0, 4) === '$$$$') {
lineNo = -1
++modelIdx
modelAtomIdxStart = atomStore.count
sdfData.push(currentData)
currentData = {}
currentItem = false
isV3000 = false
} else if (lineNo === 3) {
atomCount = parseInt(line.substr(0, 3))
bondCount = parseInt(line.substr(3, 3))
isV3000 = line.indexOf(' V3000') > -1

if (isV3000) {
atomindexToStoreindex.clear()
} else {

atomCount = parseInt(line.substr(0, 3))
bondCount = parseInt(line.substr(3, 3))

atomStart = 4
atomEnd = atomStart + atomCount
bondStart = atomEnd
bondEnd = bondStart + bondCount
atomStart = 4
atomEnd = atomStart + atomCount
bondStart = atomEnd
bondEnd = bondStart + bondCount

if (asTrajectory) {
currentCoord = 0
currentFrame = new Float32Array(atomCount * 3)
frames.push(currentFrame)

if (modelIdx > 0) doFrames = true
}
}
} else if (isV3000 && tokens[0] === 'COUNTS') {
atomCount = parseInt(tokens[1]);

if (asTrajectory) {
currentCoord = 0
Expand All @@ -82,12 +119,45 @@ class SdfParser extends StructureParser {

if (modelIdx > 0) doFrames = true
}
} else if (lineNo >= atomStart && lineNo < atomEnd) {
} else if (isV3000 && tokens.length == 2) {
if (tokens[1] === 'ATOM') {
if (tokens[0] === 'BEGIN') isAtomBlock = true
else if (tokens[0] === 'END') isAtomBlock = false
} else if (tokens[1] === 'BOND') {
if (tokens[0] === 'BEGIN') isBondBlock = true
else if (tokens[0] === 'END') isBondBlock = false
}
} else if (
isAtomBlock
|| (!isV3000 && lineNo >= atomStart && lineNo < atomEnd)
) {
if (firstModelOnly && modelIdx > 0) continue

const x = parseFloat(line.substr(0, 10))
const y = parseFloat(line.substr(10, 10))
const z = parseFloat(line.substr(20, 10))
let charge = 0
if (isV3000) {
x = parseFloat(tokens[2])
y = parseFloat(tokens[3])
z = parseFloat(tokens[4])

element = tokens[1]
atomindex = parseInt(tokens[0])
atomindexToStoreindex.set(atomindex, idx)
atomname = element + atomindex

if (tokens.length > 6) {
let chgTok = tokens.slice(6).find(t => t.indexOf('CHG=') === 0);
if (chgTok) {
charge = parseInt(chgTok.substring(4))
}
}
} else {
x = parseFloat(line.substr(0, 10))
y = parseFloat(line.substr(10, 10))
z = parseFloat(line.substr(20, 10))

element = line.substr(31, 3).trim()
atomname = element + (idx - modelAtomIdxStart + 1)
}

if (asTrajectory) {
const j = currentCoord * 3
Expand All @@ -101,31 +171,37 @@ class SdfParser extends StructureParser {
if (doFrames) continue
}

const element = line.substr(31, 3).trim()
const atomname = element + (idx - modelAtomIdxStart + 1)

atomStore.growIfFull()
atomStore.atomTypeId[ idx ] = atomMap.add(atomname, element)

atomStore.x[ idx ] = x
atomStore.y[ idx ] = y
atomStore.z[ idx ] = z
atomStore.serial[ idx ] = idx
atomStore.formalCharge[ idx ] = 0
atomStore.serial[ idx ] = isV3000 ? atomindex : idx
atomStore.formalCharge[ idx ] = charge

sb.addAtom(modelIdx, '', '', 'HET', 1, 1)

idx += 1
} else if (lineNo >= bondStart && lineNo < bondEnd) {
} else if (
isBondBlock
|| (!isV3000 && lineNo >= bondStart && lineNo < bondEnd)
) {
if (firstModelOnly && modelIdx > 0) continue
if (asTrajectory && modelIdx > 0) continue

ap1.index = parseInt(line.substr(0, 3)) - 1 + modelAtomIdxStart
ap2.index = parseInt(line.substr(3, 3)) - 1 + modelAtomIdxStart
const order = parseInt(line.substr(6, 3))
if (isV3000) {
ap1.index = atomindexToStoreindex.get(parseInt(tokens[2]))
ap2.index = atomindexToStoreindex.get(parseInt(tokens[3]))
order = parseInt(tokens[1])
} else {
ap1.index = parseInt(line.substr(0, 3)) - 1 + modelAtomIdxStart
ap2.index = parseInt(line.substr(3, 3)) - 1 + modelAtomIdxStart
order = parseInt(line.substr(6, 3))
}

s.bondStore.addBond(ap1, ap2, order)
} else if (line.match(/M {2}CHG/)) {
} else if (line.substr(0, 6) === 'M CHG') {
const chargeCount = parseInt(line.substr(6, 3))
for (let ci = 0, coffset = 10; ci < chargeCount; ++ci, coffset += 8) {
const aToken = parseInt(line.substr(coffset, 3))
Expand All @@ -134,7 +210,7 @@ class SdfParser extends StructureParser {
atomStore.formalCharge[ atomIdx ] = cToken
}
// eslint-disable-next-line no-cond-assign
} else if (mItem = line.match(reItem)) {
} else if (line.charAt(0) === '>' && (mItem = line.match(reItem))) {
currentItem = mItem[ 1 ]
currentData[ currentItem ] = []
} else if (currentItem !== false && line) {
Expand Down
130 changes: 130 additions & 0 deletions test/data/v3000.sdf
Original file line number Diff line number Diff line change
@@ -0,0 +1,130 @@

SciTegic04161911273D

0 0 0 0 0 0 999 V3000
M V30 BEGIN CTAB
M V30 COUNTS 23 24 0 0 0
M V30 BEGIN ATOM
M V30 1 C 3.69653 -0.53111 -0.07569 0
M V30 2 N 4.83107 -1.1738 0.63455 0
M V30 3 O 2.48284 -1.4333 -0.00289 0
M V30 4 C 4.08798 -0.11703 -1.56721 0
M V30 5 C 4.71272 -2.28965 1.40283 0
M V30 6 N 6.07017 -0.66124 0.69861 0
M V30 7 C 1.20743 -0.69735 -0.00008 0
M V30 8 C 4.42492 -1.30971 -2.54228 0
M V30 9 O 2.98159 0.67208 -2.19303 0
M V30 10 N 5.94489 -2.48273 1.97545 0
M V30 11 C 6.77991 -1.48028 1.54171 0
M V30 12 C 1.20809 0.69256 0.00012 0
M V30 13 C 0.00395 -1.39238 -0.00028 0
M V30 14 C 4.79438 -0.70993 -3.90963 0
M V30 15 C 3.20101 -2.22546 -2.73765 0
M V30 16 C 5.61846 -2.15207 -2.05702 0
M V30 17 C 0.00465 1.38756 0.00012 0
M V30 18 C -1.19951 -0.69741 -0.00028 0
M V30 19 C -1.19887 0.69232 -0.00008 0
M V30 20 C -2.5099 1.45264 -0.00006 0
M V30 21 N -2.74496 2.76473 -0.00015 0 -
M V30 CHG=1
M V30 22 C -1.69103 3.82223 -0.00033 0
M V30 23 O -4.08697 3.18583 -0.0001 0 CHG=-1
M V30 END ATOM
M V30 BEGIN BOND
M V30 1 1 1 2
M V30 2 1 1 3
M V30 3 1 1 4
M V30 4 1 2 5
M V30 5 1 2 6
M V30 6 1 3 7
M V30 7 1 4 8
M V30 8 1 4 9
M V30 9 2 5 10
M V30 10 2 6 11
M V30 11 1 7 12
M V30 12 2 7 13
M V30 13 1 8 14
M V30 14 1 8 15
M V30 15 1 8 16
M V30 16 2 12 17
M V30 17 1 13 18
M V30 18 1 17 19
M V30 19 1 19 20
M V30 20 2 20 21
M V30 21 1 21 22
M V30 22 1 21 23
M V30 23 1 10 11
M V30 24 2 18 19
M V30 END BOND
M V30 END CTAB
M END
> <IDNUMBER>
BAS 0003964

> <CLUSTER>
19849

> <CL_SIZE>
1

> <CL_VAR>
0.0000

$$$$
CHEBI:57262
RDKit 3D

0 0 0 0 0 0 0 0 0 0999 V3000
M V30 BEGIN CTAB
M V30 COUNTS 22 21 0 0 0
M V30 BEGIN ATOM
M V30 1 P 18.2567 -5.6825 0 0
M V30 2 O 18.0395 -6.6275 0 0 CHG=-1
M V30 3 O 18.255 -4.7415 0 0
M V30 4 O 18.9453 -6.0633 0 0
M V30 5 O 17.4245 -5.9341 0 0
M V30 6 C 15.2886 -5.4953 0 0
M V30 7 C 15.9989 -5.9147 0 0
M V30 8 C 16.7174 -5.5091 0 0
M V30 9 O 14.5702 -5.9009 0 0
M V30 10 C 13.8556 -5.4886 0 0
M V30 11 O 13.8553 -4.6636 0 0
M V30 12 O 15.9909 -6.7396 0 0
M V30 13 O 21.0888 -4.8258 0 0
M V30 14 C 21.0888 -5.6508 0 0
M V30 15 C 20.3743 -6.0633 0 0
M V30 16 N 20.3743 -6.8883 0 0 CHG=1
M V30 17 C 19.6598 -5.6508 0 0
M V30 18 O 21.8033 -6.0633 0 0 CHG=-1
M V30 19 R1 13.1411 -5.9011 0 0
M V30 20 C 15.2764 -7.1521 0 0
M V30 21 O 15.2764 -7.9771 0 0
M V30 22 R2 14.562 -6.7396 0 0
M V30 END ATOM
M V30 BEGIN BOND
M V30 1 1 1 2
M V30 2 2 1 3
M V30 3 1 1 4
M V30 4 1 6 9
M V30 5 1 7 6 -
M V30 CFG=3
M V30 6 1 8 7
M V30 7 1 5 8
M V30 8 1 10 9
M V30 9 2 10 11
M V30 10 1 7 12
M V30 11 1 1 5
M V30 12 1 15 14 CFG=1
M V30 13 2 14 13
M V30 14 1 15 16
M V30 15 1 15 17
M V30 16 1 17 4
M V30 17 1 18 14
M V30 18 1 10 19
M V30 19 1 12 20
M V30 20 2 20 21
M V30 21 1 20 22
M V30 END BOND
M V30 END CTAB
M END
$$$$
37 changes: 36 additions & 1 deletion test/parser/tests-sdf-parser.spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ import { join } from 'path'
import * as fs from 'fs'

describe('parser/sdf-parser', function () {
describe('parsing', function () {
describe('parsing V2000', function () {
it('basic', function () {
var file = join(__dirname, '/../data/01W_ideal.sdf')
var str = fs.readFileSync(file, 'utf-8')
Expand All @@ -23,4 +23,39 @@ describe('parser/sdf-parser', function () {
})
})
})

// Parses the two-record V3000 fixture (23 + 22 atoms, 24 + 21 bonds) and
// checks the aggregated counts as well as per-atom formal charges.
describe('parsing V3000', () => {
  it('basic', () => {
    const fixturePath = join(__dirname, '/../data/v3000.sdf')
    const contents = fs.readFileSync(fixturePath, 'utf-8')
    const parser = new SdfParser(new StringStreamer(contents))

    return parser.parse().then((structure) => {
      const { modelStore, atomStore, bondStore } = structure

      expect(modelStore.count).toBe(2)
      expect(structure.atomCount).toBe(45)
      expect(structure.bondCount).toBe(45)
      expect(atomStore.count).toBe(45)
      expect(atomStore.formalCharge).toBeTruthy()
      expect(bondStore.count).toBe(45)

      // [atom store index, expected formal charge]
      // 20: record 1, atom 21 (CHG=1 given on a continuation line)
      // 22: record 1, atom 23 (CHG=-1)
      // 24: record 2, atom 2 (CHG=-1)
      // 25: record 2, atom 3 (no CHG property)
      const expectedCharges: [number, number][] = [
        [20, 1],
        [22, -1],
        [24, -1],
        [25, 0]
      ]
      for (const [storeIndex, charge] of expectedCharges) {
        expect(atomStore.formalCharge[storeIndex]).toBe(charge)
      }
    })
  })
})

// Concatenates the V3000 fixture with 01W_ideal.sdf (the file used by the
// V2000 test) and checks that the parser handles both in a single stream.
describe('parsing mixed V2000/V3000 sdf file', () => {
  it('basic', () => {
    const v3000Text = fs.readFileSync(join(__dirname, '/../data/v3000.sdf'), 'utf-8')
    const idealText = fs.readFileSync(join(__dirname, '/../data/01W_ideal.sdf'), 'utf-8')

    // Strip trailing whitespace from the first file so that exactly one
    // newline separates its last line from the start of the second file.
    const combined = [v3000Text.trimRight(), idealText].join('\n')
    const parser = new SdfParser(new StringStreamer(combined))

    return parser.parse().then((structure) => {
      // Two records from the V3000 fixture; the third model is expected to
      // come from the appended file.
      expect(structure.modelStore.count).toBe(3)
    })
  })
})
})

0 comments on commit 3f0fd8e

Please sign in to comment.