Skip to content

Commit

Permalink
Merge pull request #611 from python-rope/lieryan-implement-json-datafile
Browse files Browse the repository at this point in the history
Implement JSON DataFile serialization
  • Loading branch information
lieryan committed Dec 28, 2022
2 parents 93fc5bc + 89ee77f commit b38d82e
Show file tree
Hide file tree
Showing 6 changed files with 385 additions and 3 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
- #626 Install pre-commit hooks on rope repository (@lieryan)
- #548 Implement MoveGlobal using string as destination module names (@lieryan)
- #627 Fix parsing of octal literal (@lieryan)
- #611 Implement JSON DataFile serialization (@lieryan)

# Release 1.6.0

Expand Down
14 changes: 12 additions & 2 deletions rope/base/oi/memorydb.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from rope.base import utils
from rope.base.oi import objectdb
from rope.base.serializer import json_to_python, python_to_json


class MemoryDB(objectdb.FileDict):
Expand Down Expand Up @@ -115,7 +116,16 @@ def add_call(self, parameters, returned):
self.call_info[parameters] = returned

def __getstate__(self):
return (self.call_info, self.per_name)
original_data = (self.call_info, self.per_name)
encoded = python_to_json(original_data, version=2)
encoded["$"] = "ScopeInfo"
return encoded

def __setstate__(self, data):
self.call_info, self.per_name = data
if isinstance(data, tuple) and len(data) == 2:
# legacy pickle-based serialization
self.call_info, self.per_name = data
else:
# new serialization
assert data["$"] == "ScopeInfo"
self.call_info, self.per_name = json_to_python(data)
7 changes: 6 additions & 1 deletion rope/base/project.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
import json
import os
import sys
import warnings
from contextlib import ExitStack
from typing import Optional

import rope.base.fscommands # Use full qualification for clarity.
Expand Down Expand Up @@ -393,8 +395,11 @@ def read_data(self, name):
def write_data(self, name, data):
if self.project.ropefolder is not None:
file = self._get_file(name)
with open(file.real_path, "wb") as output_file:
with ExitStack() as cm:
output_file = cm.enter_context(open(file.real_path, "wb"))
output_file2 = cm.enter_context(open(file.real_path + ".json", "w"))
pickle.dump(data, output_file, 2)
json.dump(data, output_file2, default=lambda o: o.__getstate__())

def add_write_hook(self, hook):
self.hooks.append(hook)
Expand Down
145 changes: 145 additions & 0 deletions rope/base/serializer.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,145 @@
"""
This module serves to convert a data structure composed of Python primitives
(dict, list, tuple, int, str, None) to JSON-serializable primitives (object,
array, number, str, null).
A core feature of this serializer is that the produced output will round-trip to
identical objects when deserialized by the standard library json module.
In other words, this property always holds:
>>> original_data = ... any JSON ...
>>> encoded = python_to_json(original_data)
>>> serialized = json.dumps(encoded)
>>> decoded = json.loads(serialized)
>>> rehydrated_data = json_to_python(decoded)
>>> assert rehydrated_data == original_data
>>> assert encoded == decoded
A couple of challenges in straight serialization that this module helps resolve:
- json.dumps() maps both Python list and tuple to JSON array. This module
provides two variants:
- In version=1, this module converts Python list `[1, 2, 3]` as-is and
converts Python tuple `(1, 2, 3)` to special object construct
`{"$": "t", "items": [1, 2, 3]}`
- In version=2, it is the other way around, this module converts Python tuple
`(1, 2, 3)` as-is and converts Python list `[1, 2, 3]` to special object
construct `{"$": "l", "items": [1, 2, 3]}`
- Python dict keys can be a tuple/int/None, but JSON Object keys must be strings.
This module replaces such `dict` keys with a `refid` which can be resolved using
the `encoded["references"][refid]` lookup table. As a small optimisation, a
dict key that is a string and isn't only numeric is encoded directly into the
object instead.
- Python dict keys cannot be another dict because it is unhashable, therefore
there's no encoding for having objects as keys either.
- There is currently no support for floating point numbers.
Note that `json_to_python` only accepts Python objects that can be the output
of `python_to_json`, there is NO guarantee for going the other way around. This
may or may not work:
>>> python_to_json(json_to_python(original_data)) == original_data
"""


def python_to_json(o, version=1):
    """Wrap `o` in a JSON-serializable envelope.

    The envelope always carries the format version under "v" and the encoded
    payload under "data".  A "references" list is included only when at least
    one dict key had to be stored out-of-line.

    Raises ValueError when `version` is neither 1 nor 2.
    """
    if version != 1 and version != 2:
        raise ValueError(f"Unexpected version {version}")

    # The encoder appends to this list as it meets dict keys that cannot be
    # used directly as JSON object keys.
    key_table = []
    payload = _py2js(o, key_table, version=version)

    envelope = {"v": version, "data": payload}
    if key_table:
        envelope["references"] = key_table
    return envelope


def json_to_python(o):
    """Decode an envelope produced by `python_to_json` back into Python data.

    `o` is expected to be a mapping with a "v" entry (the format version,
    1 or 2) and a "data" entry (the encoded payload).  The optional
    "references" entry is the list of encoded dict keys that numeric object
    keys inside "data" point into.

    Raises ValueError when the version is neither 1 nor 2, and KeyError when
    the "v" or "data" entry is missing.
    """
    version = o["v"]
    if version not in (1, 2):
        raise ValueError(f"Unexpected version {version}")
    # The encoder omits "references" when it is empty.  Fall back to an empty
    # list (the type the encoder produces and the decoder indexes by int)
    # rather than a dict.
    references = o.get("references", [])
    return _js2py(o["data"], references, version)


def _py2js(o, references, version):
if isinstance(o, (str, int)) or o is None:
return o
elif isinstance(o, tuple):
if version == 1:
return {
"$": "t",
"items": [_py2js(item, references, version) for item in o],
}
else:
return [_py2js(item, references, version) for item in o]
elif isinstance(o, list):
if version == 2:
return {
"$": "l",
"items": [_py2js(item, references, version) for item in o],
}
else:
return [_py2js(item, references, version) for item in o]
elif isinstance(o, dict):
result = {}
for pykey, pyvalue in o.items():
if pykey == "$":
raise ValueError('dict cannot contain reserved key "$"')
if isinstance(pykey, str) and not pykey.isdigit():
result[pykey] = _py2js(pyvalue, references, version)
else:
assert isinstance(pykey, (str, int, tuple)) or pykey is None
assert not isinstance(pykey, list)
refid = len(references)
references.append(_py2js(pykey, references, version))
result[str(refid)] = _py2js(pyvalue, references, version)
return result
raise TypeError(f"Object of type {type(o)} is not allowed {o}")


def _js2py(o, references, version):
if isinstance(o, (str, int)) or o is None:
return o
elif isinstance(o, list):
if version == 1:
return list(_js2py(item, references, version) for item in o)
elif version == 2:
return tuple(_js2py(item, references, version) for item in o)
raise ValueError(f"Unexpected version {version}")
elif isinstance(o, dict):
result = {}
if "$" in o:
if o["$"] == "t":
assert version == 1
data = o["items"]
return tuple(_js2py(item, references, version) for item in data)
elif o["$"] == "l":
assert version == 2
data = o["items"]
return list(_js2py(item, references, version) for item in data)
raise TypeError(f'Unrecognized object of type: {o["$"]} {o}')
else:
for refid, jsvalue in o.items():
assert isinstance(refid, str)
if refid.isdigit():
refid = int(refid)
assert 0 <= refid < len(references)
jskey = references[refid]
pyvalue = _js2py(jsvalue, references, version)
pykey = _js2py(jskey, references, version)
result[pykey] = pyvalue
else:
result[refid] = _js2py(jsvalue, references, version)
return result
raise TypeError(f'Object of type "{type(o).__name__}" is not allowed {o}')
54 changes: 54 additions & 0 deletions ropetest/objectdbtest.py
Original file line number Diff line number Diff line change
Expand Up @@ -146,3 +146,57 @@ def test_using_file_list_observer(self, db):
db.add_file_list_observer(observer)
db.validate_files()
self.assertEqual("removed invalid ", observer.log)

# Backward-compatibility check: a hard-coded pickle payload must still load
# into an object whose call_info / per_name match a freshly populated scope.
@_do_for_all_dbs
def test_legacy_serialization(self, db):
import pickle

# Populate the db with one call-info entry and one per-name entry, then
# fetch the scope info object holding them.
db.add_callinfo("file", "key", (1, 2), 3)
db.add_pername("file", "key", "name", 1)
scope_info = db._get_scope_info("file", "key")

# Frozen pickle of a rope.base.oi.memorydb.ScopeInfo whose state is the
# 2-tuple ({(1, 2): 3}, {"name": 1}) rather than a JSON-style dict.
pickled_data = b'\x80\x04\x95D\x00\x00\x00\x00\x00\x00\x00\x8c\x15rope.base.oi.memorydb\x94\x8c\tScopeInfo\x94\x93\x94)\x81\x94}\x94K\x01K\x02\x86\x94K\x03s}\x94\x8c\x04name\x94K\x01s\x86\x94b.' # noqa

# Loading the frozen payload must yield the same data as the live object.
assert pickle.loads(pickled_data).call_info == scope_info.call_info
assert pickle.loads(pickled_data).per_name == scope_info.per_name

# Round-trip check: pickling the scope info object and unpickling it must
# preserve call_info and per_name.
@_do_for_all_dbs
def test_new_pickle_serialization(self, db):
import pickle

# Populate the db with one call-info entry and one per-name entry, then
# fetch the scope info object holding them.
db.add_callinfo("file", "key", (1, 2), 3)
db.add_pername("file", "key", "name", 1)
scope_info = db._get_scope_info("file", "key")

serialized = pickle.dumps(scope_info)

rehydrated_data = pickle.loads(serialized)
assert rehydrated_data.call_info == scope_info.call_info
assert rehydrated_data.per_name == scope_info.per_name

# Round-trip check through the json module: scope info objects nested in a
# plain structure are dumped via __getstate__ and rebuilt via __setstate__.
@_do_for_all_dbs
def test_new_json_serialization(self, db):
import json

from rope.base.oi.memorydb import ScopeInfo

# Populate the db with one call-info entry and one per-name entry, then
# fetch the scope info object holding them.
db.add_callinfo("file", "key", (1, 2), 3)
db.add_pername("file", "key", "name", 1)
scope_info = db._get_scope_info("file", "key")

# The same object appears both inside a list and as a direct value.
data = {"inside": [scope_info], "other": scope_info, "things": [1, 2, 3]}

# Rebuild a ScopeInfo from any decoded JSON object tagged "$": "ScopeInfo";
# every other object is returned untouched.
def object_hook(o):
if o.get("$") == "ScopeInfo":
# __new__ creates the instance without running __init__; state is then
# restored through __setstate__.
new_o = ScopeInfo.__new__(ScopeInfo)
new_o.__setstate__(o)
return new_o
return o

# `default` is invoked by json.dumps for objects it cannot serialize itself.
serialized = json.dumps(data, default=lambda o: o.__getstate__())
rehydrated_data = json.loads(serialized, object_hook=object_hook)

rehydrated_scope_info = rehydrated_data["inside"][0]
assert isinstance(rehydrated_scope_info, ScopeInfo)
assert rehydrated_scope_info.call_info == scope_info.call_info
assert rehydrated_scope_info.per_name == scope_info.per_name

0 comments on commit b38d82e

Please sign in to comment.