Skip to content

Commit

Permalink
[tmva][sofie-gnn] Fix on loading required packages (sonnet, graph_net…
Browse files Browse the repository at this point in the history
…s) and restricting numpy version

avoid trying to load sonnet and graph_nets if not installed

[tmva][sofie-gnn] numpy version for sofie-gnn test should be restricted within <=1.19 or >=1.24

Because of the changed behavior of np.bool and similar aliases for builtin
data types, we need to restrict the numpy version to the stated range for sonnet.

For more information, refer here:
numpy/numpy#14882
numpy/numpy#22607

fix: definition of OutputGenerated in RModel_Base

[tmva][sofie-gnn] Suppress warnings for cases other than .dat file in method WriteInitializedTensorsToFile in RModel

[tmva][sofie-gnn] Fix node update in GNN and size of global features in GraphIndependent

[tmva][sofie-gnn] Fix node update in RModel_GNN generated code

[tmva][sofie-gnn] Fix for correct size of global features in GraphIndependent

Also fix the computation of output features in RModel_GNN

Fix dimension of global feature tensor during node update

If the number of nodes is larger than the number of edges, the tensor storing the global features needs to be resized to the correct size: number of nodes * number of features.

[tmva][sofie-gnn] Fix importing _gnn if python version is less than 3.8

Also improve the GNN test and address some of Vincenzo's comments

Changes addressing comments by @vepadulano

Co-authored-by: moneta <lorenzo.moneta@cern.ch>
  • Loading branch information
2 people authored and maksgraczyk committed Jan 12, 2024
1 parent 9eb0c08 commit 294a211
Show file tree
Hide file tree
Showing 28 changed files with 546 additions and 429 deletions.
2 changes: 1 addition & 1 deletion bindings/pyroot/pythonizations/CMakeLists.txt
Expand Up @@ -68,7 +68,7 @@ if(tmva)
endif()

if(PYTHON_VERSION_STRING_Development_Main VERSION_GREATER_EQUAL 3.8 AND dataframe)
list(APPEND PYROOT_EXTRA_PY3_SOURCE
list(APPEND PYROOT_EXTRA_PY3_SOURCE
ROOT/_pythonization/_tmva/_batchgenerator.py)
endif()

Expand Down
Expand Up @@ -41,8 +41,8 @@ def inject_rbatchgenerator(ns):
setattr(ns.Experimental, func_name, python_func)

return ns
from ._gnn import RModel_GNN, RModel_GraphIndependent

from ._gnn import RModel_GNN, RModel_GraphIndependent

hasRDF = gSystem.GetFromPipe("root-config --has-dataframe") == "yes"
if hasRDF:
Expand Down
Expand Up @@ -14,7 +14,7 @@
import sys
from cppyy import gbl as gbl_namespace

if sys.version_info < (3, 7):
if sys.version_info < (3, 8):
raise RuntimeError("GNN Pythonizations are only supported in Python3")


Expand All @@ -29,7 +29,7 @@ def getActivationFunction(model):
Returns:
The activation function enum value.
"""
"""
function = model._activation.__name__
if function == 'relu':
return gbl_namespace.TMVA.Experimental.SOFIE.Activation.RELU
Expand Down Expand Up @@ -80,7 +80,7 @@ def add_layer_norm(gin, module_layer, function_target):
else:
model_block = gin.globals_update_block
axis = module_layer._axis
eps = module_layer._eps
eps = module_layer._eps
stash_type = 1
name_x = model_block.GetFunctionBlock().GetOutputTensorNames()[0]
name_bias = module_layer.offset.name
Expand All @@ -90,7 +90,7 @@ def add_layer_norm(gin, module_layer, function_target):
current_output_tensors = model_block.GetFunctionBlock().GetOutputTensorNames()
new_output_tensors = gbl_namespace.std.vector['std::string']()
new_output_tensors.push_back(name_Y)
model_block.GetFunctionBlock().AddOutputTensorNameList(new_output_tensors)
model_block.GetFunctionBlock().AddOutputTensorNameList(new_output_tensors)

def add_weights(gin, weights, function_target):
"""
Expand Down Expand Up @@ -133,12 +133,12 @@ def add_aggregate_function(gin, reducer, relation):
agg = gbl_namespace.TMVA.Experimental.SOFIE.RFunction_Mean()
gin.createAggregateFunction[gbl_namespace.TMVA.Experimental.SOFIE.RFunction_Mean](agg, relation)
else:
raise RuntimeError("Invalid aggregate function for reduction")
raise RuntimeError("Invalid aggregate function for reduction")


def add_update_function(gin, component_model, graph_type, function_target):
"""
Add update function for respective function target, either of nodes, edges or globals
Add update function for respective function target, either of nodes, edges or globals
based on the supplied component_model
Parameters:
Expand All @@ -164,7 +164,7 @@ def add_update_function(gin, component_model, graph_type, function_target):



class RModel_GNN:
class RModel_GNN:
"""
Wrapper class for graph_nets' GNN model;s parsing and inference generation
Expand Down Expand Up @@ -199,21 +199,21 @@ def ParseFromMemory(graph_module, graph_data, filename = "gnn_network"):
gin.num_global_features = len(graph_data['globals'])

gin.filename = filename

# adding the node update function
node_model = graph_module._node_block._node_model
add_update_function(gin, node_model, gbl_namespace.TMVA.Experimental.SOFIE.GraphType.GNN,
add_update_function(gin, node_model, gbl_namespace.TMVA.Experimental.SOFIE.GraphType.GNN,
gbl_namespace.TMVA.Experimental.SOFIE.FunctionTarget.NODES)

# adding the edge update function
edge_model = graph_module._edge_block._edge_model
add_update_function(gin, edge_model, gbl_namespace.TMVA.Experimental.SOFIE.GraphType.GNN,
add_update_function(gin, edge_model, gbl_namespace.TMVA.Experimental.SOFIE.GraphType.GNN,
gbl_namespace.TMVA.Experimental.SOFIE.FunctionTarget.EDGES)

# adding the global update function
global_model = graph_module._global_block._global_model
add_update_function(gin, global_model, gbl_namespace.TMVA.Experimental.SOFIE.GraphType.GNN,
gbl_namespace.TMVA.Experimental.SOFIE.FunctionTarget.GLOBALS)
add_update_function(gin, global_model, gbl_namespace.TMVA.Experimental.SOFIE.GraphType.GNN,
gbl_namespace.TMVA.Experimental.SOFIE.FunctionTarget.GLOBALS)

# adding edge-node aggregate function
add_aggregate_function(gin, graph_module._node_block._received_edges_aggregator._reducer.__qualname__, gbl_namespace.TMVA.Experimental.SOFIE.FunctionRelation.NODES_EDGES)
Expand Down Expand Up @@ -274,18 +274,18 @@ def ParseFromMemory(graph_module, graph_data, filename = "graph_independent_netw

# adding the node update function
node_model = graph_module._node_model._model
add_update_function(gin, node_model, gbl_namespace.TMVA.Experimental.SOFIE.GraphType.GraphIndependent,
add_update_function(gin, node_model, gbl_namespace.TMVA.Experimental.SOFIE.GraphType.GraphIndependent,
gbl_namespace.TMVA.Experimental.SOFIE.FunctionTarget.NODES)

# adding the edge update function
edge_model = graph_module._edge_model._model
add_update_function(gin, edge_model, gbl_namespace.TMVA.Experimental.SOFIE.GraphType.GraphIndependent,
add_update_function(gin, edge_model, gbl_namespace.TMVA.Experimental.SOFIE.GraphType.GraphIndependent,
gbl_namespace.TMVA.Experimental.SOFIE.FunctionTarget.EDGES)

# adding the global update function
global_model = graph_module._global_model._model
add_update_function(gin, global_model, gbl_namespace.TMVA.Experimental.SOFIE.GraphType.GraphIndependent,
gbl_namespace.TMVA.Experimental.SOFIE.FunctionTarget.GLOBALS)
add_update_function(gin, global_model, gbl_namespace.TMVA.Experimental.SOFIE.GraphType.GraphIndependent,
gbl_namespace.TMVA.Experimental.SOFIE.FunctionTarget.GLOBALS)

graph_independent_model = gbl_namespace.TMVA.Experimental.SOFIE.RModel_GraphIndependent(gin)
blas_routines = gbl_namespace.std.vector['std::string']()
Expand Down
8 changes: 6 additions & 2 deletions bindings/pyroot/pythonizations/test/CMakeLists.txt
Expand Up @@ -128,12 +128,16 @@ endif()
# SOFIE-GNN pythonizations
if (dataframe AND tmva)
if(NOT MSVC OR CMAKE_SIZEOF_VOID_P EQUAL 4 OR win_broken_tests AND PYTHON_VERSION_MAJOR_Development_Main EQUAL 3 )
ROOT_ADD_PYUNITTEST(pyroot_pyz_sofie_gnn sofie_gnn.py PYTHON_DEPS numpy sonnet graph_nets)
find_python_module(sonnet QUIET)
find_python_module(graph_nets QUIET)
if (PY_SONNET_FOUND AND PY_GRAPH_NETS_FOUND)
ROOT_ADD_PYUNITTEST(pyroot_pyz_sofie_gnn sofie_gnn.py PYTHON_DEPS numpy sonnet graph_nets)
endif()
endif()
endif()

# RTensor pythonizations
if (tmva AND sofie)
if (tmva)
if(NOT MSVC OR CMAKE_SIZEOF_VOID_P EQUAL 4 OR win_broken_tests)
ROOT_ADD_PYUNITTEST(pyroot_pyz_rtensor rtensor.py PYTHON_DEPS numpy)
endif()
Expand Down
117 changes: 89 additions & 28 deletions bindings/pyroot/pythonizations/test/sofie_gnn.py
Expand Up @@ -4,12 +4,13 @@
import numpy as np
from numpy.testing import assert_almost_equal

if np.__version__ > "1.19":
raise RuntimeError(f"This test requires NumPy version 1.19 or lower")
if np.__version__ >= "1.20" and np.__version__ < "1.24":
raise RuntimeError(f"This test requires NumPy version <=1.19 or >=1.24")

import graph_nets as gn
from graph_nets import utils_tf
import sonnet as snt
import os



Expand All @@ -21,9 +22,22 @@ def get_graph_data_dict(num_nodes, num_edges, GLOBAL_FEATURE_SIZE=2, NODE_FEATUR
"nodes": np.random.rand(num_nodes, NODE_FEATURE_SIZE).astype(np.float32),
"edges": np.random.rand(num_edges, EDGE_FEATURE_SIZE).astype(np.float32),
"senders": np.random.randint(num_nodes, size=num_edges, dtype=np.int32),
"receivers": np.random.randint(num_nodes, size=num_edges, dtype=np.int32),
"receivers": np.random.randint(num_nodes, size=num_edges, dtype=np.int32)
}

def resize_graph_data(input_data, GLOBAL_FEATURE_SIZE=2, NODE_FEATURE_SIZE=2, EDGE_FEATURE_SIZE=2) :
    """Build a graph-data dict with the same topology as *input_data* but
    zero-filled feature arrays of the requested sizes.

    The node and edge counts are taken from the shapes of the input arrays;
    the sender/receiver index arrays are passed through unchanged (same
    objects, not copies).

    Parameters:
        input_data: dict with "globals", "nodes", "edges", "senders",
            "receivers" entries (numpy arrays, as produced by
            get_graph_data_dict).
        GLOBAL_FEATURE_SIZE: length of the new zero global-feature vector.
        NODE_FEATURE_SIZE: per-node feature count of the new zero array.
        EDGE_FEATURE_SIZE: per-edge feature count of the new zero array.

    Returns:
        A new dict with zeroed "globals", "nodes" and "edges" arrays and
        the original "senders"/"receivers" arrays.
    """
    num_nodes = input_data["nodes"].shape[0]
    num_edges = input_data["edges"].shape[0]
    resized = {
        "globals": np.zeros((GLOBAL_FEATURE_SIZE, )),
        "nodes": np.zeros((num_nodes, NODE_FEATURE_SIZE)),
        "edges": np.zeros((num_edges, EDGE_FEATURE_SIZE)),
        "senders": input_data["senders"],
        "receivers": input_data["receivers"],
    }
    return resized

LATENT_SIZE = 2
def make_mlp_model():
"""Instantiates a new MLP, followed by LayerNorm.
Expand All @@ -34,7 +48,7 @@ def make_mlp_model():
A Sonnet module which contains the MLP and LayerNorm.
"""
return snt.Sequential([
snt.nets.MLP([2,2], activate_final=True),
snt.nets.MLP([LATENT_SIZE]*2, activate_final=True),
snt.LayerNorm(axis=-1, create_offset=True, create_scale=True)
])

Expand All @@ -48,9 +62,9 @@ class MLPGraphIndependent(snt.Module):
def __init__(self, name="MLPGraphIndependent"):
super(MLPGraphIndependent, self).__init__(name=name)
self._network = gn.modules.GraphIndependent(
edge_model_fn = lambda: snt.nets.MLP([2,2], activate_final=True),
node_model_fn = lambda: snt.nets.MLP([2,2], activate_final=True),
global_model_fn = lambda: snt.nets.MLP([2,2], activate_final=True))
edge_model_fn = lambda: snt.nets.MLP([LATENT_SIZE]*2, activate_final=True),
node_model_fn = lambda: snt.nets.MLP([LATENT_SIZE]*2, activate_final=True),
global_model_fn = lambda: snt.nets.MLP([LATENT_SIZE]*2, activate_final=True))

def __call__(self, inputs):
return self._network(inputs)
Expand All @@ -69,12 +83,19 @@ def __init__(self, name="MLPGraphNetwork"):
def __call__(self, inputs):
return self._network(inputs)

def PrintGData(data, printShape = True):
    """Print the node, edge and global feature contents of a graph tuple.

    Parameters:
        data: a graph whose .nodes/.edges/.globals expose a .numpy() method
            (presumably a graph_nets GraphsTuple of TF tensors — TODO confirm).
        printShape: if True, also print the three array shapes first.
    """
    # Pull the three feature arrays out as numpy arrays.
    n = data.nodes.numpy()
    e = data.edges.numpy()
    g = data.globals.numpy()
    if (printShape) :
       print("GNet data ... shapes",n.shape,e.shape,g.shape)
    # Flatten each array to one row for compact printing.
    print(" node data", n.reshape(n.size,))
    print(" edge data", e.reshape(e.size,))
    print(" global data",g.reshape(g.size,))

class EncodeProcessDecode(snt.Module):

def __init__(self,
edge_output_size=None,
node_output_size=None,
global_output_size=None,
name="EncodeProcessDecode"):
super(EncodeProcessDecode, self).__init__(name=name)
self._encoder = MLPGraphIndependent()
Expand Down Expand Up @@ -103,12 +124,15 @@ def test_parse_gnn(self):
Test that parsed GNN model from a graphnets model generates correct
inference code
'''

print('\nRun Graph parsing test')

GraphModule = gn.modules.GraphNetwork(
edge_model_fn=lambda: snt.nets.MLP([2,2], activate_final=True),
node_model_fn=lambda: snt.nets.MLP([2,2], activate_final=True),
global_model_fn=lambda: snt.nets.MLP([2,2], activate_final=True))

GraphData = get_graph_data_dict(2,1)
GraphData = get_graph_data_dict(2,1,2,2,2)
input_graphs = utils_tf.data_dicts_to_graphs_tuple([GraphData])
output = GraphModule(input_graphs)

Expand All @@ -135,18 +159,26 @@ def test_parse_gnn(self):
assert_almost_equal(output_edge_data, np.asarray(input_data.edge_data))
assert_almost_equal(output_global_data, np.asarray(input_data.global_data))

fname = "gnn_network"
os.remove(fname + '.dat')
os.remove(fname + '.hxx')


def test_parse_graph_independent(self):
'''
Test that parsed GraphIndependent model from a graphnets model generates correct
inference code
'''

print('\nRun Graph Independent parsing test')


GraphModule = gn.modules.GraphIndependent(
edge_model_fn=lambda: snt.nets.MLP([2,2], activate_final=True),
node_model_fn=lambda: snt.nets.MLP([2,2], activate_final=True),
global_model_fn=lambda: snt.nets.MLP([2,2], activate_final=True))

GraphData = get_graph_data_dict(2,1)
GraphData = get_graph_data_dict(2,1,2,2,2)
input_graphs = utils_tf.data_dicts_to_graphs_tuple([GraphData])
output = GraphModule(input_graphs)

Expand All @@ -173,39 +205,58 @@ def test_parse_graph_independent(self):
assert_almost_equal(output_edge_data, np.asarray(input_data.edge_data))
assert_almost_equal(output_global_data, np.asarray(input_data.global_data))

fname = "graph_independent_network"
os.remove(fname + '.dat')
os.remove(fname + '.hxx')

def test_lhcb_toy_inference(self):
'''
Test that parsed stack of SOFIE GNN and GraphIndependent modules generate the correct
inference code
'''

print('Run LHCb test')

# Instantiating EncodeProcessDecode Model
ep_model = EncodeProcessDecode(2,2,2)

#number of features for node. edge, globals
nsize = 3
esize = 3
gsize = 2
lsize = LATENT_SIZE #hard-coded latent size in definition of GNET model (for node edge and globals)

ep_model = EncodeProcessDecode()

# Initializing randomized input data
GraphData = get_graph_data_dict(2,1)
input_graphs = utils_tf.data_dicts_to_graphs_tuple([GraphData])
InputGraphData = get_graph_data_dict(2,1, gsize, nsize, esize)
input_graphs = utils_tf.data_dicts_to_graphs_tuple([InputGraphData])

# Make data for core networks (number of features for node/edge global is 2 * lsize)
CoreGraphData = resize_graph_data(InputGraphData, 2 * lsize, 2 * lsize, 2 * lsize)


OutputGraphData = resize_graph_data(InputGraphData, lsize, lsize, lsize)

# Initializing randomized input data for core
CoreGraphData = get_graph_data_dict(2, 1, 4, 4, 4)
input_graphs_2 = utils_tf.data_dicts_to_graphs_tuple([CoreGraphData])

# Collecting output from GraphNets model stack
output_gn = ep_model(input_graphs, 2)

print("senders and receivers ",InputGraphData['senders'],InputGraphData['receivers'])

# Declaring sofie models
encoder = ROOT.TMVA.Experimental.SOFIE.RModel_GraphIndependent.ParseFromMemory(ep_model._encoder._network, GraphData, filename = "encoder")
encoder = ROOT.TMVA.Experimental.SOFIE.RModel_GraphIndependent.ParseFromMemory(ep_model._encoder._network, InputGraphData, filename = "encoder")
encoder.Generate()
encoder.OutputGenerated()

core = ROOT.TMVA.Experimental.SOFIE.RModel_GNN.ParseFromMemory(ep_model._core._network, CoreGraphData, filename = "core")
core.Generate()
core.OutputGenerated()

decoder = ROOT.TMVA.Experimental.SOFIE.RModel_GraphIndependent.ParseFromMemory(ep_model._decoder._network, GraphData, filename = "decoder")
decoder = ROOT.TMVA.Experimental.SOFIE.RModel_GraphIndependent.ParseFromMemory(ep_model._decoder._network, OutputGraphData, filename = "decoder")
decoder.Generate()
decoder.OutputGenerated()

output_transform = ROOT.TMVA.Experimental.SOFIE.RModel_GraphIndependent.ParseFromMemory(ep_model._output_transform._network, GraphData, filename = "output_transform")
output_transform = ROOT.TMVA.Experimental.SOFIE.RModel_GraphIndependent.ParseFromMemory(ep_model._output_transform._network, OutputGraphData, filename = "output_transform")
output_transform.Generate()
output_transform.OutputGenerated()

Expand All @@ -222,14 +273,18 @@ def test_lhcb_toy_inference(self):

# Preparing the input data for running inference on sofie
input_data = ROOT.TMVA.Experimental.SOFIE.GNN_Data()
input_data.node_data = ROOT.TMVA.Experimental.AsRTensor(GraphData['nodes'])
input_data.edge_data = ROOT.TMVA.Experimental.AsRTensor(GraphData['edges'])
input_data.global_data = ROOT.TMVA.Experimental.AsRTensor(GraphData['globals'])
input_data.node_data = ROOT.TMVA.Experimental.AsRTensor(InputGraphData['nodes'])
input_data.edge_data = ROOT.TMVA.Experimental.AsRTensor(InputGraphData['edges'])
input_data.global_data = ROOT.TMVA.Experimental.AsRTensor(InputGraphData['globals'])

output_gn = ep_model(input_graphs, 2)

# running inference on sofie
encoder_session.infer(input_data)
latent0 = CopyData(input_data)
latent = input_data
data = CopyData(input_data)

encoder_session.infer(data)
latent0 = CopyData(data)
latent = data
output_ops = []
for _ in range(2):
core_input = ROOT.TMVA.Experimental.SOFIE.Concatenate(latent0, latent, axis=1)
Expand All @@ -246,10 +301,16 @@ def test_lhcb_toy_inference(self):
output_global_data = output_gn[i].globals.numpy().flatten()

assert_almost_equal(output_node_data, np.asarray(output_ops[i].node_data))

assert_almost_equal(output_edge_data, np.asarray(output_ops[i].edge_data))
assert_almost_equal(output_global_data, np.asarray(output_ops[i].global_data))

assert_almost_equal(output_global_data, np.asarray(output_ops[i].global_data))

#remove header files after being used
filesToRemove = ['core','encoder','decoder','output_transform']
for fname in filesToRemove:
os.remove(fname + '.hxx')
os.remove(fname + '.dat')


if __name__ == '__main__':
Expand Down
1 change: 0 additions & 1 deletion tmva/pymva/test/EmitFromKeras.cxx
Expand Up @@ -4,7 +4,6 @@
// The program is run when the target 'TestRModelParserKeras' is built.
// The program generates the required .hxx file after parsing a Keras .keras file into a RModel object.

#include "TMVA/RModel_Base.hxx"
#include "TMVA/RModel.hxx"
#include "TMVA/RModelParser_Keras.h"

Expand Down

0 comments on commit 294a211

Please sign in to comment.