"""Single-period multiplex graph representation.
This module defines :class:`Multiplex`, a validated container around ibis edge
and vertex tables with helpers for conversions and degree summaries.
"""
import ibis
from .check import check_edges, check_vertices
from pathlib import Path
from . import io
import logging
from scipy.sparse import csr_matrix
import networkx as nx
logger = logging.getLogger(__name__)
[docs]
class Multiplex:
"""
A multiplex is a graph with multiple layers.
Each layer represents a different type of relationship between the same set of vertices, during one period.
For example, in a social network, one layer could represent friendships, while
another layer could represent professional connections.
For multiple periods, use MultiplexSeries.
"""
#: The edges of the multiplex. This is a table with columns "src", "dst", "layer","relationtype" and optionally weight.
edges: ibis.Table
#: The vertices of the multiplex. This is a table with a column "id" and optional additional columns.
vertices: ibis.Table
period: int | None
[docs]
def __init__(
self, edges: ibis.Table, vertices: ibis.Table = None, period: int | None = None
) -> None:
"""
Initialize a multiplex with the given edges and vertices tables.
Args:
- edges: table with columns ``src``, ``dst``, ``layer``, and ``relationtype``.
- vertices: table with column ``id`` and optional additional columns.
- period: the period this multiplex belongs to, or ``None`` if not applicable.
Raises:
- ValueError: if the edges table does not satisfy the required schema.
- ValueError: if the vertices table does not satisfy the required schema.
"""
if not check_edges(edges, check_period=False):
raise ValueError("Invalid edges table")
if vertices is not None and not check_vertices(vertices, check_period=False):
raise ValueError("Invalid vertices table")
self.period = period
self.edges = edges
# TODO derive vertices from edges if not provided
self.vertices = vertices
[docs]
def layers(self) -> list[str]:
"""
Get the list of layers present in the multiplex.
Returns:
- List of layer names.
"""
layers = self.edges[["layer"]].distinct().layer.to_list()
return layers
[docs]
def update_vertices(self) -> None:
"""
Update the vertices table by deriving it from the edges table.
This is useful when the vertices table was not provided at initialization.
``self.vertices`` is updated in place.
"""
src = self.edges.select(id="src").distinct()
dst = self.edges.select(id="dst").distinct()
V = src.union(dst, distinct=True).to_pyarrow()
self.vertices = ibis.memtable(V)
[docs]
def to_csr_matrix(self, use_weight: bool | str | ibis.Value = False) -> csr_matrix[bool] | csr_matrix[float]:
"""
Transform the multiplex into a sparse matrix, collapsing all layers into one.
To keep layers separate, use ``to_csr_matrices`` instead.
Args:
- use_weight: optional column in the edges table to use as weights for the adjacency matrix. If False, the adjacency matrix will be unweighted (boolean).
if True, the method will look for a column named "weight" in the edges table. If a string is provided, it will be used as the name of the weight column.
If not provided, the adjacency matrix will be unweighted (boolean).
Returns:
- Sparse boolean matrix of shape ``(n_vertices, n_vertices)``.
"""
from .to_csr_matrix import to_row_col_idx, idx_to_csr_matrix
E = self.edges
V = self.vertices
if use_weight is True:
weight = "weight"
elif isinstance(use_weight, str):
E[["weight"]] = E[[use_weight]]
elif isinstance(use_weight, ibis.Value):
weight = "weight"
E = E.mutate(weight=weight)
else:
E = E.drop(["weight"], errors="ignore")
if (use_weight is not False) and (weight not in E.columns):
raise ValueError(f"Weight column '{weight}' not found in edges table")
idx = to_row_col_idx(E, V)
M = idx_to_csr_matrix(idx, V)
return M
[docs]
def to_csr_matrices(self) -> dict[str, csr_matrix]:
"""
Transform the multiplex into a dictionary of sparse matrices, one per layer.
Returns:
- Dictionary mapping layer name to a sparse boolean matrix of shape ``(n_vertices, n_vertices)``.
"""
from .to_csr_matrix import to_row_col_idx, idx_to_csr_matrix
layers = self.layers()
matrices = {}
for layer in layers:
idx = to_row_col_idx(
self.edges.filter(self.edges.layer == layer), self.vertices
)
M = idx_to_csr_matrix(idx, self.vertices)
matrices[layer] = M
return matrices
[docs]
def outdegree(self, by_layer: bool = False) -> ibis.Table:
"""
Compute the out-degree of each vertex in the multiplex.
Args:
- by_layer: if True, compute the out-degree separately for each layer.
Returns:
- by_layer=False: Table with columns "id" and "out_degree", where "id" is the vertex id and "out_degree" is the total number of outgoing edges from that vertex across all layers.
- by_layer=True: Table with columns "id", "layer", and "out_degree", where "id" is the vertex id, "layer" is the layer name, and "out_degree" is the number of outgoing edges from that vertex in that layer.
"""
E = self.edges
gb = ["src"]
if by_layer:
gb.append("layer")
outdegree = (
E.group_by(gb)
.aggregate(outdegree=E.count())
.rename(id = "src")
)
return outdegree
[docs]
def to_networkx(self) -> nx.MultiDiGraph:
"""
Convert the multiplex to a NetworkX MultiDiGraph.
Returns:
- NetworkX MultiDiGraph built from the CSR matrix representation of the edges.
"""
from .networkx import to_MultiDiGraph
return to_MultiDiGraph(self.edges, self.vertices)
[docs]
def save(self, dir: Path | str, **kw_args) -> None:
"""
Save the multiplex to disk.
The directory is created if it does not exist; existing files are overwritten.
Saving also evaluates the lazy ``edges`` and ``vertices`` expressions and
updates them to point at the saved files, which can improve subsequent performance.
Args:
- dir: path to the directory where the Multiplex will be saved.
- **kw_args: additional keyword arguments forwarded to ``io.save_multiplex``.
"""
edges = self.edges
vertices = self.vertices
if vertices is None:
self.update_vertices()
vertices = self.vertices
period = self.period
edges, vertices = io.save_multiplex(edges, vertices, period, dir=dir, **kw_args)
self.edges = edges
self.vertices = vertices