Multivariate Time Series¶
This notebook demonstrates the multivariate API in xaitimesynth. The key parameters are:
| Parameter | Where | Description |
|---|---|---|
| `n_dimensions` | `TimeSeriesBuilder(...)` | Number of channels/dimensions |
| `dim` | `add_signal()` / `add_feature()` | Which dimensions to apply the component to (default: all) |
| `shared_location` | `add_feature()` | Whether all specified dims share the same random feature position |
| `shared_randomness` | `add_signal()` / `add_feature()` | Whether stochastic components use the same random values across dims |
| `data_format` | `TimeSeriesBuilder(...)` | `"channels_first"` (N, D, T) or `"channels_last"` (N, T, D) |
import numpy as np
from lets_plot import LetsPlot
from xaitimesynth import (
    TimeSeriesBuilder,
    constant,
    gaussian_noise,
    gaussian_pulse,
    random_walk,
    plot_components,
)
# Configure lets-plot to render figures inline in the notebook.
LetsPlot.setup_html()
Basic multivariate dataset¶
Use n_dimensions=3 for a 3-channel dataset. The dim parameter targets specific channels; omitting it applies the component to all channels.
Here class 0 has no discriminating features. Class 1 has:
- A `constant` offset in dimensions 0 and 1 (at a random location, same position in both dims)
- A peak in dimension 2 only (at a fixed position)
# Build a 3-channel dataset: class 0 is background only; class 1 additionally
# carries a constant offset on dims 0-1 and a fixed-position pulse on dim 2.
builder = TimeSeriesBuilder(n_timesteps=100, n_samples=30, n_dimensions=3, random_state=0)
builder = (
    builder.for_class(0)
    .add_signal(random_walk(step_size=0.2))  # no dim= → applied to all 3 dims
    .add_signal(gaussian_noise(sigma=0.1))
)
builder = (
    builder.for_class(1)
    .add_signal(random_walk(step_size=0.2))
    .add_signal(gaussian_noise(sigma=0.1))
    .add_feature(
        constant(value=1.0),
        dim=[0, 1],  # restrict the offset to the first two channels
        length_pct=0.15,
        random_location=True,
        shared_location=True,  # dim 0 and dim 1 share one random position
    )
    .add_feature(
        gaussian_pulse(amplitude=2.0, width=5),
        dim=[2],  # pulse lives on the last channel only
        start_pct=0.4,
        end_pct=0.6,
    )
)
dataset = builder.build()

print("X shape: ", dataset["X"].shape)  # (N, D, T) channels-first
print("y shape: ", dataset["y"].shape)
print("feature_masks keys: ", list(dataset["feature_masks"].keys()))
print("components length: ", len(dataset["components"]))
assert dataset["X"].shape == (30, 3, 100)
X shape: (30, 3, 100) y shape: (30,) feature_masks keys: ['class_1_feature_0_constant_dim0', 'class_1_feature_0_constant_dim1', 'class_1_feature_1_gaussian_pulse_dim2'] components length: 30
# Render every figure returned by plot_components (one per dimension/class view).
for figure in plot_components(dataset):
    figure.show()
feature_masks — per-dimension keys¶
Each feature gets a separate mask entry per dimension it applies to.
Key format: class_{label}_feature_{idx}_{type}_dim{dim}
- `feature_0` → the `constant` offset (applied to dims 0 and 1 → two keys)
- `feature_1` → the peak (applied to dim 2 only → one key)
All masks are bool arrays of shape (n_samples, n_timesteps), regardless of the number of dimensions.
# Inspect every mask, then verify the key prefix and shape/dtype invariants.
masks = dataset["feature_masks"]
for key, mask in masks.items():
    print(f"{key}: shape={mask.shape}, dtype={mask.dtype}")

# Class 0 carries no features, so every mask key belongs to class 1.
assert all(key.startswith("class_1") for key in masks)
# Each mask is a bool (n_samples, n_timesteps) array regardless of dimensionality.
for mask in masks.values():
    assert mask.shape == (30, 100)
    assert mask.dtype == bool
class_1_feature_0_constant_dim0: shape=(30, 100), dtype=bool class_1_feature_0_constant_dim1: shape=(30, 100), dtype=bool class_1_feature_1_gaussian_pulse_dim2: shape=(30, 100), dtype=bool
Components — multivariate shapes¶
For multivariate data, background and aggregated are 2D arrays (T, D), one column per dimension. The per-feature entries remain 1D (T,) because each feature targets a single dimension.
# Pick a class-1 sample and inspect its component decomposition.
sample_idx = int(np.flatnonzero(dataset["y"] == 1)[0])
comp = dataset["components"][sample_idx]
print("background shape: ", comp.background.shape)  # (T, D)
print("aggregated shape: ", comp.aggregated.shape)  # (T, D)
print("features keys: ", list(comp.features.keys()))

# Transposing (T, D) -> (D, T) must reproduce the stored channels-first sample.
assert np.allclose(dataset["X"][sample_idx], comp.aggregated.T)
print("aggregated.T matches X[sample_idx]: True")
background shape: (100, 3) aggregated shape: (100, 3) features keys: ['feature_0_constant_dim0', 'feature_0_constant_dim1', 'feature_1_gaussian_pulse_dim2'] aggregated.T matches X[sample_idx]: True
shared_location¶
When a feature is placed at a random location across multiple dimensions:
- `shared_location=True` (default): the feature falls at the same timesteps in every specified dimension for a given sample
- `shared_location=False`: each dimension gets its own independent random position
# shared_location=True — same position in dim 0 and dim 1
shared_builder = (
    TimeSeriesBuilder(n_timesteps=100, n_samples=20, n_dimensions=2, random_state=42)
    .for_class(0)
    .add_signal(gaussian_noise(sigma=0.1))
    .for_class(1)
    .add_signal(gaussian_noise(sigma=0.1))
    .add_feature(
        gaussian_pulse(amplitude=1.0),
        dim=[0, 1],
        length_pct=0.1,
        random_location=True,
        shared_location=True,
    )
)
ds_shared = shared_builder.build()

# For every sample the dim0 and dim1 masks must be identical
masks_shared = ds_shared["feature_masks"]
key_dim0 = next(k for k in masks_shared if k.endswith("dim0"))
key_dim1 = next(k for k in masks_shared if k.endswith("dim1"))
assert np.array_equal(masks_shared[key_dim0], masks_shared[key_dim1])

# Show the feature's start index for the first class-1 sample in each dim.
i = int(np.flatnonzero(ds_shared["y"] == 1)[0])
start0 = np.flatnonzero(masks_shared[key_dim0][i])[0]
start1 = np.flatnonzero(masks_shared[key_dim1][i])[0]
print(f"shared_location=True — dim0 start: {start0}, dim1 start: {start1}")
print("Masks identical across dims: True")

for fig in plot_components(ds_shared):
    fig.show()
shared_location=True — dim0 start: 89, dim1 start: 89 Masks identical across dims: True
# shared_location=False — independent random positions per dim
indep_builder = (
    TimeSeriesBuilder(n_timesteps=100, n_samples=20, n_dimensions=2, random_state=42)
    .for_class(0)
    .add_signal(gaussian_noise(sigma=0.1))
    .for_class(1)
    .add_signal(gaussian_noise(sigma=0.1))
    .add_feature(
        gaussian_pulse(amplitude=1.0),
        dim=[0, 1],
        length_pct=0.1,
        random_location=True,
        shared_location=False,
    )
)
ds_indep = indep_builder.build()

masks_indep = ds_indep["feature_masks"]
key_dim0 = next(k for k in masks_indep if k.endswith("dim0"))
key_dim1 = next(k for k in masks_indep if k.endswith("dim1"))

# Collect the feature start index per class-1 sample for each dimension;
# with independent placement they must disagree on at least one sample.
class1_indices = np.flatnonzero(ds_indep["y"] == 1)
starts_dim0 = [int(np.flatnonzero(masks_indep[key_dim0][i])[0]) for i in class1_indices]
starts_dim1 = [int(np.flatnonzero(masks_indep[key_dim1][i])[0]) for i in class1_indices]
assert starts_dim0 != starts_dim1, (
    "Expected at least one difference with shared_location=False"
)

# Show first class-1 sample
i = int(class1_indices[0])
print(
    f"shared_location=False — dim0 start: {np.where(masks_indep[key_dim0][i])[0][0]}, dim1 start: {np.where(masks_indep[key_dim1][i])[0][0]}"
)
print("Positions differ across dims for at least one sample: True")
shared_location=False — dim0 start: 25, dim1 start: 8 Positions differ across dims for at least one sample: True
shared_randomness¶
For stochastic components (e.g. gaussian_noise), shared_randomness controls whether all specified dimensions draw from the same random sequence or independent ones.
- `shared_randomness=True`: identical values across dims — useful for simulating a noise source that affects multiple channels equally
- `shared_randomness=False` (default): independent noise per dim
def build_noise_dataset(shared: bool) -> dict:
    """Build a 2-dim, noise-only dataset; `shared` toggles shared_randomness."""
    builder = TimeSeriesBuilder(
        n_timesteps=50, n_samples=10, n_dimensions=2, random_state=7
    )
    chained = builder.for_class(0).add_signal(
        gaussian_noise(sigma=1.0), dim=[0, 1], shared_randomness=shared
    )
    return chained.build()
ds_same = build_noise_dataset(shared=True)
ds_diff = build_noise_dataset(shared=False)

s = 0  # any sample
# Print the first five values of each dim so the effect is visible side by side.
for header, ds in (
    ("shared_randomness=True", ds_same),
    ("\nshared_randomness=False", ds_diff),
):
    print(header)
    print(" dim0[:5]:", ds["X"][s, 0, :5].round(3))
    print(" dim1[:5]:", ds["X"][s, 1, :5].round(3))

# Verify
bg_same = ds_same["components"][s].background
assert np.allclose(bg_same[:, 0], bg_same[:, 1]), (
    "shared_randomness=True should give identical background across dims"
)
bg_diff = ds_diff["components"][s].background
assert not np.allclose(bg_diff[:, 0], bg_diff[:, 1]), (
    "shared_randomness=False should give different background across dims"
)
print("\nDims match with shared_randomness=True: True")
print("Dims differ with shared_randomness=False: True")
shared_randomness=True dim0[:5]: [-0.814 0.648 0.669 -1.497 0.781] dim1[:5]: [-0.814 0.648 0.669 -1.497 0.781] shared_randomness=False dim0[:5]: [-0.144 0.53 1.086 -1.469 0.96 ] dim1[:5]: [ 2.631 0.209 -0.311 -1.316 -1.227] Dims match with shared_randomness=True: True Dims differ with shared_randomness=False: True
channels_last format¶
The default output format is channels_first: X.shape == (N, D, T). Pass data_format="channels_last" to get (N, T, D) instead.
# Request channels-last output: X comes back as (N, T, D) instead of (N, D, T).
cl_builder = TimeSeriesBuilder(
    n_timesteps=100,
    n_samples=20,
    n_dimensions=3,
    random_state=0,
    data_format="channels_last",
)
ds_cl = (
    cl_builder.for_class(0)
    .add_signal(gaussian_noise(sigma=0.5))
    .for_class(1)
    .add_signal(gaussian_noise(sigma=0.5))
    .add_feature(gaussian_pulse(amplitude=1.0), length_pct=0.15, random_location=True)
    .build()
)
print("channels_last X shape:", ds_cl["X"].shape)  # (N, T, D)
assert ds_cl["X"].shape == (20, 100, 3)
channels_last X shape: (20, 100, 3)