Multivariate Time Series¶
This notebook demonstrates the multivariate API in xaitimesynth. The key parameters are:
| Parameter | Where | Description |
|---|---|---|
| `n_dimensions` | `TimeSeriesBuilder(...)` | Number of channels/dimensions |
| `dim` | `add_signal()` / `add_feature()` | Which dimensions to apply the component to (default: all) |
| `shared_location` | `add_feature()` | Whether all specified dims share the same random feature position |
| `shared_randomness` | `add_signal()` / `add_feature()` | Whether stochastic components use the same random values across dims |
| `data_format` | `TimeSeriesBuilder(...)` | `"channels_first"` (N, D, T) or `"channels_last"` (N, T, D) |
import numpy as np
from lets_plot import LetsPlot
from xaitimesynth import (
    TimeSeriesBuilder,
    constant,
    gaussian_noise,
    gaussian_pulse,
    random_walk,
    plot_components,
)
# Configure lets-plot to render figures inline in the notebook.
LetsPlot.setup_html()
Basic multivariate dataset¶
Use n_dimensions=3 for a 3-channel dataset. The dim parameter targets specific channels; omitting it applies the component to all channels.
Here class 0 has no discriminating features. Class 1 has:
- A `constant` offset in dimensions 0 and 1 (at a random location, same position in both dims)
- A peak in dimension 2 only (at a fixed position)
# Build a 3-channel dataset: class 0 is background only; class 1 additionally
# carries a constant offset on dims 0-1 and a fixed-position pulse on dim 2.
builder = TimeSeriesBuilder(n_timesteps=100, n_samples=30, n_dimensions=3, random_state=0)
builder = (
    builder.for_class(0)
    .add_signal(random_walk(step_size=0.2))  # no dim= → applied to all 3 dims
    .add_signal(gaussian_noise(sigma=0.1))
)
builder = (
    builder.for_class(1)
    .add_signal(random_walk(step_size=0.2))
    .add_signal(gaussian_noise(sigma=0.1))
    .add_feature(
        constant(value=1.0),
        dim=[0, 1],  # restrict the offset to the first two channels
        length_pct=0.15,
        random_location=True,
        shared_location=True,  # dim 0 and dim 1 share one random position
    )
    .add_feature(
        gaussian_pulse(amplitude=2.0, width=5),
        dim=[2],  # pulse lives on the last channel only
        start_pct=0.4,
        end_pct=0.6,
    )
)
dataset = builder.build()

print("X shape: ", dataset["X"].shape)  # (N, D, T) channels-first
print("y shape: ", dataset["y"].shape)
print("feature_masks keys: ", list(dataset["feature_masks"].keys()))
print("components length: ", len(dataset["components"]))
assert dataset["X"].shape == (30, 3, 100)
X shape: (30, 3, 100) y shape: (30,) feature_masks keys: ['class_1_feature_0_constant_dim0', 'class_1_feature_0_constant_dim1', 'class_1_feature_1_gaussian_pulse_dim2'] components length: 30
# Render every figure returned by plot_components (one per dimension/class view).
for figure in plot_components(dataset):
    figure.show()
feature_masks — per-dimension keys¶
Each feature gets a separate mask entry per dimension it applies to.
Key format: class_{label}_feature_{idx}_{type}_dim{dim}
- `feature_0` → the `constant` offset (applied to dims 0 and 1 → two keys)
- `feature_1` → the peak (applied to dim 2 only → one key)
All masks are bool arrays of shape (n_samples, n_timesteps), regardless of the number of dimensions.
# Inspect every mask, then verify the key prefix and shape/dtype invariants.
masks = dataset["feature_masks"]
for key, mask in masks.items():
    print(f"{key}: shape={mask.shape}, dtype={mask.dtype}")

# Class 0 carries no features, so every mask key belongs to class 1.
assert all(key.startswith("class_1") for key in masks)
# Each mask is a bool (n_samples, n_timesteps) array regardless of dimensionality.
for mask in masks.values():
    assert mask.shape == (30, 100)
    assert mask.dtype == bool
class_1_feature_0_constant_dim0: shape=(30, 100), dtype=bool class_1_feature_0_constant_dim1: shape=(30, 100), dtype=bool class_1_feature_1_gaussian_pulse_dim2: shape=(30, 100), dtype=bool
Components — multivariate shapes¶
For multivariate data, background and aggregated are 2D arrays (T, D), one column per dimension. The per-feature entries remain 1D (T,) because each feature targets a single dimension.
# Pick a class-1 sample and inspect its component decomposition.
sample_idx = int(np.flatnonzero(dataset["y"] == 1)[0])
comp = dataset["components"][sample_idx]
print("background shape: ", comp.background.shape)  # (T, D)
print("aggregated shape: ", comp.aggregated.shape)  # (T, D)
print("features keys: ", list(comp.features.keys()))

# Transposing (T, D) -> (D, T) must reproduce the stored channels-first sample.
assert np.allclose(dataset["X"][sample_idx], comp.aggregated.T)
print("aggregated.T matches X[sample_idx]: True")
background shape: (100, 3) aggregated shape: (100, 3) features keys: ['feature_0_constant_dim0', 'feature_0_constant_dim1', 'feature_1_gaussian_pulse_dim2'] aggregated.T matches X[sample_idx]: True
shared_location¶
When a feature is placed at a random location across multiple dimensions:
- `shared_location=True` (default): the feature falls at the same timesteps in every specified dimension for a given sample
- `shared_location=False`: each dimension gets its own independent random position
# shared_location=True — same position in dim 0 and dim 1
shared_builder = (
    TimeSeriesBuilder(n_timesteps=100, n_samples=20, n_dimensions=2, random_state=42)
    .for_class(0)
    .add_signal(gaussian_noise(sigma=0.1))
    .for_class(1)
    .add_signal(gaussian_noise(sigma=0.1))
    .add_feature(
        gaussian_pulse(amplitude=1.0),
        dim=[0, 1],
        length_pct=0.1,
        random_location=True,
        shared_location=True,
    )
)
ds_shared = shared_builder.build()

# For every sample the dim0 and dim1 masks must be identical
masks_shared = ds_shared["feature_masks"]
key_dim0 = next(k for k in masks_shared if k.endswith("dim0"))
key_dim1 = next(k for k in masks_shared if k.endswith("dim1"))
assert np.array_equal(masks_shared[key_dim0], masks_shared[key_dim1])

# Show the feature's start index for the first class-1 sample in each dim.
i = int(np.flatnonzero(ds_shared["y"] == 1)[0])
start0 = np.flatnonzero(masks_shared[key_dim0][i])[0]
start1 = np.flatnonzero(masks_shared[key_dim1][i])[0]
print(f"shared_location=True — dim0 start: {start0}, dim1 start: {start1}")
print("Masks identical across dims: True")

for fig in plot_components(ds_shared):
    fig.show()
shared_location=True — dim0 start: 89, dim1 start: 89 Masks identical across dims: True
# shared_location=False — independent random positions per dim
indep_builder = (
    TimeSeriesBuilder(n_timesteps=100, n_samples=20, n_dimensions=2, random_state=42)
    .for_class(0)
    .add_signal(gaussian_noise(sigma=0.1))
    .for_class(1)
    .add_signal(gaussian_noise(sigma=0.1))
    .add_feature(
        gaussian_pulse(amplitude=1.0),
        dim=[0, 1],
        length_pct=0.1,
        random_location=True,
        shared_location=False,
    )
)
ds_indep = indep_builder.build()

masks_indep = ds_indep["feature_masks"]
key_dim0 = next(k for k in masks_indep if k.endswith("dim0"))
key_dim1 = next(k for k in masks_indep if k.endswith("dim1"))

# Collect the feature start index per class-1 sample for each dimension;
# with independent placement they must disagree on at least one sample.
class1_indices = np.flatnonzero(ds_indep["y"] == 1)
starts_dim0 = [int(np.flatnonzero(masks_indep[key_dim0][i])[0]) for i in class1_indices]
starts_dim1 = [int(np.flatnonzero(masks_indep[key_dim1][i])[0]) for i in class1_indices]
assert starts_dim0 != starts_dim1, (
    "Expected at least one difference with shared_location=False"
)

# Show first class-1 sample
i = int(class1_indices[0])
print(
    f"shared_location=False — dim0 start: {np.where(masks_indep[key_dim0][i])[0][0]}, dim1 start: {np.where(masks_indep[key_dim1][i])[0][0]}"
)
print("Positions differ across dims for at least one sample: True")
shared_location=False — dim0 start: 25, dim1 start: 8 Positions differ across dims for at least one sample: True
shared_randomness¶
For stochastic components (e.g. gaussian_noise), shared_randomness controls whether all specified dimensions draw from the same random sequence or independent ones.
- `shared_randomness=True`: identical values across dims — useful for simulating a noise source that affects multiple channels equally
- `shared_randomness=False` (default): independent noise per dim
def build_noise_dataset(shared: bool) -> dict:
    """Build a 2-dim, noise-only dataset; `shared` toggles shared_randomness."""
    builder = TimeSeriesBuilder(
        n_timesteps=50, n_samples=10, n_dimensions=2, random_state=7
    )
    chained = builder.for_class(0).add_signal(
        gaussian_noise(sigma=1.0), dim=[0, 1], shared_randomness=shared
    )
    return chained.build()
ds_same = build_noise_dataset(shared=True)
ds_diff = build_noise_dataset(shared=False)

s = 0  # any sample
# Print the first five values of each dim so the effect is visible side by side.
for header, ds in (
    ("shared_randomness=True", ds_same),
    ("\nshared_randomness=False", ds_diff),
):
    print(header)
    print(" dim0[:5]:", ds["X"][s, 0, :5].round(3))
    print(" dim1[:5]:", ds["X"][s, 1, :5].round(3))

# Verify
bg_same = ds_same["components"][s].background
assert np.allclose(bg_same[:, 0], bg_same[:, 1]), (
    "shared_randomness=True should give identical background across dims"
)
bg_diff = ds_diff["components"][s].background
assert not np.allclose(bg_diff[:, 0], bg_diff[:, 1]), (
    "shared_randomness=False should give different background across dims"
)
print("\nDims match with shared_randomness=True: True")
print("Dims differ with shared_randomness=False: True")
shared_randomness=True dim0[:5]: [-0.814 0.648 0.669 -1.497 0.781] dim1[:5]: [-0.814 0.648 0.669 -1.497 0.781] shared_randomness=False dim0[:5]: [-0.144 0.53 1.086 -1.469 0.96 ] dim1[:5]: [ 2.631 0.209 -0.311 -1.316 -1.227] Dims match with shared_randomness=True: True Dims differ with shared_randomness=False: True
channels_last format¶
The default output format is channels_first: X.shape == (N, D, T). Pass data_format="channels_last" to get (N, T, D) instead.
# Request channels-last output: X comes back as (N, T, D) instead of (N, D, T).
cl_builder = TimeSeriesBuilder(
    n_timesteps=100,
    n_samples=20,
    n_dimensions=3,
    random_state=0,
    data_format="channels_last",
)
ds_cl = (
    cl_builder.for_class(0)
    .add_signal(gaussian_noise(sigma=0.5))
    .for_class(1)
    .add_signal(gaussian_noise(sigma=0.5))
    .add_feature(gaussian_pulse(amplitude=1.0), length_pct=0.15, random_location=True)
    .build()
)
print("channels_last X shape:", ds_cl["X"].shape)  # (N, T, D)
assert ds_cl["X"].shape == (20, 100, 3)
channels_last X shape: (20, 100, 3)