Clustering and visualization of time-series whole-brain activity data of C. elegans using WormTensor

Load libraries

Install the WormTensor package from CRAN or GitHub in advance, and then type the code below in the R console window.

library(WormTensor)

worm_download

worm_download is a function to retrieve data from figshare for a total of 28 animals (24 normal and 4 noisy). If there is no argument, mSBD distance matrices (including 24 normal animals) will be downloaded.

# With no arguments, the mSBD distance matrices are downloaded from figshare.
object <- worm_download()

as_worm_tensor

as_worm_tensor is a function to generate a WormTensor object from distance matrices. The WormTensor object (an S4 class) is used by worm_membership, worm_clustering, worm_evaluate, and worm_visualize.

# Build a WormTensor object from the downloaded distance matrices (object$Ds).
object <- as_worm_tensor(object$Ds)

worm_membership

worm_membership is a function to generate a membership tensor from a WormTensor object with distance matrices. Set the assumed number of clusters with the argument k (k >= 2).

# k is the assumed number of clusters (must be >= 2).
object <- worm_membership(object, k=6)

worm_clustering

worm_clustering is a function to generate a clustering result from a WormTensor object with a membership tensor.

# Generate the clustering result from the object's membership tensor.
object <- worm_clustering(object)

worm_evaluate

worm_evaluate is a function to generate an evaluation result from a WormTensor object with a worm_clustering result.

# Generate the evaluation result from the object's clustering result.
object <- worm_evaluate(object)

worm_visualize

worm_visualize is a function to visualize worm_clustering and worm_evaluate results.

# Visualize the clustering and evaluation results held in the object.
object <- worm_visualize(object)
Figure1a : Silhouette plots
Figure1a : Silhouette plots
Figure1b : Dimensional reduction Plots colored by cluster
Figure1b : Dimensional reduction Plots colored by cluster
Figure1c : Dimensional reduction Plots colored by no. of identified cells
Figure1c : Dimensional reduction Plots colored by no. of identified cells
Figure1d : ARI with a merge result and each animal(with MCMI)
Figure1d : ARI with a merge result and each animal(with MCMI)

Pipe Operation

The above functions can also be run by connecting them with R’s native pipe.

# Run the whole workflow as a single chain: download, tensor construction,
# membership, clustering, evaluation, and visualization.
object <- worm_download()$Ds |>
  as_worm_tensor() |>
  worm_membership(k = 6) |>
  worm_clustering() |>
  worm_evaluate() |>
  worm_visualize()

Pipe Operation (with Labels)

If you have a label for the cells, you can use it for external evaluation.

# Sample labels: run the workflow up to clustering, then draw three random
# label vectors, each with one entry per element of object@clustering.
object <- worm_download()$Ds |>
  as_worm_tensor() |>
  worm_membership(k = 6) |>
  worm_clustering()
labels <- list(
  label1 = sample(3, length(object@clustering), replace = TRUE),
  label2 = sample(4, length(object@clustering), replace = TRUE),
  label3 = sample(5, length(object@clustering), replace = TRUE)
)
# WormTensor (with labels): the labels are passed to worm_evaluate.
object_labels <- worm_download()$Ds |>
  as_worm_tensor() |>
  worm_membership(k = 6) |>
  worm_clustering() |>
  worm_evaluate(labels) |>
  worm_visualize()
Figure2a : Silhouette plots
Figure2a : Silhouette plots
Figure2b : Dimensional reduction Plots colored by cluster
Figure2b : Dimensional reduction Plots colored by cluster
Figure2c : Dimensional reduction Plots colored by no. of identified cells
Figure2c : Dimensional reduction Plots colored by no. of identified cells
Figure2d : ARI with a merge result and each animal(with MCMI)
Figure2d : ARI with a merge result and each animal(with MCMI)
Figure2e : Dimensional reduction Plots colored by label
Figure2e : Dimensional reduction Plots colored by label
Figure2f : Consistency of labels and cluster members
Figure2f : Consistency of labels and cluster members

worm_distance

worm_distance helps you analyze your time-series data matrices with WormTensor. worm_distance is a function to convert time-series data matrices into distance matrices. The distance matrices can be used for analysis by WormTensor.

# Toy data for 3 animals. Each animal is a (cells x time frames) matrix of
# uniform random values; rows are named with cell IDs drawn without
# replacement from 1..n_cells, columns with the time-frame index.
n_cell_x <- 13
n_cell_y <- 24
n_cell_z <- 29
n_cells <- 30
n_time_frames <- 100

# animal_x: 13 cells, 100 time frames
animal_x <- matrix(
  runif(n_cell_x * n_time_frames),
  nrow = n_cell_x,
  ncol = n_time_frames
)
dimnames(animal_x) <- list(
  sample(seq_len(n_cells), n_cell_x),
  seq_len(n_time_frames)
)

# animal_y: 24 cells, 100 time frames
animal_y <- matrix(
  runif(n_cell_y * n_time_frames),
  nrow = n_cell_y,
  ncol = n_time_frames
)
dimnames(animal_y) <- list(
  sample(seq_len(n_cells), n_cell_y),
  seq_len(n_time_frames)
)

# animal_z: 29 cells, 100 time frames
animal_z <- matrix(
  runif(n_cell_z * n_time_frames),
  nrow = n_cell_z,
  ncol = n_time_frames
)
dimnames(animal_z) <- list(
  sample(seq_len(n_cells), n_cell_z),
  seq_len(n_time_frames)
)

# Input list for worm_distance: one named matrix per animal
X <- list(
  animal_x = animal_x,
  animal_y = animal_y,
  animal_z = animal_z
)

# Pipe operation on the toy data: convert the time-series matrices to mSBD
# distance matrices, then run the usual workflow.
# tsne.perplexity must be adjusted for data size
object <- worm_distance(X, "mSBD") |>
  as_worm_tensor() |>
  worm_membership(k = 6) |>
  worm_clustering() |>
  worm_evaluate() |>
  worm_visualize(tsne.perplexity = 5)

Session Information

#> R version 4.4.2 (2024-10-31)
#> Platform: x86_64-pc-linux-gnu
#> Running under: Ubuntu 24.04.1 LTS
#> 
#> Matrix products: default
#> BLAS:   /usr/lib/x86_64-linux-gnu/openblas-pthread/libblas.so.3 
#> LAPACK: /usr/lib/x86_64-linux-gnu/openblas-pthread/libopenblasp-r0.3.26.so;  LAPACK version 3.12.0
#> 
#> locale:
#>  [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
#>  [3] LC_TIME=en_US.UTF-8        LC_COLLATE=C              
#>  [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=en_US.UTF-8   
#>  [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                 
#>  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
#> [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       
#> 
#> time zone: Etc/UTC
#> tzcode source: system (glibc)
#> 
#> attached base packages:
#> [1] stats     graphics  grDevices utils     datasets  methods   base     
#> 
#> other attached packages:
#> [1] WormTensor_0.1.1 rmarkdown_2.29  
#> 
#> loaded via a namespace (and not attached):
#>  [1] ade4_1.7-22         tidyselect_1.2.1    viridisLite_0.4.2  
#>  [4] farver_2.1.2        dplyr_1.1.4         fastmap_1.2.0      
#>  [7] promises_1.3.2      shinyjs_2.1.0       digest_0.6.37      
#> [10] mime_0.12           lifecycle_1.0.4     factoextra_1.0.7   
#> [13] cluster_2.1.8       magrittr_2.0.3      compiler_4.4.2     
#> [16] rlang_1.1.5         sass_0.4.9          tools_4.4.2        
#> [19] yaml_2.3.10         knitr_1.49          ggsignif_0.6.4     
#> [22] labeling_0.4.3      plyr_1.8.9          abind_1.4-8        
#> [25] Rtsne_0.17          withr_3.0.2         purrr_1.0.2        
#> [28] sys_3.4.3           grid_4.4.2          stats4_4.4.2       
#> [31] ggpubr_0.6.0        xtable_1.8-4        e1071_1.7-16       
#> [34] colorspace_2.1-1    aricode_1.0.3       ggplot2_3.5.1      
#> [37] scales_1.3.0        iterators_1.0.14    MASS_7.3-64        
#> [40] cli_3.6.3           generics_0.1.3      RcppParallel_5.1.10
#> [43] RSpectra_0.16-2     reshape2_1.4.4      usedist_0.4.0      
#> [46] cachem_1.1.0        proxy_0.4-27        stringr_1.5.1      
#> [49] splines_4.4.2       modeltools_0.2-23   parallel_4.4.2     
#> [52] clValid_0.7         vctrs_0.6.5         Matrix_1.7-2       
#> [55] jsonlite_1.8.9      carData_3.0-5       car_3.1-3          
#> [58] ggrepel_0.9.6       rstatix_0.7.2       Formula_1.2-5      
#> [61] clue_0.3-66         maketools_1.3.1     foreach_1.5.2      
#> [64] jquerylib_0.1.4     tidyr_1.3.1         glue_1.8.0         
#> [67] dtw_1.23-1          codetools_0.2-20    cowplot_1.1.3      
#> [70] flexclust_1.4-2     uwot_0.2.2          stringi_1.8.4      
#> [73] gtable_0.3.6        rTensor_1.4.8       later_1.4.1        
#> [76] munsell_0.5.1       tibble_3.2.1        pillar_1.10.1      
#> [79] htmltools_0.5.8.1   clusterSim_0.51-5   R6_2.5.1           
#> [82] evaluate_1.0.3      shiny_1.10.0        lattice_0.22-6     
#> [85] backports_1.5.0     dtwclust_6.0.0      broom_1.0.7        
#> [88] httpuv_1.6.15       bslib_0.8.0         class_7.3-23       
#> [91] Rcpp_1.0.14         nlme_3.1-166        mgcv_1.9-1         
#> [94] xfun_0.50           buildtools_1.0.0    pkgconfig_2.0.3