6_Summary.Rmd

---
output: html_document
editor_options: 
  chunk_output_type: console
---


### Metaclustering Matrix


```{r, message=FALSE, warning=FALSE}
table <- table_clusters # [c("Index", "EGA", "EFA_3", "Mixture")]

# Metaclustering distance matrix.
#
# Entry [a, b] is the share of clustering methods (every column of `table`
# after the first one) that assign indices a and b to DIFFERENT clusters.
# Methods in which either index has a missing assignment are left out of both
# the numerator and the denominator. If no method has an assignment for both
# indices the entry is NaN (0 / 0). The diagonal is 0.
index_names <- as.character(table$Index)

# Rows and columns of `m` are addressed by index name, so names must be unique.
stopifnot(anyDuplicated(index_names) == 0L)

n_indices <- length(index_names)

# Initialize matrix
m <- matrix(
  NA_real_,
  nrow = n_indices,
  ncol = n_indices,
  dimnames = list(index_names, index_names)
)

for (i in seq_len(n_indices)) {
  m[i, i] <- 0

  # The measure is symmetric: compute the lower triangle only and mirror it.
  for (j in seq_len(i - 1L)) {
    pair <- table[c(i, j), ]

    # One value per method: 0 = same cluster, 1 = different, NA = not comparable
    rez <- vapply(pair[-1], function(x) {
      if (anyNA(x)) {
        NA_real_
      } else if (length(unique(x)) == 1L) {
        0
      } else {
        1
      }
    }, numeric(1))

    m[i, j] <- m[j, i] <- sum(rez, na.rm = TRUE) / sum(!is.na(rez))
  }
}
```


```{r, message=FALSE, warning=FALSE}
# Reorder rows/columns of the distance matrix so that similar indices end up
# next to each other in the heatmap.
m <- correlation::cor_sort(m, distance = "raw", hclust_method = "average")
# colnames(m) <- clean_names(colnames(m))
# rownames(m) <- clean_names(rownames(m))
# hrv_cols <- clean_names(hrv_cols)
# colnames(z) <- clean_names(colnames(z))

# `m` holds distances in [0, 1]; abs(m - 1) flips them so that higher values
# mean "more often grouped together".
abs(m - 1) %>%
  as.data.frame() %>%
  rownames_to_column("Index1") %>%
  # `names_to` (rather than the legacy `colnames_to`) matches the other
  # reshape_longer() call in this document.
  # NOTE(review): assumes `hrv_cols` lists exactly the column names of `m`.
  datawizard::reshape_longer(
    cols = hrv_cols,
    names_to = "Index2",
    values_to = "Probability"
  ) %>%
  # Fix the axis order to the sorted matrix order (y axis reversed so that the
  # diagonal runs from top-left to bottom-right).
  mutate(
    Index1 = factor(Index1, levels = row.names(m)),
    Index2 = factor(Index2, levels = rev(row.names(m)))
  ) %>%
  ggplot(aes(x = Index1, y = Index2)) +
  geom_tile(aes(fill = Probability)) +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
  scale_fill_gradient2(
    low = "#2196F3",
    mid = "#9C27B0",
    high = "#FF5722",
    midpoint = 0.5
  ) +
  labs(
    x = NULL,
    y = NULL,
    title = "Metaclustering Heatmap",
    subtitle = "Probability of being grouped together across multiple methods."
  )

# ggsave("figures/figure_probability_matrix.png", width=12*0.9, height=9*0.9, dpi = 450)
```

### Hierarchical Structure of Metaclustering

```{r, message=FALSE, warning=FALSE}
# Average-linkage hierarchical clustering on the metaclustering distances,
# cut at a fixed height to obtain the final cluster assignment.
cut_height <- 0.8
model <- hclust(as.dist(m), method = "average")
clusters <- cutree(model, h = cut_height)

# Print the cluster assignment of every index
clusters

# clusters2 <- c(
#   pNN50 = 1, pNN20 = 1, CVI = 1, RMSSD = 1, S = 1, HTI = 1,  MCVNN = 1,
#   MadNN = 1, IQRNN = 1, CVNN = 1, SDNN = 1, SD2 = 1, 
#   MeanNN = 2, MedianNN = 2,
#   ApEn = 2, ShanEn = 2, MSE = 2,
#   DFA_alpha2_DimMean = 3,
#   LF = 3, VHF = 3, HF = 3, LnHF = 3,
#   CMSE = 3, RCMSE = 3,
#   LZC = 3, SampEn = 3, 
#   FuzzyEn = 3, TINN = 3,
#   DFA_alpha1_ExpMean = 3, CD = 3, DFA_alpha1_ExpRange = 3, KFD = 3, 
#   SD1SD2 = 4, HFn = 4, HFD = 4,
#   DFA_alpha1_DimMean = 4, DFA_alpha2_ExpRange = 4, DFA_alpha2_DimRange = 4,
#   DFA_alpha2 = 4, DFA_alpha2_ExpMean = 4, CSI_Modified = 4,
#   CSI = 4, LFn = 4, LFHF = 4, DFA_alpha1 = 4, 
#   PAS = 5, PSS = 5, PIP = 5, IALS = 5,
#   C1a = 6, Ca = 6, PI = 6, C2a = 6,
#   DFA_alpha1_DimRange = 6,
#   AI = 6, GI = 6, SI = 6)

# Identify the center of each cluster.
# NOTE(review): `clusters` follows the row order of `m`, whereas the rows of
# the transposed `z` follow the column order of `z`, and the names assigned
# below come from `hrv_cols` -- confirm that all three orders agree.
dis <- parameters::cluster_centers(datawizard::data_transpose(z), clusters)

# Flip the distance-to-center scale (max <-> min) so that a larger value means
# a more central parameter.
center_distance <- attributes(dis)$distance
centrality <- datawizard::data_rescale(
  center_distance,
  to = c(max(center_distance), min(center_distance))
)
names(centrality) <- hrv_cols

plot_dendrogram(
  model,
  clusters = clusters,
  ylim = -0.2,
  nudge_y = -0.03,
  angle = 90,
  points = TRUE,
  dis = centrality,
  h = cut_height
) +
  # Lines to show "larger" clusters
    # geom_line(data = data.frame(y = c(-0.02, -0.02), x = c(0, 5)), aes(x = x, y = y), size = 0.5, color = "#607D8B") +
    # geom_line(data = data.frame(y = c(-0.02, -0.02), x = c(6, 15)), aes(x = x, y = y), size = 0.5, color = "#607D8B") +
    # geom_line(data = data.frame(y = c(-0.02, -0.02), x = c(16, 22)), aes(x = x, y = y), size = 0.5, color = "#607D8B") +
    # geom_line(data = data.frame(y = c(-0.02, -0.02), x = c(23, 34)), aes(x = x, y = y), size = 0.5, color = "#607D8B") +
    # geom_line(data = data.frame(y = c(-0.02, -0.02), x = c(35, 42)), aes(x = x, y = y), size = 0.5, color = "#607D8B") +
  theme(legend.position = "none")

# ggsave(file = "figures/meta-dendrogram.svg", plot=dendro, width=15, height=10)
```


### Metaclustering Network

#### Centrality
```{r, message=FALSE, warning=FALSE}
# Node table: one row per index (in the row order of `m`) with its cluster
# membership stored as a character label.
nodes <- data.frame(
  name = row.names(m),
  Cluster = as.character(clusters)
)
row.names(nodes) <- NULL

# Edge table in long format (one row per cell of `m`).
# Row and column names are dropped first; presumably this makes `from` / `to`
# positional indices into `nodes` -- confirm against the datawizard docs.
# NOTE(review): at this point `m` holds distances (0 on the diagonal; the
# heatmap uses abs(m - 1) as the probability), yet the values are labelled
# "Probability" and filtered with `> 0.8` -- confirm this is intended.
edges <- m %>%
  as.data.frame() %>%
  datawizard::data_rename_rows(NULL) %>%
  datawizard::data_rename(NULL) %>%
  datawizard::reshape_longer(
    rows_to = "from",
    names_to = "to",
    values_to = "Probability"
  ) %>%
  mutate(to = as.numeric(to)) %>%
  filter(Probability > 0.8)


# Undirected graph with the degree of each node stored as `Centrality`
graph <- tidygraph::tbl_graph(nodes = nodes, edges = edges, directed = FALSE) %>%
  tidygraph::activate(nodes) %>%
  mutate(Centrality = tidygraph::centrality_degree())

# Node table sorted by decreasing centrality WITHIN each cluster.
# arrange() ignores grouping unless `.by_group = TRUE`, so without it the
# group_by() above it would have no effect on the row order.
as.list(graph)$nodes %>%
  group_by(Cluster) %>%
  arrange(desc(Centrality), .by_group = TRUE) %>%
  ungroup() %>%
  knitr::kable()
```


#### Network Graph

```{r, message=FALSE, warning=FALSE}
# Network plot of `graph`: edges are coloured and sized by `Probability`,
# nodes are coloured by `Cluster` and sized by `Centrality`. All legends are
# switched off.
ggraph::ggraph(graph, layout = "graphopt") +
  # Layers are drawn in this order: edges, then node points, then labels
  geom_edge_arc(
    aes(colour = Probability, width = Probability),
    strength = 0.05
  ) +
  geom_node_point(aes(colour = Cluster, size = Centrality)) +
  geom_node_text(aes(label = name), fontface = "bold", repel = FALSE) +
  # Edge scales
  scale_edge_color_gradientn(
    limits = c(0, 1),
    colours = c("#607D8B", "#263238"),
    guide = "none"
  ) +
  scale_edge_width(range = c(0.05, 0.5), guide = "none") +
  # Node scales
  scale_color_material_d(palette = "rainbow", guide = "none") +
  scale_size_continuous(range = c(4, 20), guide = "none") +
  ggraph::theme_graph()
```

<!-- #### Create table -->

<!-- ```{r, message=FALSE, warning=FALSE, eval=FALSE, echo=FALSE} -->
<!-- level_1 <- c(rep("Distribution", 17), -->
<!--              rep("Frequency/Complexity", 28), -->
<!--              rep("Harmony", 12)) -->

<!-- level_2 <- c(rep("Dispersion", 12), -->
<!--              rep("Centrality", 5), -->
<!--              rep("Absolute Frequency/Complexity", 15), -->
<!--              rep("Relative Frequency/Complexity", 13), -->
<!--              rep("Heart Rate Fragmentation", 4), -->
<!--              rep("Heart Rate Asymmetry", 8)) -->


<!-- indices <- c("pNN50", "pNN20", "CVI", "RMSSD", "S", "HTI", "MCVNN", -->
<!--              "MadNN", "IQRNN", "CVNN", "SDNN", "SD2", -->

<!--              "MeanNN",   "MedianNN", -->
<!--              "ApEn", "ShanEn", "MSE", -->

<!--              "DFA α2 DimMean", -->
<!--              "LF", "VHF", "HF", "LnHF", -->
<!--              "CMSE", "RCMSE", "LZC", "SampEn", "FuzzyEn", "TINN", "DFA α1 ExpMean", -->
<!--              "CD", "DFA α1 ExpRange", "KFD", -->

<!--              "SD1SD2", "HFn", "HFD", -->
<!--              "DFA α1 DimMean", "DFA α2 ExpRange", "DFA α2 DimRange", -->
<!--              "DFA α2", "DFA α2 ExpMean", "CSI (modified)", -->
<!--              "LFn", "CSI", "LFHF", "DFA α1", -->

<!--              "PAS", "IALS", "PSS", "PIP", -->

<!--              "C1a", "C2a", "Ca", "PI", -->
<!--              "DFA α1 DimRange", -->
<!--              "AI", "GI", "SI") -->

<!-- labels <- c("Proportion of successive NN interval differences larger than 50ms", -->
<!--             "Proportion of successive NN interval differences larger than 20ms", -->
<!--             "Cardiac Vagal Index", -->
<!--             "Root mean square of successive NN interval differences", -->
<!--             "Area of ellipse in Poincaré plot", -->
<!--             "Integral of the density of the NN interval histogram divided by its height", -->
<!--             "MadNN divided by MedianNN", -->
<!--             "The median absolute deviation of the NN intervals", -->
<!--             "The interquartile range (IQR) of the NN intervals", -->
<!--             "SDNN divided by MeanNN", "The standard deviation of the RR intervals", -->
<!--             "The spread of NN intervals on the Poincaré plot along the line of identity.", -->

<!--              "The mean of the NN intervals.", "The median of the NN intervals.", -->
<!--              "The Approximate Entropy", "The Shannon Entropy", "The Multiscale Entropy", -->

<!--              "The multifractal detrended fluctuation analysis (MDFA) corresponding to long-term correlations. Dimmean is the mean of singularity dimensions", -->
<!--              "Power spectrum in the frequency range of 0.04-0.15 Hz", -->
<!--              "Power spectrum in the frequency range of 0.4-0.5 Hz", -->
<!--              "Power spectrum in the frequency range of 0.15-0.4 Hz", -->
<!--              "The natural logarithm of HF", -->
<!--              "The Composite Multiscale Entropy", "The Refined Composite Multiscale Entropy", -->
<!--              "The Lempel-Ziv complexity", "The Sample Entropy", "The Fuzzy Entropy", -->
<!--              "The baseline width of the NN interval histogram", -->
<!--              "The MDFA corresponding to short-term correlation. ExpMean is the mean of singularity exponents", -->
<!--              "Correlation Dimension", "The MDFA corresponding to short-term correlation. ExpRange is the range of singularity exponents", -->
<!--              "Katz Fractal Dimension", -->

<!--              "The ratio between short and long term fluctuations of the NN intervals", "The normalized HF", "Higuchi Fractal Dimension", -->
<!--              "The MDFA corresponding to short-term correlation. DimMean is the mean of singularity dimensions", "The MDFA corresponding to long-term correlation. ExpRange is the range of singularity exponents", "The MDFA corresponding to long-term correlation. DimRange is the range of singularity dimensions", -->
<!--              "The DFA corresponding to long-term correlation", "The MDFA corresponding to long-term correlation. ExpMean is the mean of singularity exponents", "The Cardiac Sympathetic Index (modified)", -->
<!--              "The normalized LF", "The Cardiac Sympathetic Index", "The ratio between LF and HF", "The DFA corresponding to short-term correlation", -->

<!--              "The percentage of NN intervals in alternation segments", -->
<!--              "The inverse of the average length of the acceleration/deceleration segments", -->
<!--              "The percentage of short segments", -->
<!--              "The percentage of inflection points of the NN intervals series", -->

<!--              "The contributions of heart rate accelerations to short-term HRV", -->
<!--             "The contributions of heart rate accelerations to long-term HRV", -->
<!--             "The total contributions of heart rate accelerations to HRV", -->
<!--             "The Porta's Index", -->
<!--              "The MDFA corresponding to short-term correlation. DimRange is the range of singularity dimensions", -->
<!--              "The Area Index", "The Guzik's Index", "The Slope Index") -->


<!-- df_summary <- data.frame(level_1, level_2, indices, labels) -->
<!-- df_summary$Centrality <- as.data.frame(centrality)[df_summary$indices, ] -->
<!-- df_summary <- df_summary %>% -->
<!--   group_by(level_1, level_2) %>% -->
<!--   arrange(desc(Centrality), .by_group = TRUE) %>%  -->
<!--   mutate(Centrality = round(Centrality, digits = 1)) -->
<!--   # rename("Level 1" = "level_1", -->
<!--   #        "Level 2" = "level_2", -->
<!--   #        "HRV Indices" = "indices", -->
<!--   #        "Description" = "labels") -->
<!-- write.csv(df_summary, "figures/Summary_Table.csv", row.names = FALSE) -->


<!-- ``` -->