---
title: ""
author: ""
date: ""
abstract: ""
subtitle: ""
output: word_document
---

\pagenumbering{gobble}


```{r, echo=T, message=F, warning=F, eval = F}

# Loads or install packages function ----
using <-function(...) {
    libs<-unlist(list(...))
    req<-unlist(lapply(libs,require,character.only=TRUE))
    need<-libs[req==FALSE]
    if(length(need)>0){ 
        install.packages(need)
        lapply(need,require,character.only=TRUE)
    }
}

# Loading the libraries calling the 'using' function  
using("frequencyConnectedness",
      "lubridate",
      "tidyverse",
      "rugarch",
      "vars",
      "xlsx",
      "readxl",
      "tseries",
      "corrplot",
      "latticeExtra",
      "ggrepel",
      "moments"
      )

```

\newpage 
```{r, echo=T, message=F, warning=F, eval=F}

# ======================================================================= #
# ======================== Loading Data ================================= #
# ======================================================================= #

# Setting working directory ----
setwd("~WORKING DIRECTORY")

# Getting market cap info 
mcap <- read_excel("MarketCap_250.xlsx", sheet = "Selected")

# Loading the price data ----
data <- read_excel("Crypto_prices.xlsx", sheet = "prices")
# ====================================================================== #

# price sub-samples --
price_18 <- data %>% filter(year(Date) == 2018)
price_19 <- data %>% filter(year(Date) == 2019)
price_20 <- data %>% filter(year(Date) == 2020)
price_21 <- data %>% filter(year(Date) == 2021)
price_22 <- data %>% filter(year(Date) == 2022)

# Computing returns 
r_all <- apply(log((cbind(data[,-1]))), 2, diff)

# Computing returns sub periods 
# Get a list of unique years from the date column
years <- unique(as.numeric(format(data$Date, "%Y")))

# Create an empty list to store the new tibble objects
yearly_data_list <- list()

# Loop through the years and filter the data for each year
for (year in years) {
  year_data <- data %>% filter(as.numeric(format(Date, "%Y")) == year)
  yearly_data_list[[as.character(year)]] <- year_data
}

# Creating sub periods of the data set 
r_18 <- apply(log((cbind(yearly_data_list$`2018`[,-1]))), 2, diff)
r_19 <- apply(log((cbind(yearly_data_list$`2019`[,-1]))), 2, diff)
r_20 <- apply(log((cbind(yearly_data_list$`2020`[,-1]))), 2, diff)
r_21 <- apply(log((cbind(yearly_data_list$`2021`[,-1]))), 2, diff)
r_22 <- apply(log((cbind(yearly_data_list$`2022`[,-1]))), 2, diff)
```

\newpage 


```{r, eval=F}
# ======================================================================= #
# ===================== Creating functions ============================== #
# ======================================================================= #


# ====================================================================== #
# ================ Descriptive statistics ============================== #
# ====================================================================== #

descriptive_table <- function(data = r_all){
  descriptive_table_all <- tibble(
  Mean = colMeans(data)*365,
  StdDev = apply(data, 2, sd)*sqrt(365),
  Min = apply(data, 2, min),
  Max = apply(data, 2, max),
  Kurtosis = apply(data, 2, kurtosis),
  Skewness = apply(data, 2, skewness)
  ) %>%
  mutate(
    Sharpe = round(Mean/StdDev,5)
    )
  return(descriptive_table_all)
}
#========================================================================#
# ----

# ====================================================================== #
# ===================== Graph functions ================================ #
# ====================================================================== #

# Making a template for line graphs ----
line_graph <- function(data, 
                       x, 
                       y, 
                       Title="", 
                       ylab="", 
                       xlab="", 
                       isDate=FALSE, 
                       xmax = 10, 
                       xmin = 0,
                       len = 6) {
  
  if(isDate == TRUE){
      data %>%
    ggplot(aes(x = as.Date(x), y = y)) + 
    geom_line(size = 0.6,color = "#45BCCA") +
    labs(
     title = Title,
    # subtitle = "Monthly spot price observations",
     x = xlab,
     y = ylab
     ) +
    # General theme
    theme_base(base_size = 14) +
    # Setting the theme details
    theme(
     axis.title = element_text(size = 11),
     title = element_text(colour = "Black", size = 12, family = "sans"),
     plot.subtitle = element_text(size = 10, hjust = 0.5),
     plot.title = element_text(hjust = 0.5),
     panel.grid.major = element_blank(),
     panel.grid.minor = element_blank(),
     panel.background = element_blank(),
     panel.border =  element_rect(colour = "gray", fill=NA, size=0.5)
        ) +
      scale_x_date(date_labels =  "%Y", date_breaks = "1 year") +
      scale_y_continuous(breaks = seq(xmin, xmax, len = len))
  } else{
    data %>%
    ggplot(aes(x = x, y = y)) + 
    geom_line(size = 0.6,color = "#45BCCA") +
    labs(
     title = Title,
    # subtitle = "Monthly spot price observations",
     x = xlab,
     y = ylab
     ) +
    # General theme
    theme_base(base_size = 14) +
    # Setting the theme details
    theme(
     axis.title = element_text(size = 11),
     title = element_text(colour = "Black", size = 12, family = "sans"),
     plot.subtitle = element_text(size = 10, hjust = 0.5),
     plot.title = element_text(hjust = 0.5),
     panel.grid.major = element_blank(),
     panel.grid.minor = element_blank(),
     panel.background = element_blank(),
     panel.border =  element_rect(colour = "gray", fill=NA, size=0.5)
        ) 
      
    }
}

#========================================================================#
# ----

# Correlation plot ----
corr_plot <- function(data = m_all){
  corrplot(data, tl.cex = 0.55, order = "original", 
           tl.col = "Black", 
           type = "lower", 
           tl.srt = 360,
           tl.offset = 1,
           cl.ratio = 0.1
           )
}

#========================================================================#
# ----

# Function for rolling spillover plot ----
rolling_spillover_plot <- function(data = rolling_return, 
                                   title = "Total return spillover index"){

  # Create ggplot object
  rolling_plot <- ggplot(data, aes(x = as.Date(date))) +
    # Add line for sp_100
    geom_line(aes(y = sp_100, color = "sp_100")) +
    # Add line for sp_200
    geom_line(aes(y = sp_200, color = "sp_200")) +
    # Add line for sp_300
    geom_line(aes(y = sp_300, color = "sp_300")) +
    # Add legend for line colors
    scale_color_manual(values = c("sp_100" = "red", 
                                  "sp_200" = "blue", 
                                  "sp_300" = "green"),
                       ) +
    # Add axis labels and plot title
    xlab("") + ylab("Percentage") +
    ggtitle(title) + 
    theme_classic() +
    theme(
       axis.title = element_text(size = 11),
       title = element_text(colour = "Black", size = 12, family = "sans"),
       plot.subtitle = element_text(size = 10, hjust = 0.5),
       plot.title = element_text(hjust = 0.5),
       panel.grid.major = element_blank(),
       panel.grid.minor = element_blank(),
       panel.background = element_blank(),
       panel.border =  element_rect(colour = "gray", fill=NA, size=0.5),
          ) + 
    scale_x_date(date_labels =  "%Y", date_breaks = "1 year") +
    scale_y_continuous(breaks=(seq(0, 100, 5)), limits = c(85, 100))
  
}

#========================================================================#
# ----

#========================================================================#
# =================== VAR with HAC errors  ============================= #
#========================================================================#

# Modification of VAR from the vars package to take HAC ----
"VAR_HAC" <- function (y,
                       p = 1,
                       type = c("const", "trend", "both", "none"),
                       season = NULL,
                       exogen = NULL,
                       lag.max = NULL,
                       ic = c("AIC", "HQ", "SC", "FPE"),
                       se_type = c("HC0", "HC1", "HC2", "HC3")
                       ) {
  
  y <- as.matrix(y)
  
  if (any(is.na(y))) 
    stop("\nNAs in y.\n")
  
  if (ncol(y) < 2) 
    stop("The matrix 'y' should contain at least two variables. 
         For univariate analysis consider ar() and arima() in package stats.\n")
  
  if (is.null(colnames(y))) {
    colnames(y) <- paste("y", 1:ncol(y), sep = "")
    warning(paste("No column names supplied in y, using:", 
                  paste(colnames(y), collapse = ", "), ", instead.\n"))
  }
  
  colnames(y) <- make.names(colnames(y))
  y.orig <- y
  type <- match.arg(type)
  obs <- dim(y)[1]
  K <- dim(y)[2]
  if(!is.null(lag.max)){
    lag.max <- abs(as.integer(lag.max))
    ic <- paste(match.arg(ic), "(n)", sep = "")
    p <- VARselect(y, 
                   lag.max = lag.max, 
                   type = type, 
                   season = season, 
                   exogen = exogen)$selection[ic]
  }
  sample <- obs - p
  ylags <- embed(y, dimension = p + 1)[, -(1:K)]
  temp1 <- NULL
  for (i in 1:p) {
    temp <- paste(colnames(y), ".l", i, sep = "")
    temp1 <- c(temp1, temp)
  }
  colnames(ylags) <- temp1
  yend <- y[-c(1:p), ]
  if (type == "const") {
    rhs <- cbind(ylags, rep(1, sample))
    colnames(rhs) <- c(colnames(ylags), "const")
  }
  else if (type == "trend") {
    rhs <- cbind(ylags, seq(p + 1, length = sample))
    colnames(rhs) <- c(colnames(ylags), "trend")
  }
  else if (type == "both") {
    rhs <- cbind(ylags, rep(1, sample), seq(p + 1, length = sample))
    colnames(rhs) <- c(colnames(ylags), "const", "trend")
  }
  else if (type == "none") {
    rhs <- ylags
    colnames(rhs) <- colnames(ylags)
  }
  if (!(is.null(season))) {
    season <- abs(as.integer(season))
    dum <- (diag(season) - 1/season)[, -season]
    dums <- dum
    while (nrow(dums) < obs) {
      dums <- rbind(dums, dum)
    }
    dums <- dums[1:obs, ]
    colnames(dums) <- paste("sd", 1:ncol(dums), sep = "")
    rhs <- cbind(rhs, dums[-c(1:p), ])
  }
  if (!(is.null(exogen))) {
    exogen <- as.matrix(exogen)
    if (!identical(nrow(exogen), nrow(y))) {
      stop("\nDifferent row size of y and exogen.\n")
    }
    if (is.null(colnames(exogen))) {
      colnames(exogen) <- paste("exo", 1:ncol(exogen), 
                                sep = "")
      warning(paste("No column names supplied in exogen, using:", 
                    paste(colnames(exogen), collapse = ", "), ", instead.\n"))
    }
    colnames(exogen) <- make.names(colnames(exogen))
    tmp <- colnames(rhs)
    rhs <- cbind(rhs, exogen[-c(1:p), ])
    colnames(rhs) <- c(tmp, colnames(exogen))
  }
  
  
  datamat <- as.data.frame(rhs)
  colnames(datamat) <- colnames(rhs)
  equation <- list()
  library(estimatr)
  for (i in 1:K) {
    y <- yend[, i]
    equation[[colnames(yend)[i]]] <- lm_robust(y ~ -1 + ., 
                                               data = datamat, 
                                               se_type = se_type, 
                                               return_vcov = T)
    
    if(any(c("const", "both") %in% type)){
      attr(equation[[colnames(yend)[i]]]$terms, "intercept") <- 1
    }
  }
  call <- match.call()
  if("season" %in% names(call)) call$season <- eval(season)
  result <- list(varresult = equation, datamat = data.frame(cbind(yend, 
                                                                  rhs)), 
                 y = y.orig, 
                 type = type, 
                 p = p, 
                 K = K, 
                 obs = sample, 
                 totobs = sample + p, 
                 restrictions = NULL, 
                 call = call)
  class(result) <- "varest"
  return(result)
}
#========================================================================#
# ----

#========================================================================#
# ==================== Extended spillover table ======================== #
#========================================================================#

# Better function for decomposition of the VAR model ----
spillover_table <- function(VAR_data = "", n.ahead = 10, no.corr = F)
  {
  spillover12 <- spilloverDY12(VAR_data, n.ahead = n.ahead, no.corr = no.corr)
  # Splitting the data
  split_data <- as_tibble(unlist(spillover12[1])) %>%
    group_split(gl(n()/dim(VAR_data$y)[2], dim(VAR_data$y)[2]))
  # converting the new data, to column
  new_data <- bind_cols(split_data) %>% select(starts_with("value"))
  # Renaming the columns
  colnames(new_data) <- colnames(VAR_data$y)
  # Creating spillover table
  spillover_table = new_data[] %>%
    add_column(tickers = colnames(VAR_data$y), .before = 1) %>%
    mutate_if(is.numeric,funs(.* 100)) %>%
    mutate_if(is.numeric,funs(round(.,2)))
  # Adding Contribution to others, including to it self and others
  spillover_table_2 <- rbind(rbind(spillover_table, 
                                   c("Contribution to self and others",
                                     colSums(spillover_table[,-1]))),
                             c("Contribution to others", 
                               colSums(spillover_table[,-1] %>% 
                                         replace(., col(.) == row(.), 0))))
  
  # List of contribution to others ----
  c_to = pivot_longer(spillover_table_2[nrow(spillover_table_2),], 
                      cols = everything(),
                      names_to = "tickers",
                      values_to = "contribution to others") %>%
    slice(-1) %>%
    mutate(
    `contribution to others` = as.numeric(`contribution to others`)
  )
  # list of contrebution from others ----
  c_from = tibble(
    tickers = names(VAR_data$varresult),
    `contribution from others` = rowSums(spillover_table[,-1] %>% 
                                           replace(., col(.) == row(.), 0))
    )
  # Complete table
  complete_spillover_table = tibble(
    spillover_table_2,
    c_from = unlist(list(c(c_from$`contribution from others`, NA, NA)))
  )
  return(complete_spillover_table)
}
#========================================================================#
# ----

#========================================================================#
# ========= ARMA-GARCH information criterion loop ====================== #
#========================================================================#

# Findig optimal orders for GARCH-type model ----
opt_orders <- function(data = data, max_order = 3, type = "eGARCH"){
  
  data = r_all
  
  # Defining the range of possible orders for arma and garch
  arma_order = c(max_order,max_order)
  garch_order = c(max_order,max_order)
  
  # Creating a list of all the possible combinations
  orderList <- tibble(
    expand.grid(0:arma_order[1], 0:arma_order[2], 0:garch_order[1], 1:garch_order[2])
  ) %>%
    unite("armaOrder", 
          Var1, 
          Var2, 
          sep = ",") %>% 
    unite("garchOrder", Var3, Var4,sep = ",") %>%
    mutate(
      armaOrder = paste("c(", armaOrder, ")", sep= ""),
      garchOrder = paste("c(", garchOrder, ")", sep= ""),
    )
  
  # data frame to store results for all time series
  all_results <- data.frame()
  
  # loop through each return series
  for (i in 1:ncol(data)) {
  
    # data frame to store results for current time series
    results <- data.frame()
  
    # loop through all combinations of garch and arma order
    for (j in 1:nrow(orderList)) {
  
      # get garch and arma order for current combination
      go <- as.character(orderList$garchOrder[j])
      ao <- as.character(orderList$armaOrder[j])
  
      # create ugarchspec model for current combination
      fit.spec <- ugarchspec(variance.model = list(model = type, 
                                                   garchOrder = eval(parse(text=go))),
                             mean.model = list(armaOrder = eval(parse(text=ao)),
                                               include.mean = TRUE), 
                             distribution.model = "std"
                             )
  
      # fit the model to current time series
      garchfit <- ugarchfit(data = data[,i], spec = fit.spec, solver = "hybrid")
  
      # retrieve the information criteria
      info <- infocriteria(garchfit)[1]
  
      # store results for current combination in results data frame
      results <- rbind(results, 
                       data.frame(ticker = colnames(data)[i], 
                                  go = go, 
                                  ao = ao, 
                                  info = info))
    }
  
    # find the order that produces the lowest information criterion
    min_results <- results %>%
      filter(info == min(results$info))
  
    # append the minimum results to all_results data frame
    all_results <- rbind(all_results, min_results)
  }
  return(all_results)
}

#========================================================================#
# ----

#========================================================================#
# ===================== Multi ARCH test ================================ #
#========================================================================#

# ARCH-test for multiple variabels ----
multi_ARCH <- function(VAR_model = var_return_all, max_order = 10){
  
  arch_results <- data.frame(lags = numeric(),
                             test_stat = numeric(),
                             p_value = numeric())
  
  # for loop to test for ARCH effects up to 10 lags
  for (i in 1:max_order) {
    # run arch test
    arch_test <- arch.test(VAR_model, lags.multi = i, multivariate.only = TRUE)
    
    # extract relevant information from arch test result
    lags <- i
    test_stat <- arch_test$arch.mul$statistic
    p_value <- arch_test$arch.mul$p.value
    
    # add results to data frame
    arch_results <- rbind(arch_results, data.frame(lags = lags,
                                                   test_stat = test_stat,
                                                   p_value = p_value))
  } 
  return(arch_results)
}
#=======================================================================#
# ----

#========================================================================#
# =================== Multi serial test ================================ #
#========================================================================#

# Brush-Godfrey test for mulitple variables ----
multi_BG <- function(VAR_model = var_return_all, max_order = 10){
  
  BG_results <- data.frame(lags = numeric(),
                             test_stat = numeric(),
                             p_value = numeric())
  
  # for loop to test for ARCH effects up to 10 lags
  for (i in 1:max_order) {
    # run arch test
    serial_test <- serial.test(VAR_model, lags.bg = i, type = "BG")
    
    # extract relevant information from arch test result
    lags <- i
    test_stat <- serial_test$serial$statistic
    p_value <- serial_test$serial$p.value
    
    # add results to data frame
    BG_results <- rbind(BG_results, data.frame(lags = lags,
                                                   test_stat = test_stat,
                                                   p_value = p_value))
  } 
  return(BG_results)
}

#=======================================================================#
# ----

#=======================================================================#
# ===================== Multi ADF test ================================ #
#=======================================================================#

# ADF for multiple variables ----
multi_ADF <-function(series = r_all, IC = c("AIC", "HQ")){
  
  # Creating a list outside loop that will contain optimal lags
  results_list <- list()
  
  if (IC == "AIC") {
    for (i in 1:ncol(series)) {
    ts <- series[,i]
    optimal_lag_AIC  <- VARselect(ts)$selection[[1]]
    optimal_lag_HQ  <- VARselect(ts)$selection[[2]]
    adf_test <- adf.test(ts, k=opt_lag_all)
    
     results_list[[i]] <- data.frame(
          asset_name = colnames(series)[i],
          AIC_lag = optimal_lag_AIC,
          p_value = adf_test$p.value,
          test_statistic = adf_test$statistic
          )
  }
  results_tibble <- bind_rows(results_list)
  # results_tibble
    
  return(results_tibble)
  } else{
    
    for (i in 1:ncol(series)) {
    ts <- series[,i]
    optimal_lag_AIC  <- VARselect(ts)$selection[[1]]
    optimal_lag_HQ  <- VARselect(ts)$selection[[2]]
    adf_test <- adf.test(ts, k=opt_lag_all)
    
     results_list[[i]] <- data.frame(
          asset_name = colnames(series)[i],
          HQ_lag = optimal_lag_HQ,
          p_value = adf_test$p.value,
          test_statistic = adf_test$statistic
          )
  }
  results_tibble <- bind_rows(results_list)
  # results_tibble
    
  return(results_tibble)
    
  }
  # Augmented Dicky-Fuller: Unit root test with optmal lag order ---- 
  
}
#========================================================================#
# ----

#========================================================================#
# ====================== Criteria table ================================ #
#========================================================================#

# Displaying optimal lags given different types of information criterion ----
criterion_table <- function(data = r_all, type = c("r", "v")){

  r_criterion <- data.frame(criterion = c("AIC", "HQ", "SC", "FPE"), 
                   lag_all = integer(4), 
                   lag_18 = integer(4), 
                   lag_19 = integer(4), 
                   lag_20 = integer(4), 
                   lag_21 = integer(4), 
                   lag_22 = integer(4))
  
  # iterate over criteria
  for (i in 1:4) {
    # get lag_all for criterion i
    lag_all <- vars::VARselect(data, lag.max = 10, type = "const")$selection[[i]]
    # iterate over years
    for (year in c("18", "19", "20", "21", "22")) {
      # get lag for criterion i and year
      lag <- vars::VARselect(get(paste0(type, "_", year)), 
                             lag.max = 10, 
                             type = "const")$selection[[i]]
      # fill in data.frame
      r_criterion[i, paste0("lag_", year)] <- lag
    }
    
    # fill in lag_all column
    r_criterion[i, "lag_all"] <- lag_all
    
  }
  return(r_criterion)
}
#========================================================================#
# ----

#========================================================================#
# ============== Display significant values of VAR ===================== #
#========================================================================#

# Extracting p-values from the VAR model----
VAR_signif <- function(VAR_model = var_return_all){
  
  coef <- as_tibble(coef(VAR_model))
  
  p_values <- as_tibble(lapply(coef, "[", , "Pr(>|t|)"))
  
  p_values <- p_values %>%
    mutate(colnames(VAR_model$datamat)[49:(length(VAR_model$datamat))], .before = 1)
    
  p_values <- p_values %>% rename("Regressor" = colnames(p_values[,1]))

  return(p_values)
}
# =======================================================================#
# ----

#========================================================================#
# ==================== Rolling Spillovers ============================== #
#========================================================================#

# Function for making a tibble of rolling spillvoers ----
rolling_spillovers <- function(r_series = r_all){
  
  opt_lag_all = vars::VARselect(r_series, lag.max = 10, type = "const")$selection[2]
  
  # Generating rolling data ----
  time_all <- as.character(as.Date(data[-1,]$Date, "%Y,%M,%D", tz = "UTC"))
  
  # Add the dates as an index to the return series 
  rownames(r_series) <- time_all
  
  # set the estimate parameters for the rolling window
  params_est = list(p = opt_lag_all, type = "const")
  
  
  # Get the rolling window estimates
  sp_return_300 <- spilloverRollingDY12(r_series, 
                                        n.ahead = 10, 
                                        no.corr = F, 
                                        "VAR", 
                                        params_est = params_est, 
                                        window = 300)
  
  sp_return_200 <- spilloverRollingDY12(r_series, 
                                        n.ahead = 10, 
                                        no.corr = F, 
                                        "VAR", 
                                        params_est = params_est, 
                                        window = 200)
  
  sp_return_100 <- spilloverRollingDY12(r_series, 
                                        n.ahead = 10, 
                                        no.corr = F, 
                                        "VAR", 
                                        params_est = params_est, 
                                        window = 100)
  
  
  sp_100 <- data.frame(
    date = data$Date[(100+1):length(data$Date)],
    sp_100 = overall(sp_return_100)
  )
  names(sp_100)[2] <- "sp_100"
  
  sp_200 <- data.frame(
    date = data$Date[(200+1):length(data$Date)],
    sp_200 = overall(sp_return_200)
  )
  names(sp_200)[2] <- "sp_200"
  
  sp_300 <- data.frame(
    date = data$Date[(300+1):length(data$Date)],
    sp_300 = overall(sp_return_300)
  )
  names(sp_300)[2] <- "sp_300"
  
  # Combine the data frames using bind_rows()
  combined_data <- bind_rows(sp_100, sp_200, sp_300)
  
  # Group by date and calculate sum of numbers for each date
  result <- combined_data %>% 
              group_by(date) %>% 
              summarize(sp_100 = sum(sp_100, na.rm = TRUE), 
                        sp_200 = sum(sp_200, na.rm = TRUE), 
                        sp_300 = sum(sp_300, na.rm = TRUE)) %>%
              # Replace 0 values with NA
              mutate_all(~ replace(., . == 0, NA))
  return(result)
}
#========================================================================#
# ----

#========================================================================#
# ================== Similarity index ================================== #
#========================================================================#

# Test similarity index ----
similarity_index <- function(A, B) {
  n <- length(A)
  ranks_A <- sapply(A, function(x) match(x, B))
  ranks_B <- 1:n
  distances <- abs(ranks_A - ranks_B)
  similarities <- 1 - distances / pmax(n - ranks_B, ranks_B - 1)
  mean(similarities)
}


# Function for repeated minimum return similarity index
similarity_index_min <- function(listA, listB, n_reps) {
  min_sim_index <- Inf
  for (i in 1:n_reps) {
    listB <- sample(listA)
    sim_index <- similarity_index(listA, listB)
    if (sim_index < min_sim_index) {
      min_sim_index <- sim_index
    }
  }
  return(min_sim_index)
}

#========================================================================#
# ----

```


```{r, eval=F}

# ====================================================================== #
# ================ Descriptive statistics ============================== #
# ====================================================================== #

descriptives <- descriptive_table(r_all)


# Statistics quantile table for full period ----
quantile_full <- tibble(
  Quantile = c("25%", "50%", "75%"),
  Mean = round(quantile(descriptives$Mean, c(0.25,0.5,0.75)),2),
  StdDev = round(quantile(descriptives$StdDev, c(0.25,0.5,0.75)),2),
  Min = round(quantile(descriptives$Min, c(0.25,0.5,0.75)),2),
  Max = round(quantile(descriptives$Max, c(0.25,0.5,0.75)),2),
  Kurtosis = round(quantile(descriptives$Kurtosis, c(0.25,0.5,0.75)),2),
  Skewness = round(quantile(descriptives$Skewness, c(0.25,0.5,0.75)),2),
  Sharpe = round(quantile(descriptives$Sharpe, c(0.25,0.5,0.75)),2)
)

# Correlations ----
m_all = cor(r_all, method = "spearman")

# Print plot 
corr_plot(m_all)



#  The equally weighted index ----

# Create a date object outside loop
EW_index <- data.frame(date = data[,1])

# Iterate and make Equally weighted index 
for (i in 1:nrow(data[,-1])) {
  indx <- sum(data[i,-1])/ncol(data[,-1])  
  EW_index[i,2] <- indx
}

# Get natural log returns 
EW_index_return <- EW_index %>% mutate(across(2:ncol(.), ~ (log(.x / lag(.x))))) %>% slice(-1) 

# Combine date, returns, and index
data <- data.frame(
  Year = EW_index[,1][1:(length(EW_index$V2)-1)],
  Volatility = EW_index_return$V2,
  Index = EW_index$V2[1:(length(EW_index$V2)-1)]
)

# construct separate plots for each series
obj1 <- xyplot(Volatility ~ Year, data, type = "l" , lwd=1.5, col="#42D0E1")
obj2 <- xyplot(Index ~ Year, data, type = "l", lwd=1.5, col="#343232") 

# Make the plot with second y axis:
EW_plot <- doubleYScale(obj1, obj2, add.ylab2 = TRUE)


```



```{r, eval=F}


#========================================================================#
# =================== VAR model Returns ================================ #
#========================================================================#

# IC criteria for VAR models ----
p = 2 # AIC(n):1,   HQ(n):2,  SC(n):3,  FPE(n): 4

# Selecting optimal lag order for the VAR model, given "p" ----
opt_lag_all = vars::VARselect(r_all, lag.max = 10, type = "const")$selection[p]
opt_lag_18 = vars::VARselect(r_18, lag.max = 10, type = "const")$selection[p]
opt_lag_19 = vars::VARselect(r_19, lag.max = 10, type = "const")$selection[p]
opt_lag_20 = vars::VARselect(r_20, lag.max = 10, type = "const")$selection[p]
opt_lag_21 = vars::VARselect(r_21, lag.max = 10, type = "const")$selection[p]
opt_lag_22 = vars::VARselect(r_22, lag.max = 10, type = "const")$selection[p]

# Returning optimal lag orders for all samples ----
VAR_Return_lags <- criterion_table(data = r_all, type = "r")

# Estimation of VAR model fro return series ----
var_return_all = vars::VAR(r_all, p = opt_lag_all, type = "const")
var_return_18 = vars::VAR(r_18, p = opt_lag_18, type = "const")
var_return_19 = vars::VAR(r_19, p = opt_lag_19, type = "const")
var_return_20 = vars::VAR(r_20, p = opt_lag_20, type = "const")
var_return_21 = vars::VAR(r_21, p = opt_lag_21, type = "const")
var_return_22 = vars::VAR(r_22, p = opt_lag_22, type = "const")

# Estimation of VAR model Including HAC ----
var_return_all_HAC = VAR_HAC(r_all, p = opt_lag_all, type = "const", se_type = "HC2")
var_return_18_HAC = VAR_HAC(r_18, p = opt_lag_18, type = "const", se_type = "HC2")
var_return_19_HAC = VAR_HAC(r_19, p = opt_lag_19, type = "const", se_type = "HC2")
var_return_20_HAC = VAR_HAC(r_20, p = opt_lag_20, type = "const", se_type = "HC2")
var_return_21_HAC = VAR_HAC(r_21, p = opt_lag_21, type = "const", se_type = "HC2")
var_return_22_HAC = VAR_HAC(r_22, p = opt_lag_22, type = "const", se_type = "HC2")

# Extracting p-values from the VAR model----
var_pvals <- VAR_signif(VAR_model = var_return_all_HAC)

# Show only significant values as % of total observations ----
signif_VAR_return <- VAR_signif(VAR_model = var_return_all_HAC) %>%
  mutate(across(-1, ~ ifelse(. <= 0.05, ., NA))) %>%
  summarize(across(everything(), ~ sum(!is.na(.))/n()*100, .names = "{.col}")) %>%
  pivot_longer(everything(), names_to = "Regressors", values_to = "Percentage") %>% slice(-1) %>%
  arrange(desc(Percentage))


# ====================================================================== #
# ================== Diagnostics for return ============================ #
# ====================================================================== #

# Augemted Dick Fuller test on the returns ----
adf_test <- multi_ADF(series = r_all, IC="AIC")

# Serial correlation ----
serial_test_all <- multi_BG(var_return_all, max_order = 10)

# Heteroskedasticity ----
arch_test <- multi_ARCH(var_return_all, max_order = 10)

# Normal distribution of the residuals ----
norm_test_all <- normality.test(var_return_all, multivariate.only = TRUE)

norm_test_tibble <- tibble(
  Values = c("test statistic", "p-value"),
  JB_Test = c(norm_test_all$jb.mul$JB$statistic, norm_test_all$jb.mul$JB$p.value),
  kurtosis = c(norm_test_all$jb.mul$Kurtosis$statistic, norm_test_all$jb.mul$Kurtosis$p.value),
  skewness = c(norm_test_all$jb.mul$Skewness$statistic, norm_test_all$jb.mul$Skewness$p.value)
)


# Testing for structural breaks in the residuals of the VAR model ----
stability_test <- stability(var_return_all, type = "OLS-CUSUM")

# List outside loop 
stability_plots <- list()
# OLS-based CUSUM test
for (i in stability_test$names){
  stability_plots[[i]] <- plot(stability_test$stability[[i]], main = i)
}

  
```



```{r, eval=F}

#========================================================================#
# ===================== Return Spillovers ============================== #
#========================================================================#

# Creating spillover talbes for all periods ----
spillovertb_all = spilloverDY12(var_return_all, n.ahead = 10, no.corr = F)
spillovertb_18 = spilloverDY12(var_return_18, n.ahead = 10, no.corr = F)
spillovertb_19 = spilloverDY12(var_return_19, n.ahead = 10, no.corr = F)
spillovertb_20 = spilloverDY12(var_return_20, n.ahead = 10, no.corr = F)
spillovertb_21 = spilloverDY12(var_return_21, n.ahead = 10, no.corr = F)
spillovertb_22 = spilloverDY12(var_return_22, n.ahead = 10, no.corr = F)

# Making a spillover table object (returning tibble)
spillover_table_return <- spillover_table(VAR_data = var_return_all, n.ahead = 10, no.corr = F)
spillover_index <- round(sum(spillover_table_return[1:48,50])/48,2) 


#========================================================================#
# ==================== Rolling Spillovers ============================== #
#========================================================================#

# Getting rolling spillover data for 100, 200, and 300 rolling days ----
# rolling_return <- rolling_spillovers(r_series = r_all)

# Retrieving pre-saved rollover data 
rolling_return <- read_excel("rolling_spillover_return.xlsx", sheet = "rolling_spillover")

# Plotting rolling spillover
rolling_spillover_graph <- rolling_spillover_plot(data = rolling_return, title = "Total return spillover index")

# Printing graph
rolling_spillover_graph

```



```{r, eval=F}
#========================================================================#
# ====================== Marginal model ================================ #
#========================================================================#

# # Optimal orders for all GARCH-type models. 
# gna_order <- opt_orders(data = r_all, max_order = 1, type = "eGARCH")

# Loading pre-estimated optimal orders ----
gna_order <- read_excel("InfoCriteria_results.xlsx", sheet = "AIC OR HQC")

# Initializing a list to store the GARCH model results (Outside loop)
garch_results <- list()
  
for (i in 1:nrow(gna_order)) {
  
  # Extract the garchOrder and armaOrder values and convert them to numeric vectors
  garchOrder_vec <- as.numeric(strsplit(gsub("c\\(|\\)", "", gna_order$garchOrder[i]), ",")[[1]])
  armaOrder_vec <- as.numeric(strsplit(gsub("c\\(|\\)", "", gna_order$armaOrder[i]), ",")[[1]])
  
  # Create a row_spec object for the current row of gna_order
  row_spec <- ugarchspec(variance.model = list(model = "eGARCH", 
                                               garchOrder = garchOrder_vec),
                         mean.model = list(armaOrder = armaOrder_vec, 
                                           include.mean = TRUE),
                         distribution.model = "std")
  # Fit the GARCH model for the i-th column
  garch_results[[i]] <- ugarchfit(data = r_all[,i], spec = row_spec)
}

# Assign the results to garch_results_i
for (i in 1:ncol(r_all)) {
  assign(paste0("garch_results_", i), garch_results[[i]])
}

# Returning P-values or Estimates for GARCH model ---- 

# function to generate tibble for each garch result
generate_tibble <- function(result) {
  coef_names <- names(result@fit$coef)
  Estimats <- round(as.tibble(result@fit$robust.matcoef)[, 1],4)
  p_values <- round(as.tibble(result@fit$robust.matcoef)[, 4],4)
  tibble(coef_names, Estimats, p_values)
}

# generate a list of tibbles using the generate_tibble function
tibble_list <- map(garch_results, generate_tibble)

# Create a function that takes a tibble and returns it grouped by `coef_names`
group_by_coef_names <- function(tib, select = "Estimate") {
  tib %>%
    group_by(coef_names) %>%
    summarise_at(vars(contains(select)), mean, na.rm = TRUE)
}

# Use purrr::reduce to apply the function to all tibbles in the list, combining them into one tibble
combined_tibble <- purrr::reduce(tibble_list, full_join, by = "coef_names") %>%
  group_by_coef_names() %>% 
  rename_with(~gsub(".USD", "", colnames(r_all)), -coef_names)


```


```{r, eval=F}

#========================================================================#
# ================= Marginal model diagnostics ========================= #
#========================================================================#

# Loop over the garch_results_i objects and extract the residuals
for (i in 1:48) {
  # Extract the ith garch_results_ object
  fit_opt <- get(paste0("garch_results_", i))
  
  # Extract the residuals from the ith ugarchfit object
  resid_opt <- residuals(fit_opt, standardize = TRUE)
  
  # Store the residuals in a separate object named resid_i
  assign(paste0("resid_opt_", i), resid_opt)
}

# Autocorrelation (Ljung-Box Tests) ----
# Create an empty data frame to store the p-values
Box_results <- data.frame(
  ticker = character(),
  statistic = numeric(),
  p.value = numeric(),
  stringsAsFactors = FALSE
)

# Loop over the residuals and run the Box test
for (i in 1:48) {
  # Extract the ith residual vector
  resid <- get(paste0("resid_opt_", i))
  
  # Run the Box test on the ith residual vector
  box_test <- Box.test(resid^2, lag = min(10,48/2), type = "Ljung-Box")
  
  # Append the p-value to the data frame
  Box_results <- rbind(Box_results, 
                             data.frame(ticker = colnames(r_all)[i], 
                                        statistic = box_test[[1]],
                                        p.value = box_test$p.value,
                                        stringsAsFactors = FALSE
                                        )
                             )
}

# Normality test ----

# Create an empty data frame to store the results
shapiro_results <- data.frame(ticker = character(), statistic = numeric(), p.value = numeric())

for (i in 1:48) {  # replace `n` with the number of objects you have
  # Perform Shapiro-Wilk test on the i-th object
  shapiro <- shapiro.test(as.vector(get(paste0("resid_opt_", i))))
  
  # Store the results in the data frame
  shapiro_results <- rbind(shapiro_results, data.frame(ticker = colnames(r_all)[i],
                                                       statistic = shapiro$statistic, 
                                                       p.value = shapiro$p.value
                                                       )
                           )
}


```




```{r, eval=F}

#===============================================================================#
# =================== Retrieving volatility component ========================= #
#===============================================================================#

# Initialising empty matrix to store the results
volatilities = matrix(nrow = nrow(r_all), ncol = ncol(r_all))

# Extracting the volatility component of every garch model
for (i in 1:ncol(r_all)) {
  # Extract the conditional standard deviation from the i-th fitted GARCH model
  volatilities[,i] <- garch_results[[i]]@fit$sigma
}

# Assign the matrix to the object volatilities
volatilities <- as.data.frame(volatilities)
names(volatilities) <- colnames(r_all)
volatilities <- cbind(date = data[,1], volatilities)

# Convert date column to a format that only shows the year
volatilities$year <- format(volatilities$date, "%Y")

# Create a list of data frames, one for each year
df_list <- split(volatilities, volatilities$year)

# Create separate data frames for each year
v_all <- as.data.frame(volatilities) %>% select(-c(date, year)) 
v_18 <- df_list$`2018` %>% select(-c(date, year))
v_19 <- df_list$`2019` %>% select(-c(date, year))
v_20 <- df_list$`2020` %>% select(-c(date, year)) 
v_21 <- df_list$`2021` %>% select(-c(date, year))
v_22 <- df_list$`2022` %>% select(-c(date, year))

# =====================================================================
# ----

```


```{r, eval=F}

#===============================================================================#
# ======================== VAR model of volatilities ========================== #
#===============================================================================#


#  AIC(n):1,   HQ(n):2,  SC(n):3,  FPE(n): 4
p = 2  

# Selecting lag length for VAR model ----
#' 1: AIC
opt_vol_lag_all = vars::VARselect(v_all, lag.max = 10, type = "const")$selection[p]
opt_vol_lag_18 = vars::VARselect(v_18, lag.max = 10, type = "const")$selection[p]
opt_vol_lag_19 = vars::VARselect(v_19, lag.max = 10, type = "const")$selection[p]
opt_vol_lag_20 = vars::VARselect(v_20, lag.max = 10, type = "const")$selection[p]
opt_vol_lag_21 = vars::VARselect(v_21, lag.max = 10, type = "const")$selection[p]
opt_vol_lag_22 = vars::VARselect(v_22, lag.max = 10, type = "const")$selection[p]


# Showcasing the criterion ----
VAR_Volatility_lags <- criterion_table(data = v_all, type = "v")

# ====================================
# Adding the volatilities to the VAR model ----
var_vol_all <- VAR(v_all, p = opt_vol_lag_all, type = "const")
var_vol_18 <- VAR(v_18, p = opt_vol_lag_18, type = "const")
var_vol_19 <- VAR(v_19, p = opt_vol_lag_19, type = "const")
var_vol_20 <- VAR(v_20, p = opt_vol_lag_20, type = "const")
var_vol_21 <- VAR(v_21, p = opt_vol_lag_21, type = "const")
var_vol_22 <- VAR(v_22, p = opt_vol_lag_22, type = "const")

# Estimation of VAR model Including HAC ----
var_vol_all_HAC = VAR_HAC(v_all, p = opt_vol_lag_all, type = "const", se_type = "HC2")
var_vol_18_HAC = VAR_HAC(v_18, p = opt_vol_lag_18, type = "const", se_type = "HC2")
var_vol_19_HAC = VAR_HAC(v_19, p = opt_vol_lag_19, type = "const", se_type = "HC2")
var_vol_20_HAC = VAR_HAC(v_20, p = opt_vol_lag_20, type = "const", se_type = "HC2")
var_vol_21_HAC = VAR_HAC(v_21, p = opt_vol_lag_21, type = "const", se_type = "HC2")
var_vol_22_HAC = VAR_HAC(v_22, p = opt_vol_lag_22, type = "const", se_type = "HC2")



# Extracting p-values from the VAR model----
var_v_pval <- VAR_signif(VAR_model = var_vol_all_HAC)

# Show only significant values as % of total observations ----
signif_VAR_Volatility <- VAR_signif(VAR_model = var_vol_all_HAC) %>%
  mutate(across(-1, ~ ifelse(. <= 0.05, ., NA))) %>%
  summarize(across(everything(), ~ sum(!is.na(.))/n()*100, .names = "{.col}")) %>%
  pivot_longer(everything(), names_to = "Regressors", values_to = "Percentage") %>% slice(-1) %>%
  arrange(desc(Percentage))

```

```{r, eval=F}

#===============================================================================#
# ===================== Diagnostics for volatility VAR ======================== #
#===============================================================================#

# Augemted Dick Fuller test on the returns ----
adf_test <- multi_ADF(series = v_all, IC="AIC")

# Serial correlation ----
serial_test_all <- multi_BG(var_vol_all, max_order = 10)

# Heteroskedasticity ----
arch_test_vol <- multi_ARCH(var_vol_all, max_order = 10)

# Normal distribution of the residuals ----
norm_test_vol <- normality.test(var_vol_all, multivariate.only = TRUE)

norm_test_tibble_vol <- tibble(
  Values = c("test statistic", "p-value"),
  JB_Test = c(norm_test_vol$jb.mul$JB$statistic, norm_test_vol$jb.mul$JB$p.value),
  kurtosis = c(norm_test_vol$jb.mul$Kurtosis$statistic, norm_test_vol$jb.mul$Kurtosis$p.value),
  skewness = c(norm_test_vol$jb.mul$Skewness$statistic, norm_test_vol$jb.mul$Skewness$p.value)
)


# Structural breaks in the residuals of the VAR model ----
stability_vol_test <- stability(var_vol_all, type = "OLS-CUSUM")

# List outside loop 
stability_plots_vol <- list()
# OLS-based CUSUM test
for (i in stability_vol_test$names){
  stability_plots[[i]] <- plot(stability_vol_test$stability[[i]], main = i)
}


```


```{r, eval=F}

#========================================================================#
# ================= Volatility Spillovers ============================== #
#========================================================================#

# Creating spillover talbes for all periods ----
spillovertb_all = spilloverDY12(var_return_all, n.ahead = 10, no.corr = F)
spillovertb_18 = spilloverDY12(var_return_18, n.ahead = 10, no.corr = F)
spillovertb_19 = spilloverDY12(var_return_19, n.ahead = 10, no.corr = F)
spillovertb_20 = spilloverDY12(var_return_20, n.ahead = 10, no.corr = F)
spillovertb_21 = spilloverDY12(var_return_21, n.ahead = 10, no.corr = F)
spillovertb_22 = spilloverDY12(var_return_22, n.ahead = 10, no.corr = F)

# Making a spillover table object (returning tibble)
spillover_table_volatility <- spillover_table(VAR_data = var_vol_all, n.ahead = 10, no.corr = F)

#========================================================================#
# ==================== Rolling Spillovers ============================== #
#========================================================================#

# loading previously computed rolling computations  from Excel sheet
rolling_spillover_data_vol <- read_excel("rolling_spillover_volatility.xlsx", sheet = "rolling_spillover")

# Plotting rolling spillover
rolling_spillover_graph_vol <- rolling_spillover_plot(data = rolling_spillover_data_vol, 
                                                      title = "Total return spillover index")

# Printing graph
rolling_spillover_graph_vol

```

```{r, eval=F}

#========================================================================#
# ================== Exploratory analysis ============================== #
#========================================================================#

# Displaying largest contributors of spillover shocks ----

spillover_table__all <- spillover_table(VAR_data = var_vol_all, n.ahead = 10, no.corr = F)
spillover_table__18 <- spillover_table(VAR_data = var_vol_18, n.ahead = 10, no.corr = F)
spillover_table__19 <- spillover_table(VAR_data = var_vol_19, n.ahead = 10, no.corr = F)
spillover_table__20 <- spillover_table(VAR_data = var_vol_20, n.ahead = 10, no.corr = F)
spillover_table__21 <- spillover_table(VAR_data = var_vol_21, n.ahead = 10, no.corr = F)
spillover_table__22 <- spillover_table(VAR_data = var_vol_22, n.ahead = 10, no.corr = F)

c_all <- tibble(
  tickers = colnames(r_all),
  c_to = as.double(unlist(spillover_table__all[nrow(spillover_table__all),][2:49]))   
) %>% arrange(desc(c_to))

c_18 <- tibble(
  tickers = colnames(r_all),
  c_to = as.double(unlist(spillover_table__18[nrow(spillover_table__18),][2:49]))   
) %>% arrange(desc(c_to))

c_19 <- tibble(
  tickers = colnames(r_all),
  c_to = as.double(unlist(spillover_table__19[nrow(spillover_table__19),][2:49]))   
) %>% arrange(desc(c_to))

c_20 <- tibble(
  tickers = colnames(r_all),
  c_to = as.double(unlist(spillover_table__20[nrow(spillover_table__20),][2:49]))   
) %>% arrange(desc(c_to))

c_21 <- tibble(
  tickers = colnames(r_all),
  c_to = as.double(unlist(spillover_table__21[nrow(spillover_table__21),][2:49]))   
) %>% arrange(desc(c_to))

c_22 <- tibble(
  tickers = colnames(r_all),
  c_to = as.double(unlist(spillover_table__22[nrow(spillover_table__22),][2:49]))   
) %>% arrange(desc(c_to))


# Create a combined tibble with all the data
combined <- bind_rows(list(
  Static = c_all,
  `2018` = c_18,
  `2019` = c_19,
  `2020` = c_20,
  `2021` = c_21,
  `2022` = c_22
), .id = "id")

# Group by ticker and id, and filter to keep only the top 3 c_to values
top <- combined %>%
  group_by(id) %>%
  top_n(3, c_to)

# Removing -USD from tickers
top$tickers <- gsub("-USD", "", top$tickers)

# plot the data
ggplot(top, aes(x = id, y = c_to, label = tickers, color = tickers)) +
  geom_point(size = 3) +
  geom_text_repel(size = 2.5, nudge_y = 2, box.padding = 0.5, point.padding = 0.5) +
  labs(x = "Period", y = "Percentage", title = "") +
  theme_classic() +
  guides(color = FALSE)


```



```{r, eval=F}

#=======================================================================#
# ================== Exploring similarities =========================== #
#=======================================================================#


# Creating a list of all the lists
c_lists <- list(c_all$tickers, c_18$tickers, c_19$tickers, c_20$tickers, c_21$tickers, c_22$tickers)

# Create an empty matrix to store the results
results_matrix <- matrix(nrow = length(c_lists), ncol = length(c_lists))

# set the column and row names to the names of the c_..$tickers lists
colnames(results_matrix) <- paste0("c_", 1:length(c_lists), ".tickers")
rownames(results_matrix) <- paste0("c_", 1:length(c_lists), ".tickers")
colnames(results_matrix) <- rownames(results_matrix) <- c("all", "18", "19", "20", "21", "22")

# iterate through all the combinations and calculate the similarity index
for (i in 1:length(c_lists)) {
  for (j in 1:length(c_lists)) {
    results_matrix[i,j] <- similarity_index(c_lists[[i]], c_lists[[j]])
  }
}

# Only show lower triangle of the matrix
results_matrix[upper.tri(results_matrix)] <- 0

# Set diagonal elements to 0
diag(results_matrix) <- 0

# Preview matrix
results_matrix


#========================================================================#
# ================== Random order simulation =========================== #
#========================================================================#

# Simulating lowest possible similarity index ----

# Generate a list of 48 letters ----
listi <-c(LETTERS, paste0(rep(LETTERS, 26), LETTERS))[1:48]

# Sample list A be such that it is a permutation of A
listj <- sample(listi)

# Find the minimum value of the similarity index for n random permutations
similarity_index_min(listi, listj, n_reps = 100000)

```