---
title: ""
author: ""
date: ""
abstract: ""
subtitle: ""
output: word_document
---

\pagenumbering{gobble}

```{r, echo=T, message=F, warning=F, eval = F}
# Load-or-install packages function ----
using <- function(...) {
  libs <- unlist(list(...))
  req  <- unlist(lapply(libs, require, character.only = TRUE))
  need <- libs[req == FALSE]
  if (length(need) > 0) {
    install.packages(need)
    lapply(need, require, character.only = TRUE)
  }
}

# Loading the libraries by calling the 'using' function
# ("ggthemes" supplies theme_base(), used by the plotting helpers below)
using("frequencyConnectedness",
      "lubridate",
      "tidyverse",
      "rugarch",
      "vars",
      "xlsx",
      "readxl",
      "tseries",
      "corrplot",
      "latticeExtra",
      "ggrepel",
      "moments",
      "ggthemes"
      )
```

\newpage

```{r, echo=T, message=F, warning=F, eval=F}
# ======================================================================= #
# ======================== Loading Data ================================= #
# ======================================================================= #

# Setting working directory ----
setwd("~WORKING DIRECTORY")

# Getting market cap info
mcap <- read_excel("MarketCap_250.xlsx", sheet = "Selected")

# Loading the price data ----
data <- read_excel("Crypto_prices.xlsx", sheet = "prices")

# ====================================================================== #
# Price sub-samples ----
price_18 <- data %>% filter(year(Date) == 2018)
price_19 <- data %>% filter(year(Date) == 2019)
price_20 <- data %>% filter(year(Date) == 2020)
price_21 <- data %>% filter(year(Date) == 2021)
price_22 <- data %>% filter(year(Date) == 2022)

# Computing returns
r_all <- apply(log((cbind(data[,-1]))), 2, diff)

# Computing returns for the sub-periods
# Get a list of unique years from the date column
years <- unique(as.numeric(format(data$Date, "%Y")))

# Create an empty list to store the new tibble objects
yearly_data_list <- list()

# Loop through the years and filter the data for each year
for (year in years) {
  year_data <- data %>% filter(as.numeric(format(Date, "%Y")) == year)
  yearly_data_list[[as.character(year)]] <- year_data
}

# Creating sub-periods of the data set
r_18 <- apply(log((cbind(yearly_data_list$`2018`[,-1]))), 2, diff)
r_19 <- apply(log((cbind(yearly_data_list$`2019`[,-1]))), 2, diff)
r_20 <- apply(log((cbind(yearly_data_list$`2020`[,-1]))), 2, diff)
r_21 <- apply(log((cbind(yearly_data_list$`2021`[,-1]))), 2, diff)
r_22 <- apply(log((cbind(yearly_data_list$`2022`[,-1]))), 2, diff)
```

\newpage

```{r, eval=F}
# ======================================================================= #
# ===================== Creating functions ============================== #
# ======================================================================= #

# ====================================================================== #
# ================ Descriptive statistics ============================== #
# ====================================================================== #
descriptive_table <- function(data = r_all){
  descriptive_table_all <- tibble(
    Mean     = colMeans(data) * 365,
    StdDev   = apply(data, 2, sd) * sqrt(365),
    Min      = apply(data, 2, min),
    Max      = apply(data, 2, max),
    Kurtosis = apply(data, 2, kurtosis),
    Skewness = apply(data, 2, skewness)
  ) %>%
    mutate(
      Sharpe = round(Mean / StdDev, 5)
    )
  return(descriptive_table_all)
}
#========================================================================#
# ----
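# ---------------------------------------------------------------------- #
# Usage sketch (illustration only, not part of the original pipeline):
# the helper above is later applied to the full return matrix, e.g.
#   desc <- descriptive_table(r_all)
# giving one row per coin with annualised mean and volatility (x 365 and
# x sqrt(365)), min, max, kurtosis, skewness and the implied Sharpe ratio.
# ---------------------------------------------------------------------- #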
# ====================================================================== #
# ===================== Graph functions ================================ #
# ====================================================================== #

# Making a template for line graphs ----
line_graph <- function(data, x, y, Title = "", ylab = "", xlab = "",
                       isDate = FALSE, xmax = 10, xmin = 0, len = 6) {
  if (isDate == TRUE) {
    data %>%
      ggplot(aes(x = as.Date(x), y = y)) +
      geom_line(size = 0.6, color = "#45BCCA") +
      labs(
        title = Title,
        # subtitle = "Monthly spot price observations",
        x = xlab,
        y = ylab
      ) +
      # General theme
      theme_base(base_size = 14) +
      # Setting the theme details
      theme(
        axis.title = element_text(size = 11),
        title = element_text(colour = "Black", size = 12, family = "sans"),
        plot.subtitle = element_text(size = 10, hjust = 0.5),
        plot.title = element_text(hjust = 0.5),
        panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(),
        panel.background = element_blank(),
        panel.border = element_rect(colour = "gray", fill = NA, size = 0.5)
      ) +
      scale_x_date(date_labels = "%Y", date_breaks = "1 year") +
      scale_y_continuous(breaks = seq(xmin, xmax, len = len))
  } else {
    data %>%
      ggplot(aes(x = x, y = y)) +
      geom_line(size = 0.6, color = "#45BCCA") +
      labs(
        title = Title,
        # subtitle = "Monthly spot price observations",
        x = xlab,
        y = ylab
      ) +
      # General theme
      theme_base(base_size = 14) +
      # Setting the theme details
      theme(
        axis.title = element_text(size = 11),
        title = element_text(colour = "Black", size = 12, family = "sans"),
        plot.subtitle = element_text(size = 10, hjust = 0.5),
        plot.title = element_text(hjust = 0.5),
        panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(),
        panel.background = element_blank(),
        panel.border = element_rect(colour = "gray", fill = NA, size = 0.5)
      )
  }
}
#========================================================================#
# ----

# Correlation plot ----
corr_plot <- function(data = m_all){
  corrplot(data,
           tl.cex = 0.55,
           order = "original",
           tl.col = "Black",
           type = "lower",
           tl.srt = 360,
           tl.offset = 1,
           cl.ratio = 0.1
           )
}
#========================================================================#
# ----

# Function for rolling spillover plot ----
rolling_spillover_plot <- function(data = rolling_return,
                                   title = "Total return spillover index"){
  # Create ggplot object
  rolling_plot <- ggplot(data, aes(x = as.Date(date))) +
    # Add line for sp_100
    geom_line(aes(y = sp_100, color = "sp_100")) +
    # Add line for sp_200
    geom_line(aes(y = sp_200, color = "sp_200")) +
    # Add line for sp_300
    geom_line(aes(y = sp_300, color = "sp_300")) +
    # Add legend for line colors
    scale_color_manual(values = c("sp_100" = "red",
                                  "sp_200" = "blue",
                                  "sp_300" = "green"),
                       ) +
    # Add axis labels and plot title
    xlab("") +
    ylab("Percentage") +
    ggtitle(title) +
    theme_classic() +
    theme(
      axis.title = element_text(size = 11),
      title = element_text(colour = "Black", size = 12, family = "sans"),
      plot.subtitle = element_text(size = 10, hjust = 0.5),
      plot.title = element_text(hjust = 0.5),
      panel.grid.major = element_blank(),
      panel.grid.minor = element_blank(),
      panel.background = element_blank(),
      panel.border = element_rect(colour = "gray", fill = NA, size = 0.5),
    ) +
    scale_x_date(date_labels = "%Y", date_breaks = "1 year") +
    scale_y_continuous(breaks = (seq(0, 100, 5)), limits = c(85, 100))
  # Return the plot object explicitly
  return(rolling_plot)
}
#========================================================================#
# ----
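# ---------------------------------------------------------------------- #
# Usage sketch (illustration only): the plotting helpers above are meant
# to be called along the lines of
#   line_graph(data, x = data$Date, y = data[[2]], Title = "Price", isDate = TRUE)
#   corr_plot(cor(r_all, method = "spearman"))
#   rolling_spillover_plot(rolling_return, "Total return spillover index")
# The argument values here are placeholders; the actual calls appear in
# the analysis chunks further below.
# ---------------------------------------------------------------------- #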
"HC2", "HC3") ) { y <- as.matrix(y) if (any(is.na(y))) stop("\nNAs in y.\n") if (ncol(y) < 2) stop("The matrix 'y' should contain at least two variables. For univariate analysis consider ar() and arima() in package stats.\n") if (is.null(colnames(y))) { colnames(y) <- paste("y", 1:ncol(y), sep = "") warning(paste("No column names supplied in y, using:", paste(colnames(y), collapse = ", "), ", instead.\n")) } colnames(y) <- make.names(colnames(y)) y.orig <- y type <- match.arg(type) obs <- dim(y)[1] K <- dim(y)[2] if(!is.null(lag.max)){ lag.max <- abs(as.integer(lag.max)) ic <- paste(match.arg(ic), "(n)", sep = "") p <- VARselect(y, lag.max = lag.max, type = type, season = season, exogen = exogen)$selection[ic] } sample <- obs - p ylags <- embed(y, dimension = p + 1)[, -(1:K)] temp1 <- NULL for (i in 1:p) { temp <- paste(colnames(y), ".l", i, sep = "") temp1 <- c(temp1, temp) } colnames(ylags) <- temp1 yend <- y[-c(1:p), ] if (type == "const") { rhs <- cbind(ylags, rep(1, sample)) colnames(rhs) <- c(colnames(ylags), "const") } else if (type == "trend") { rhs <- cbind(ylags, seq(p + 1, length = sample)) colnames(rhs) <- c(colnames(ylags), "trend") } else if (type == "both") { rhs <- cbind(ylags, rep(1, sample), seq(p + 1, length = sample)) colnames(rhs) <- c(colnames(ylags), "const", "trend") } else if (type == "none") { rhs <- ylags colnames(rhs) <- colnames(ylags) } if (!(is.null(season))) { season <- abs(as.integer(season)) dum <- (diag(season) - 1/season)[, -season] dums <- dum while (nrow(dums) < obs) { dums <- rbind(dums, dum) } dums <- dums[1:obs, ] colnames(dums) <- paste("sd", 1:ncol(dums), sep = "") rhs <- cbind(rhs, dums[-c(1:p), ]) } if (!(is.null(exogen))) { exogen <- as.matrix(exogen) if (!identical(nrow(exogen), nrow(y))) { stop("\nDifferent row size of y and exogen.\n") } if (is.null(colnames(exogen))) { colnames(exogen) <- paste("exo", 1:ncol(exogen), sep = "") warning(paste("No column names supplied in exogen, using:", paste(colnames(exogen), collapse = ", "), ", instead.\n")) } colnames(exogen) <- make.names(colnames(exogen)) tmp <- colnames(rhs) rhs <- cbind(rhs, exogen[-c(1:p), ]) colnames(rhs) <- c(tmp, colnames(exogen)) } datamat <- as.data.frame(rhs) colnames(datamat) <- colnames(rhs) equation <- list() library(estimatr) for (i in 1:K) { y <- yend[, i] equation[[colnames(yend)[i]]] <- lm_robust(y ~ -1 + ., data = datamat, se_type = se_type, return_vcov = T) if(any(c("const", "both") %in% type)){ attr(equation[[colnames(yend)[i]]]$terms, "intercept") <- 1 } } call <- match.call() if("season" %in% names(call)) call$season <- eval(season) result <- list(varresult = equation, datamat = data.frame(cbind(yend, rhs)), y = y.orig, type = type, p = p, K = K, obs = sample, totobs = sample + p, restrictions = NULL, call = call) class(result) <- "varest" return(result) } #========================================================================# # ---- #========================================================================# # ==================== Extended spillover table ======================== # #========================================================================# # Better function for decomposition of the VAR model ---- spillover_table <- function(VAR_data = "", n.ahead = 10, no.corr = F) { spillover12 <- spilloverDY12(VAR_data, n.ahead = n.ahead, no.corr = no.corr) # Splitting the data split_data <- as_tibble(unlist(spillover12[1])) %>% group_split(gl(n()/dim(VAR_data$y)[2], dim(VAR_data$y)[2])) # converting the new data, to column new_data <- bind_cols(split_data) %>% 
    select(starts_with("value"))
  # Renaming the columns
  colnames(new_data) <- colnames(VAR_data$y)
  # Creating spillover table
  spillover_table = new_data[] %>%
    add_column(tickers = colnames(VAR_data$y), .before = 1) %>%
    mutate_if(is.numeric, ~ . * 100) %>%
    mutate_if(is.numeric, ~ round(., 2))
  # Adding contribution to self and others, and contribution to others
  spillover_table_2 <- rbind(rbind(spillover_table,
                                   c("Contribution to self and others",
                                     colSums(spillover_table[,-1]))),
                             c("Contribution to others",
                               colSums(spillover_table[,-1] %>%
                                         replace(., col(.) == row(.), 0))))
  # List of contribution to others ----
  c_to = pivot_longer(spillover_table_2[nrow(spillover_table_2),],
                      cols = everything(),
                      names_to = "tickers",
                      values_to = "contribution to others") %>%
    slice(-1) %>%
    mutate(
      `contribution to others` = as.numeric(`contribution to others`)
    )
  # List of contribution from others ----
  c_from = tibble(
    tickers = names(VAR_data$varresult),
    `contribution from others` = rowSums(spillover_table[,-1] %>%
                                           replace(., col(.) == row(.), 0))
  )
  # Complete table
  complete_spillover_table = tibble(
    spillover_table_2,
    c_from = unlist(list(c(c_from$`contribution from others`, NA, NA)))
  )
  return(complete_spillover_table)
}
#========================================================================#
# ----

#========================================================================#
# ========= ARMA-GARCH information criterion loop ====================== #
#========================================================================#

# Finding optimal orders for GARCH-type models ----
opt_orders <- function(data = r_all, max_order = 3, type = "eGARCH"){
  # Defining the range of possible orders for ARMA and GARCH
  arma_order  = c(max_order, max_order)
  garch_order = c(max_order, max_order)
  # Creating a list of all the possible combinations
  orderList <- tibble(
    expand.grid(0:arma_order[1], 0:arma_order[2],
                0:garch_order[1], 1:garch_order[2])
  ) %>%
    unite("armaOrder", Var1, Var2, sep = ",") %>%
    unite("garchOrder", Var3, Var4, sep = ",") %>%
    mutate(
      armaOrder  = paste("c(", armaOrder, ")", sep = ""),
      garchOrder = paste("c(", garchOrder, ")", sep = ""),
    )
  # Data frame to store results for all time series
  all_results <- data.frame()
  # Loop through each return series
  for (i in 1:ncol(data)) {
    # Data frame to store results for the current time series
    results <- data.frame()
    # Loop through all combinations of GARCH and ARMA order
    for (j in 1:nrow(orderList)) {
      # Get GARCH and ARMA order for the current combination
      go <- as.character(orderList$garchOrder[j])
      ao <- as.character(orderList$armaOrder[j])
      # Create a ugarchspec model for the current combination
      fit.spec <- ugarchspec(variance.model = list(model = type,
                                                   garchOrder = eval(parse(text = go))),
                             mean.model = list(armaOrder = eval(parse(text = ao)),
                                               include.mean = TRUE),
                             distribution.model = "std"
                             )
      # Fit the model to the current time series
      garchfit <- ugarchfit(data = data[,i], spec = fit.spec, solver = "hybrid")
      # Retrieve the information criteria
      info <- infocriteria(garchfit)[1]
      # Store results for the current combination in the results data frame
      results <- rbind(results, data.frame(ticker = colnames(data)[i],
                                           go = go, ao = ao, info = info))
    }
    # Find the order that produces the lowest information criterion
    min_results <- results %>% filter(info == min(results$info))
    # Append the minimum results to the all_results data frame
    all_results <- rbind(all_results, min_results)
  }
  return(all_results)
}
#========================================================================#
# ----
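# ---------------------------------------------------------------------- #
# Usage sketch (illustration only): a full grid search is expensive, so in
# the analysis below the function is run once with max_order = 1 and the
# result is cached to Excel, e.g.
#   gna_order <- opt_orders(data = r_all, max_order = 1, type = "eGARCH")
# Each row of the output holds the ARMA and GARCH orders with the lowest
# information criterion for one return series.
# ---------------------------------------------------------------------- #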
#========================================================================#
# ===================== Multi ARCH test ================================ #
#========================================================================#

# ARCH test for multiple variables ----
multi_ARCH <- function(VAR_model = var_return_all, max_order = 10){
  arch_results <- data.frame(lags = numeric(),
                             test_stat = numeric(),
                             p_value = numeric())
  # Loop to test for ARCH effects up to max_order lags
  for (i in 1:max_order) {
    # Run ARCH test
    arch_test <- arch.test(VAR_model, lags.multi = i, multivariate.only = TRUE)
    # Extract relevant information from the test result
    lags <- i
    test_stat <- arch_test$arch.mul$statistic
    p_value <- arch_test$arch.mul$p.value
    # Add results to the data frame
    arch_results <- rbind(arch_results,
                          data.frame(lags = lags,
                                     test_stat = test_stat,
                                     p_value = p_value))
  }
  return(arch_results)
}
#=======================================================================#
# ----

#========================================================================#
# =================== Multi serial test ================================ #
#========================================================================#

# Breusch-Godfrey test for multiple variables ----
multi_BG <- function(VAR_model = var_return_all, max_order = 10){
  BG_results <- data.frame(lags = numeric(),
                           test_stat = numeric(),
                           p_value = numeric())
  # Loop to test for serial correlation up to max_order lags
  for (i in 1:max_order) {
    # Run Breusch-Godfrey test
    serial_test <- serial.test(VAR_model, lags.bg = i, type = "BG")
    # Extract relevant information from the test result
    lags <- i
    test_stat <- serial_test$serial$statistic
    p_value <- serial_test$serial$p.value
    # Add results to the data frame
    BG_results <- rbind(BG_results,
                        data.frame(lags = lags,
                                   test_stat = test_stat,
                                   p_value = p_value))
  }
  return(BG_results)
}
#=======================================================================#
# ----

#=======================================================================#
# ===================== Multi ADF test ================================ #
#=======================================================================#

# Augmented Dickey-Fuller unit-root test with optimal lag order,
# applied to multiple variables ----
multi_ADF <- function(series = r_all, IC = c("AIC", "HQ")){
  IC <- match.arg(IC)
  # Creating a list outside the loop that will contain the results
  results_list <- list()
  if (IC == "AIC") {
    for (i in 1:ncol(series)) {
      ts <- series[,i]
      optimal_lag_AIC <- VARselect(ts)$selection[[1]]
      optimal_lag_HQ  <- VARselect(ts)$selection[[2]]
      adf_test <- adf.test(ts, k = optimal_lag_AIC)
      results_list[[i]] <- data.frame(
        asset_name = colnames(series)[i],
        AIC_lag = optimal_lag_AIC,
        p_value = adf_test$p.value,
        test_statistic = adf_test$statistic
      )
    }
    results_tibble <- bind_rows(results_list)
    return(results_tibble)
  } else {
    for (i in 1:ncol(series)) {
      ts <- series[,i]
      optimal_lag_AIC <- VARselect(ts)$selection[[1]]
      optimal_lag_HQ  <- VARselect(ts)$selection[[2]]
      adf_test <- adf.test(ts, k = optimal_lag_HQ)
      results_list[[i]] <- data.frame(
        asset_name = colnames(series)[i],
        HQ_lag = optimal_lag_HQ,
        p_value = adf_test$p.value,
        test_statistic = adf_test$statistic
      )
    }
    results_tibble <- bind_rows(results_list)
    return(results_tibble)
  }
}
#========================================================================#
# ----
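# ---------------------------------------------------------------------- #
# Usage sketch (illustration only): the diagnostic helpers above are
# applied to the estimated VAR models further below, e.g.
#   multi_ADF(series = r_all, IC = "AIC")        # unit-root tests per series
#   multi_BG(var_return_all, max_order = 10)     # Breusch-Godfrey serial test
#   multi_ARCH(var_return_all, max_order = 10)   # multivariate ARCH-LM test
# ---------------------------------------------------------------------- #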
#========================================================================#
# ====================== Criteria table ================================ #
#========================================================================#

# Displaying optimal lags given different types of information criterion ----
criterion_table <- function(data = r_all, type = c("r", "v")){
  r_criterion <- data.frame(criterion = c("AIC", "HQ", "SC", "FPE"),
                            lag_all = integer(4),
                            lag_18 = integer(4),
                            lag_19 = integer(4),
                            lag_20 = integer(4),
                            lag_21 = integer(4),
                            lag_22 = integer(4))
  # Iterate over criteria
  for (i in 1:4) {
    # Get lag_all for criterion i
    lag_all <- vars::VARselect(data, lag.max = 10, type = "const")$selection[[i]]
    # Iterate over years
    for (year in c("18", "19", "20", "21", "22")) {
      # Get lag for criterion i and year (looks up r_18, ..., or v_18, ...)
      lag <- vars::VARselect(get(paste0(type, "_", year)),
                             lag.max = 10, type = "const")$selection[[i]]
      # Fill in the data frame
      r_criterion[i, paste0("lag_", year)] <- lag
    }
    # Fill in the lag_all column
    r_criterion[i, "lag_all"] <- lag_all
  }
  return(r_criterion)
}
#========================================================================#
# ----

#========================================================================#
# ============== Display significant values of VAR ===================== #
#========================================================================#

# Extracting p-values from the VAR model ----
VAR_signif <- function(VAR_model = var_return_all){
  coef <- as_tibble(coef(VAR_model))
  p_values <- as_tibble(lapply(coef, "[", , "Pr(>|t|)"))
  # Regressor names start at column 49 of datamat (after the 48 endogenous variables)
  p_values <- p_values %>%
    mutate(colnames(VAR_model$datamat)[49:(length(VAR_model$datamat))],
           .before = 1)
  p_values <- p_values %>% rename("Regressor" = colnames(p_values[,1]))
  return(p_values)
}
# =======================================================================#
# ----

#========================================================================#
# ==================== Rolling Spillovers ============================== #
#========================================================================#

# Function for making a tibble of rolling spillovers ----
rolling_spillovers <- function(r_series = r_all){
  opt_lag_all = vars::VARselect(r_series, lag.max = 10, type = "const")$selection[2]
  # Generating rolling data ----
  time_all <- as.character(as.Date(data[-1,]$Date, tz = "UTC"))
  # Add the dates as an index to the return series
  rownames(r_series) <- time_all
  # Set the estimation parameters for the rolling window
  params_est = list(p = opt_lag_all, type = "const")
  # Get the rolling window estimates
  sp_return_300 <- spilloverRollingDY12(r_series, n.ahead = 10, no.corr = F,
                                        "VAR", params_est = params_est,
                                        window = 300)
  sp_return_200 <- spilloverRollingDY12(r_series, n.ahead = 10, no.corr = F,
                                        "VAR", params_est = params_est,
                                        window = 200)
  sp_return_100 <- spilloverRollingDY12(r_series, n.ahead = 10, no.corr = F,
                                        "VAR", params_est = params_est,
                                        window = 100)
  sp_100 <- data.frame(
    date = data$Date[(100+1):length(data$Date)],
    sp_100 = overall(sp_return_100)
  )
  names(sp_100)[2] <- "sp_100"
  sp_200 <- data.frame(
    date = data$Date[(200+1):length(data$Date)],
    sp_200 = overall(sp_return_200)
  )
  names(sp_200)[2] <- "sp_200"
  sp_300 <- data.frame(
    date = data$Date[(300+1):length(data$Date)],
    sp_300 = overall(sp_return_300)
  )
  names(sp_300)[2] <- "sp_300"
  # Combine the data frames using bind_rows()
  combined_data <- bind_rows(sp_100, sp_200, sp_300)
  # Group by date and calculate the sum for each date
  result <- combined_data %>%
    group_by(date) %>%
    summarize(sp_100 = sum(sp_100, na.rm = TRUE),
              sp_200 = sum(sp_200, na.rm = TRUE),
              sp_300 = sum(sp_300, na.rm = TRUE)) %>%
    # Replace 0 values with NA
    mutate_all(~ replace(., . == 0, NA))
  return(result)
}
#========================================================================#
# ----

#========================================================================#
# ================== Similarity index ================================== #
#========================================================================#

# Test similarity index ----
similarity_index <- function(A, B) {
  n <- length(A)
  ranks_A <- sapply(A, function(x) match(x, B))
  ranks_B <- 1:n
  distances <- abs(ranks_A - ranks_B)
  similarities <- 1 - distances / pmax(n - ranks_B, ranks_B - 1)
  mean(similarities)
}

# Function for the repeated minimum of the similarity index
similarity_index_min <- function(listA, listB, n_reps) {
  min_sim_index <- Inf
  for (i in 1:n_reps) {
    listB <- sample(listA)
    sim_index <- similarity_index(listA, listB)
    if (sim_index < min_sim_index) {
      min_sim_index <- sim_index
    }
  }
  return(min_sim_index)
}
#========================================================================#
# ----
```

```{r, eval=F}
# ====================================================================== #
# ================ Descriptive statistics ============================== #
# ====================================================================== #
descriptives <- descriptive_table(r_all)

# Statistics quantile table for the full period ----
quantile_full <- tibble(
  Quantile = c("25%", "50%", "75%"),
  Mean     = round(quantile(descriptives$Mean,     c(0.25, 0.5, 0.75)), 2),
  StdDev   = round(quantile(descriptives$StdDev,   c(0.25, 0.5, 0.75)), 2),
  Min      = round(quantile(descriptives$Min,      c(0.25, 0.5, 0.75)), 2),
  Max      = round(quantile(descriptives$Max,      c(0.25, 0.5, 0.75)), 2),
  Kurtosis = round(quantile(descriptives$Kurtosis, c(0.25, 0.5, 0.75)), 2),
  Skewness = round(quantile(descriptives$Skewness, c(0.25, 0.5, 0.75)), 2),
  Sharpe   = round(quantile(descriptives$Sharpe,   c(0.25, 0.5, 0.75)), 2)
)

# Correlations ----
m_all = cor(r_all, method = "spearman")

# Print plot
corr_plot(m_all)

# The equally weighted index ----
# Create a date object outside the loop
EW_index <- data.frame(date = data[,1])

# Iterate and build the equally weighted index
for (i in 1:nrow(data[,-1])) {
  indx <- sum(data[i,-1]) / ncol(data[,-1])
  EW_index[i,2] <- indx
}

# Get natural log returns
EW_index_return <- EW_index %>%
  mutate(across(2:ncol(.), ~ (log(.x / lag(.x))))) %>%
  slice(-1)

# Combine date, returns, and index
# (stored under a new name so the price data in `data` is not overwritten)
EW_plot_data <- data.frame(
  Year = EW_index[,1][1:(length(EW_index$V2)-1)],
  Volatility = EW_index_return$V2,
  Index = EW_index$V2[1:(length(EW_index$V2)-1)]
)

# Construct separate plots for each series
obj1 <- xyplot(Volatility ~ Year, EW_plot_data, type = "l", lwd = 1.5, col = "#42D0E1")
obj2 <- xyplot(Index ~ Year, EW_plot_data, type = "l", lwd = 1.5, col = "#343232")

# Make the plot with a second y-axis:
EW_plot <- doubleYScale(obj1, obj2, add.ylab2 = TRUE)
```
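
The next chunk selects the VAR lag length with `vars::VARselect()`, indexing the `selection` vector by position (1 = AIC, 2 = HQ, 3 = SC, 4 = FPE). A minimal, self-contained sketch of what that object looks like, on simulated data that is purely illustrative and not part of the analysis:

```{r, eval=F}
library(vars)

# Two simulated return-like series, purely for illustration
set.seed(1)
toy <- cbind(x = rnorm(500), y = rnorm(500))

sel <- VARselect(toy, lag.max = 10, type = "const")
sel$selection        # named vector: AIC(n), HQ(n), SC(n), FPE(n)
sel$selection[[2]]   # the HQ choice, i.e. what selection[p] with p = 2 picks below
```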
```{r, eval=F}
#========================================================================#
# =================== VAR model Returns ================================ #
#========================================================================#

# IC criterion used for the VAR models ----
p = 2 # AIC(n): 1, HQ(n): 2, SC(n): 3, FPE(n): 4

# Selecting the optimal lag order for the VAR model, given "p" ----
opt_lag_all = vars::VARselect(r_all, lag.max = 10, type = "const")$selection[p]
opt_lag_18  = vars::VARselect(r_18,  lag.max = 10, type = "const")$selection[p]
opt_lag_19  = vars::VARselect(r_19,  lag.max = 10, type = "const")$selection[p]
opt_lag_20  = vars::VARselect(r_20,  lag.max = 10, type = "const")$selection[p]
opt_lag_21  = vars::VARselect(r_21,  lag.max = 10, type = "const")$selection[p]
opt_lag_22  = vars::VARselect(r_22,  lag.max = 10, type = "const")$selection[p]

# Returning optimal lag orders for all samples ----
VAR_Return_lags <- criterion_table(data = r_all, type = "r")

# Estimation of the VAR model for the return series ----
var_return_all = vars::VAR(r_all, p = opt_lag_all, type = "const")
var_return_18  = vars::VAR(r_18,  p = opt_lag_18,  type = "const")
var_return_19  = vars::VAR(r_19,  p = opt_lag_19,  type = "const")
var_return_20  = vars::VAR(r_20,  p = opt_lag_20,  type = "const")
var_return_21  = vars::VAR(r_21,  p = opt_lag_21,  type = "const")
var_return_22  = vars::VAR(r_22,  p = opt_lag_22,  type = "const")

# Estimation of the VAR model including HAC ----
var_return_all_HAC = VAR_HAC(r_all, p = opt_lag_all, type = "const", se_type = "HC2")
var_return_18_HAC  = VAR_HAC(r_18,  p = opt_lag_18,  type = "const", se_type = "HC2")
var_return_19_HAC  = VAR_HAC(r_19,  p = opt_lag_19,  type = "const", se_type = "HC2")
var_return_20_HAC  = VAR_HAC(r_20,  p = opt_lag_20,  type = "const", se_type = "HC2")
var_return_21_HAC  = VAR_HAC(r_21,  p = opt_lag_21,  type = "const", se_type = "HC2")
var_return_22_HAC  = VAR_HAC(r_22,  p = opt_lag_22,  type = "const", se_type = "HC2")

# Extracting p-values from the VAR model ----
var_pvals <- VAR_signif(VAR_model = var_return_all_HAC)

# Show only significant values as % of total observations ----
signif_VAR_return <- VAR_signif(VAR_model = var_return_all_HAC) %>%
  mutate(across(-1, ~ ifelse(. <= 0.05, ., NA))) %>%
  summarize(across(everything(), ~ sum(!is.na(.))/n()*100, .names = "{.col}")) %>%
  pivot_longer(everything(), names_to = "Regressors", values_to = "Percentage") %>%
  slice(-1) %>%
  arrange(desc(Percentage))

# ====================================================================== #
# ================== Diagnostics for return ============================ #
# ====================================================================== #

# Augmented Dickey-Fuller test on the returns ----
adf_test <- multi_ADF(series = r_all, IC = "AIC")

# Serial correlation ----
serial_test_all <- multi_BG(var_return_all, max_order = 10)

# Heteroskedasticity ----
arch_test <- multi_ARCH(var_return_all, max_order = 10)

# Normal distribution of the residuals ----
norm_test_all <- normality.test(var_return_all, multivariate.only = TRUE)
norm_test_tibble <- tibble(
  Values = c("test statistic", "p-value"),
  JB_Test = c(norm_test_all$jb.mul$JB$statistic,
              norm_test_all$jb.mul$JB$p.value),
  kurtosis = c(norm_test_all$jb.mul$Kurtosis$statistic,
               norm_test_all$jb.mul$Kurtosis$p.value),
  skewness = c(norm_test_all$jb.mul$Skewness$statistic,
               norm_test_all$jb.mul$Skewness$p.value)
)

# Testing for structural breaks in the residuals of the VAR model ----
stability_test <- stability(var_return_all, type = "OLS-CUSUM")

# List outside the loop
stability_plots <- list()

# OLS-based CUSUM test
for (i in stability_test$names){
  stability_plots[[i]] <- plot(stability_test$stability[[i]], main = i)
}
```
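
`spilloverDY12()` in the next chunk builds the Diebold-Yilmaz (2012) connectedness table from the forecast-error-variance decomposition of the VAR; the total spillover index is the share of forecast-error variance attributable to shocks in other assets. A minimal sketch of that calculation on a hand-made, row-normalised FEVD matrix (the numbers are invented for illustration only):

```{r, eval=F}
# Toy 3-asset FEVD matrix: row i gives the share of asset i's forecast-error
# variance attributed to shocks in each asset (rows sum to 1).
fevd <- matrix(c(0.70, 0.20, 0.10,
                 0.15, 0.75, 0.10,
                 0.10, 0.25, 0.65),
               nrow = 3, byrow = TRUE)

# Total spillover index: average share of variance due to *other* assets,
# expressed in percent (cf. Diebold & Yilmaz, 2012)
total_spillover <- 100 * sum(fevd[row(fevd) != col(fevd)]) / sum(fevd)
total_spillover   # 30% for this toy matrix
```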
```{r, eval=F}
#========================================================================#
# ===================== Return Spillovers ============================== #
#========================================================================#

# Creating spillover tables for all periods ----
spillovertb_all = spilloverDY12(var_return_all, n.ahead = 10, no.corr = F)
spillovertb_18  = spilloverDY12(var_return_18,  n.ahead = 10, no.corr = F)
spillovertb_19  = spilloverDY12(var_return_19,  n.ahead = 10, no.corr = F)
spillovertb_20  = spilloverDY12(var_return_20,  n.ahead = 10, no.corr = F)
spillovertb_21  = spilloverDY12(var_return_21,  n.ahead = 10, no.corr = F)
spillovertb_22  = spilloverDY12(var_return_22,  n.ahead = 10, no.corr = F)

# Making a spillover table object (returning a tibble)
spillover_table_return <- spillover_table(VAR_data = var_return_all,
                                          n.ahead = 10, no.corr = F)

spillover_index <- round(sum(spillover_table_return[1:48, 50]) / 48, 2)

#========================================================================#
# ==================== Rolling Spillovers ============================== #
#========================================================================#

# Getting rolling spillover data for 100, 200, and 300 rolling days ----
# rolling_return <- rolling_spillovers(r_series = r_all)

# Retrieving pre-saved rolling spillover data
rolling_return <- read_excel("rolling_spillover_return.xlsx",
                             sheet = "rolling_spillover")

# Plotting rolling spillover
rolling_spillover_graph <- rolling_spillover_plot(data = rolling_return,
                                                  title = "Total return spillover index")

# Printing graph
rolling_spillover_graph
```

```{r, eval=F}
#========================================================================#
# ====================== Marginal model ================================ #
#========================================================================#

# # Optimal orders for all GARCH-type models.
# gna_order <- opt_orders(data = r_all, max_order = 1, type = "eGARCH")

# Loading pre-estimated optimal orders ----
gna_order <- read_excel("InfoCriteria_results.xlsx", sheet = "AIC OR HQC")

# Initializing a list to store the GARCH model results (outside the loop)
garch_results <- list()

for (i in 1:nrow(gna_order)) {
  # Extract the garchOrder and armaOrder values and convert them to numeric vectors
  garchOrder_vec <- as.numeric(strsplit(gsub("c\\(|\\)", "", gna_order$garchOrder[i]), ",")[[1]])
  armaOrder_vec  <- as.numeric(strsplit(gsub("c\\(|\\)", "", gna_order$armaOrder[i]), ",")[[1]])
  # Create a row_spec object for the current row of gna_order
  row_spec <- ugarchspec(variance.model = list(model = "eGARCH",
                                               garchOrder = garchOrder_vec),
                         mean.model = list(armaOrder = armaOrder_vec,
                                           include.mean = TRUE),
                         distribution.model = "std")
  # Fit the GARCH model for the i-th column
  garch_results[[i]] <- ugarchfit(data = r_all[,i], spec = row_spec)
}

# Assign the results to garch_results_i
for (i in 1:ncol(r_all)) {
  assign(paste0("garch_results_", i), garch_results[[i]])
}

# Returning p-values and estimates for the GARCH models ----
# Function to generate a tibble for each GARCH result
generate_tibble <- function(result) {
  coef_names <- names(result@fit$coef)
  Estimates  <- round(as_tibble(result@fit$robust.matcoef)[, 1], 4)
  p_values   <- round(as_tibble(result@fit$robust.matcoef)[, 4], 4)
  tibble(coef_names, Estimates, p_values)
}

# Generate a list of tibbles using the generate_tibble function
tibble_list <- map(garch_results, generate_tibble)

# Create a function that takes a tibble and returns it grouped by `coef_names`
group_by_coef_names <- function(tib, select = "Estimate") {
  tib %>%
    group_by(coef_names) %>%
    summarise_at(vars(contains(select)), mean, na.rm = TRUE)
}

# Use purrr::reduce to apply the function to all tibbles in the list,
# combining them into one tibble
combined_tibble <- purrr::reduce(tibble_list, full_join, by = "coef_names") %>%
  group_by_coef_names() %>%
  rename_with(~gsub(".USD", "", colnames(r_all)), -coef_names)
```
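
Each marginal model above is an ARMA-eGARCH with Student-t innovations fitted via rugarch. A single-series sketch of the same spec/fit/extract pattern, on simulated data so it is self-contained (the orders and data are illustrative, not the ones estimated in the thesis):

```{r, eval=F}
library(rugarch)

set.seed(42)
toy_ret <- rnorm(1000, sd = 0.02)   # stand-in for one daily return series

spec <- ugarchspec(
  variance.model     = list(model = "eGARCH", garchOrder = c(1, 1)),
  mean.model         = list(armaOrder = c(1, 1), include.mean = TRUE),
  distribution.model = "std"
)

fit <- ugarchfit(data = toy_ret, spec = spec, solver = "hybrid")

head(sigma(fit))                          # conditional volatility (used later)
head(residuals(fit, standardize = TRUE))  # standardised residuals for diagnostics
```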
```{r, eval=F}
#========================================================================#
# ================= Marginal model diagnostics ========================= #
#========================================================================#

# Loop over the garch_results_i objects and extract the residuals
for (i in 1:48) {
  # Extract the i-th garch_results_ object
  fit_opt <- get(paste0("garch_results_", i))
  # Extract the residuals from the i-th ugarchfit object
  resid_opt <- residuals(fit_opt, standardize = TRUE)
  # Store the residuals in a separate object named resid_opt_i
  assign(paste0("resid_opt_", i), resid_opt)
}

# Autocorrelation (Ljung-Box tests) ----
# Create an empty data frame to store the p-values
Box_results <- data.frame(
  ticker = character(),
  statistic = numeric(),
  p.value = numeric(),
  stringsAsFactors = FALSE
)

# Loop over the residuals and run the Box test
for (i in 1:48) {
  # Extract the i-th residual vector
  resid <- get(paste0("resid_opt_", i))
  # Run the Box test on the i-th squared residual vector
  box_test <- Box.test(resid^2, lag = min(10, 48/2), type = "Ljung-Box")
  # Append the p-value to the data frame
  Box_results <- rbind(Box_results,
                       data.frame(ticker = colnames(r_all)[i],
                                  statistic = box_test[[1]],
                                  p.value = box_test$p.value,
                                  stringsAsFactors = FALSE
                                  )
                       )
}

# Normality test ----
# Create an empty data frame to store the results
shapiro_results <- data.frame(ticker = character(),
                              statistic = numeric(),
                              p.value = numeric())

for (i in 1:48) {
  # Perform the Shapiro-Wilk test on the i-th residual series
  shapiro <- shapiro.test(as.vector(get(paste0("resid_opt_", i))))
  # Store the results in the data frame
  shapiro_results <- rbind(shapiro_results,
                           data.frame(ticker = colnames(r_all)[i],
                                      statistic = shapiro$statistic,
                                      p.value = shapiro$p.value
                                      )
                           )
}
```

```{r, eval=F}
#===============================================================================#
# =================== Retrieving volatility component ========================= #
#===============================================================================#

# Initialising an empty matrix to store the results
volatilities = matrix(nrow = nrow(r_all), ncol = ncol(r_all))

# Extracting the volatility component of every GARCH model
for (i in 1:ncol(r_all)) {
  # Extract the conditional standard deviation from the i-th fitted GARCH model
  volatilities[,i] <- garch_results[[i]]@fit$sigma
}

# Convert the matrix to a data frame named volatilities
volatilities <- as.data.frame(volatilities)
names(volatilities) <- colnames(r_all)
# Returns start one observation after the first price, hence data$Date[-1]
volatilities <- cbind(date = data$Date[-1], volatilities)

# Convert the date column to a format that only shows the year
volatilities$year <- format(volatilities$date, "%Y")

# Create a list of data frames, one for each year
df_list <- split(volatilities, volatilities$year)

# Create separate data frames for each year
v_all <- as.data.frame(volatilities) %>% select(-c(date, year))
v_18  <- df_list$`2018` %>% select(-c(date, year))
v_19  <- df_list$`2019` %>% select(-c(date, year))
v_20  <- df_list$`2020` %>% select(-c(date, year))
v_21  <- df_list$`2021` %>% select(-c(date, year))
v_22  <- df_list$`2022` %>% select(-c(date, year))
# ===================================================================== #
# ----
```

```{r, eval=F}
#===============================================================================#
# ======================== VAR model of volatilities ========================== #
#===============================================================================#

# AIC(n): 1, HQ(n): 2, SC(n): 3, FPE(n): 4
p = 2

# Selecting the lag length for the VAR model ----
#' 1: AIC
opt_vol_lag_all = vars::VARselect(v_all, lag.max = 10, type = "const")$selection[p]
opt_vol_lag_18  = vars::VARselect(v_18,  lag.max = 10, type = "const")$selection[p]
opt_vol_lag_19  = vars::VARselect(v_19,  lag.max = 10, type = "const")$selection[p]
opt_vol_lag_20  = vars::VARselect(v_20,  lag.max = 10, type = "const")$selection[p]
opt_vol_lag_21  = vars::VARselect(v_21,  lag.max = 10, type = "const")$selection[p]
opt_vol_lag_22  = vars::VARselect(v_22,  lag.max = 10, type = "const")$selection[p]

# Showcasing the criteria ----
VAR_Volatility_lags <- criterion_table(data = v_all, type = "v")

# ==================================== #
# Estimating the VAR model on the volatilities ----
var_vol_all <- VAR(v_all, p = opt_vol_lag_all, type = "const")
var_vol_18  <- VAR(v_18,  p = opt_vol_lag_18,  type = "const")
var_vol_19  <- VAR(v_19,  p = opt_vol_lag_19,  type = "const")
var_vol_20  <- VAR(v_20,  p = opt_vol_lag_20,  type = "const")
var_vol_21  <- VAR(v_21,  p = opt_vol_lag_21,  type = "const")
var_vol_22  <- VAR(v_22,  p = opt_vol_lag_22,  type = "const")

# Estimation of the VAR model including HAC ----
var_vol_all_HAC = VAR_HAC(v_all, p = opt_vol_lag_all, type = "const", se_type = "HC2")
var_vol_18_HAC  = VAR_HAC(v_18,  p = opt_vol_lag_18,  type = "const", se_type = "HC2")
var_vol_19_HAC  = VAR_HAC(v_19,  p = opt_vol_lag_19,  type = "const", se_type = "HC2")
var_vol_20_HAC  = VAR_HAC(v_20,  p = opt_vol_lag_20,  type = "const", se_type = "HC2")
var_vol_21_HAC  = VAR_HAC(v_21,  p = opt_vol_lag_21,  type = "const", se_type = "HC2")
var_vol_22_HAC  = VAR_HAC(v_22,  p = opt_vol_lag_22,  type = "const", se_type = "HC2")

# Extracting p-values from the VAR model ----
var_v_pval <- VAR_signif(VAR_model = var_vol_all_HAC)

# Show only significant values as % of total observations ----
signif_VAR_Volatility <- VAR_signif(VAR_model = var_vol_all_HAC) %>%
  mutate(across(-1, ~ ifelse(. <= 0.05, ., NA))) %>%
  summarize(across(everything(), ~ sum(!is.na(.))/n()*100, .names = "{.col}")) %>%
  pivot_longer(everything(), names_to = "Regressors", values_to = "Percentage") %>%
  slice(-1) %>%
  arrange(desc(Percentage))
```

```{r, eval=F}
#===============================================================================#
# ===================== Diagnostics for volatility VAR ======================== #
#===============================================================================#

# Augmented Dickey-Fuller test on the volatilities ----
adf_test <- multi_ADF(series = v_all, IC = "AIC")

# Serial correlation ----
serial_test_all <- multi_BG(var_vol_all, max_order = 10)

# Heteroskedasticity ----
arch_test_vol <- multi_ARCH(var_vol_all, max_order = 10)

# Normal distribution of the residuals ----
norm_test_vol <- normality.test(var_vol_all, multivariate.only = TRUE)
norm_test_tibble_vol <- tibble(
  Values = c("test statistic", "p-value"),
  JB_Test = c(norm_test_vol$jb.mul$JB$statistic,
              norm_test_vol$jb.mul$JB$p.value),
  kurtosis = c(norm_test_vol$jb.mul$Kurtosis$statistic,
               norm_test_vol$jb.mul$Kurtosis$p.value),
  skewness = c(norm_test_vol$jb.mul$Skewness$statistic,
               norm_test_vol$jb.mul$Skewness$p.value)
)

# Structural breaks in the residuals of the VAR model ----
stability_vol_test <- stability(var_vol_all, type = "OLS-CUSUM")

# List outside the loop
stability_plots_vol <- list()

# OLS-based CUSUM test
for (i in stability_vol_test$names){
  stability_plots_vol[[i]] <- plot(stability_vol_test$stability[[i]], main = i)
}
```
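
Besides the custom `spillover_table()` helper, the frequencyConnectedness objects created below can be queried directly; `overall()` is already used for the rolling windows, and the package also exposes directional extractors. A hedged sketch (the object name `sp_vol_static` is hypothetical; the thesis code below works with `spillover_table_volatility` instead):

```{r, eval=F}
# Directional measures from a static volatility spillover object (illustration)
sp_vol_static <- spilloverDY12(var_vol_all, n.ahead = 10, no.corr = F)

overall(sp_vol_static)   # total spillover index
to(sp_vol_static)        # directional spillovers TO others
from(sp_vol_static)      # directional spillovers FROM others
net(sp_vol_static)       # net spillover: to minus from
```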
```{r, eval=F}
#========================================================================#
# ================= Volatility Spillovers ============================== #
#========================================================================#

# Creating spillover tables for all periods
# (using the volatility VAR models, not the return VARs) ----
spillovertb_vol_all = spilloverDY12(var_vol_all, n.ahead = 10, no.corr = F)
spillovertb_vol_18  = spilloverDY12(var_vol_18,  n.ahead = 10, no.corr = F)
spillovertb_vol_19  = spilloverDY12(var_vol_19,  n.ahead = 10, no.corr = F)
spillovertb_vol_20  = spilloverDY12(var_vol_20,  n.ahead = 10, no.corr = F)
spillovertb_vol_21  = spilloverDY12(var_vol_21,  n.ahead = 10, no.corr = F)
spillovertb_vol_22  = spilloverDY12(var_vol_22,  n.ahead = 10, no.corr = F)

# Making a spillover table object (returning a tibble)
spillover_table_volatility <- spillover_table(VAR_data = var_vol_all,
                                              n.ahead = 10, no.corr = F)

#========================================================================#
# ==================== Rolling Spillovers ============================== #
#========================================================================#

# Loading previously computed rolling spillovers from the Excel sheet
rolling_spillover_data_vol <- read_excel("rolling_spillover_volatility.xlsx",
                                         sheet = "rolling_spillover")

# Plotting rolling spillover
rolling_spillover_graph_vol <- rolling_spillover_plot(data = rolling_spillover_data_vol,
                                                      title = "Total volatility spillover index")

# Printing graph
rolling_spillover_graph_vol
```

```{r, eval=F}
#========================================================================#
# ================== Exploratory analysis ============================== #
#========================================================================#

# Displaying the largest contributors of spillover shocks ----
spillover_table__all <- spillover_table(VAR_data = var_vol_all, n.ahead = 10, no.corr = F)
spillover_table__18  <- spillover_table(VAR_data = var_vol_18,  n.ahead = 10, no.corr = F)
spillover_table__19  <- spillover_table(VAR_data = var_vol_19,  n.ahead = 10, no.corr = F)
spillover_table__20  <- spillover_table(VAR_data = var_vol_20,  n.ahead = 10, no.corr = F)
spillover_table__21  <- spillover_table(VAR_data = var_vol_21,  n.ahead = 10, no.corr = F)
spillover_table__22  <- spillover_table(VAR_data = var_vol_22,  n.ahead = 10, no.corr = F)

c_all <- tibble(
  tickers = colnames(r_all),
  c_to = as.double(unlist(spillover_table__all[nrow(spillover_table__all),][2:49]))
) %>% arrange(desc(c_to))

c_18 <- tibble(
  tickers = colnames(r_all),
  c_to = as.double(unlist(spillover_table__18[nrow(spillover_table__18),][2:49]))
) %>% arrange(desc(c_to))

c_19 <- tibble(
  tickers = colnames(r_all),
  c_to = as.double(unlist(spillover_table__19[nrow(spillover_table__19),][2:49]))
) %>% arrange(desc(c_to))

c_20 <- tibble(
  tickers = colnames(r_all),
  c_to = as.double(unlist(spillover_table__20[nrow(spillover_table__20),][2:49]))
) %>% arrange(desc(c_to))

c_21 <- tibble(
  tickers = colnames(r_all),
  c_to = as.double(unlist(spillover_table__21[nrow(spillover_table__21),][2:49]))
) %>% arrange(desc(c_to))

c_22 <- tibble(
  tickers = colnames(r_all),
  c_to = as.double(unlist(spillover_table__22[nrow(spillover_table__22),][2:49]))
) %>% arrange(desc(c_to))

# Create a combined tibble with all the data
combined <- bind_rows(list(
  Static = c_all,
  `2018` = c_18,
  `2019` = c_19,
  `2020` = c_20,
  `2021` = c_21,
  `2022` = c_22
), .id = "id")

# Group by period and keep only the top 3 c_to values
top <- combined %>%
  group_by(id) %>%
  top_n(3, c_to)

# Removing -USD from tickers
top$tickers <- gsub("-USD", "", top$tickers)

# Plot the data
ggplot(top, aes(x = id, y = c_to, label = tickers, color = tickers)) +
  geom_point(size = 3) +
  geom_text_repel(size = 2.5, nudge_y = 2, box.padding = 0.5, point.padding = 0.5) +
  labs(x = "Period", y = "Percentage", title = "") +
  theme_classic() +
  guides(color = "none")
```

```{r, eval=F}
#=======================================================================#
# ================== Exploring similarities =========================== #
#=======================================================================#

# Creating a list of all the ranking lists
c_lists <- list(c_all$tickers, c_18$tickers, c_19$tickers,
                c_20$tickers, c_21$tickers, c_22$tickers)

# Create an empty matrix to store the results
results_matrix <- matrix(nrow = length(c_lists), ncol = length(c_lists))

# Set the column and row names (one entry per period)
colnames(results_matrix) <- paste0("c_", 1:length(c_lists), ".tickers")
rownames(results_matrix) <- paste0("c_", 1:length(c_lists), ".tickers")
colnames(results_matrix) <- rownames(results_matrix) <- c("all", "18", "19", "20", "21", "22")

# Iterate through all the combinations and calculate the similarity index
for (i in 1:length(c_lists)) {
  for (j in 1:length(c_lists)) {
    results_matrix[i,j] <- similarity_index(c_lists[[i]], c_lists[[j]])
  }
}

# Only show the lower triangle of the matrix
results_matrix[upper.tri(results_matrix)] <- 0

# Set diagonal elements to 0
diag(results_matrix) <- 0

# Preview matrix
results_matrix

#========================================================================#
# ================== Random order simulation =========================== #
#========================================================================#

# Simulating the lowest possible similarity index ----

# Generate a list of 48 unique labels ----
listi <- c(LETTERS, paste0(rep(LETTERS, 26), LETTERS))[1:48]

# Sample list B as a random permutation of list A
listj <- sample(listi)

# Find the minimum value of the similarity index over n random permutations
similarity_index_min(listi, listj, n_reps = 100000)
```
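
For intuition, the rank-based `similarity_index()` defined in the functions chunk returns 1 when two rankings coincide and lower values as the orderings diverge. A small self-contained check (the letter vectors are placeholders, not tickers from the analysis):

```{r, eval=F}
abc <- c("A", "B", "C", "D")

similarity_index(abc, abc)        # identical ordering  -> 1
similarity_index(abc, rev(abc))   # reversed ordering   -> a lower value
```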