# ----------------------------------------------
# An Algorithm to get
# 'the most linear independent' Regressors
# ----------------------------------------------

# Load DAX and component data (semicolon-separated, header row).
daxwithcomp <- read.table(
  "C:/Users/Admin/desktop/DAXwithComp.txt",
  header = TRUE, sep = ";"
)
head(daxwithcomp)
tail(daxwithcomp)
names(daxwithcomp)

# Keep only the last 4 years of data, 2011 - 2014.
# Rows 1..1558 are earlier dates -- inspect the boundary first:
daxwithcomp[1558, ]
daxwithcomp[1559, ]
daxwithcomp <- daxwithcomp[-(1:1558), ]
head(daxwithcomp)
tail(daxwithcomp)
dim(daxwithcomp)

# Remove all rows containing NA's:
daxwithcomp <- na.omit(daxwithcomp)
dim(daxwithcomp)

dax <- daxwithcomp[, 1]
uls <- daxwithcomp[, 2:31]  # `uls` for `underlyings`
str(uls)                    # uls is a data frame at this point

# NOTE: lm(dax ~ uls) fails while uls is a data frame;
# lm() wants a numeric matrix on the right-hand side here,
# so coerce first:
uls <- as.matrix(uls)
res <- lm(dax ~ uls)
summary(res)
plot(dax, type = "l")
lines(fitted(res), col = 2)  # fitted() avoids partial matching via res$fit

# Regression without intercept:
res <- lm(dax ~ -1 + uls)
summary(res)
plot(dax, type = "l")
lines(fitted(res), col = 2)

# We want to implement the following algorithm: compute
#
#   p_j := ||P_X(-j) x_j||^2 / ||x_j||^2
#
# for all valid underlyings (30 at the beginning), i.e. the share of
# underlying j explained by projecting it onto the span of the others,
# and remove the underlying j for which p_j is maximal.

# Encode the set of valid underlyings as a boolean vector of length 30.
# For example,
validuls <- rep(FALSE, 30)
validuls[3] <- TRUE
validuls[7] <- TRUE
validuls
# should mean that we only want to use underlying 3 and underlying 7
# for the DAX regression. How is this done technically?
# Actually very easy -- subset the matrix columns with the boolean mask:
res <- lm(dax ~ uls[, validuls])
res
# or without intercept:
res <- lm(dax ~ -1 + uls[, validuls])
res

# ------------------------------------------------
# Start algorithm (interactive version):
# repeatedly remove the underlying that is best explained by the
# remaining ones, until a single underlying is left.
validuls <- rep(TRUE, 30)
namesuls <- colnames(uls)
namesuls

for (iter in 1:29) {
  pj <- rep(0, 30)
  for (i in 1:30) {
    # only score underlyings that are still in the valid set
    if (validuls[i]) {
      y <- uls[, i]
      # regress underlying i on all OTHER remaining underlyings
      uls_ohne_i <- validuls
      uls_ohne_i[i] <- FALSE
      X <- uls[, uls_ohne_i]
      res <- lm(y ~ -1 + X)
      # p_j = ||fitted||^2 / ||y||^2 : share of y explained by the others
      pj[i] <- sum(fitted(res)^2) / sum(y^2)
    }
  }
  # drop the most redundant underlying (largest p_j)
  imax <- which.max(pj)
  validuls[imax] <- FALSE
  print(pj)
  print(paste("underlying", namesuls[imax], "has been removed."))

  # refit the DAX regression on the surviving underlyings and show
  # the fit plus the residuals side by side
  res <- lm(dax ~ -1 + uls[, validuls])
  par(mfrow = c(1, 2))
  info <- paste("after", iter, "iterations")
  plot(dax, type = "l", main = info)
  lines(fitted(res), col = 2)
  plot(dax - fitted(res), type = "l", main = info)
  readline("press enter to show regression summary..")
  print(summary(res))
  readline("press enter to remove next underlying..")
}
# end algorithm
# ------------------------------------------------

# Let's put this into a function.
#
# uls          : numeric matrix of underlyings (one column per series)
# n_uls        : number of underlyings to keep
# useintercept : include an intercept in the auxiliary regressions?
#                (default TRUE, backward compatible with positional calls)
#
# Returns a named logical vector marking the columns that were kept.
GetLinIndUls <- function(uls, n_uls, useintercept = TRUE) {
  n <- ncol(uls)
  validuls <- rep(TRUE, n)
  names(validuls) <- colnames(uls)
  # seq_len() (not 1:(n - n_uls)) so that n_uls == n removes nothing
  for (iter in seq_len(n - n_uls)) {
    pj <- rep(0, n)
    for (i in seq_len(n)) {
      if (validuls[i]) {
        y <- uls[, i]
        # project underlying i onto the span of the other valid ones
        uls_ohne_i <- validuls
        uls_ohne_i[i] <- FALSE
        X <- uls[, uls_ohne_i]
        if (useintercept) {
          res <- lm(y ~ X)
        } else {
          res <- lm(y ~ -1 + X)
        }
        pj[i] <- sum(fitted(res)^2) / sum(y^2)
      }
    }
    # remove the underlying best explained by the others
    imax <- which.max(pj)
    validuls[imax] <- FALSE
  }
  validuls
}

# For some day d we look 60 days in the past and apply the algorithm
# above to determine the 10 most linear independent underlyings.
# Then we move on week by week (5 trading days) to see how this
# set of underlyings is changing over time.
dim(uls)

n_obs  <- nrow(uls)   # number of trading days available
window <- 60          # look-back length in trading days
step   <- 5           # advance one trading week per iteration

# count, per underlying, how often it was selected over all windows
validulstotal <- rep(0, ncol(uls))
names(validulstotal) <- colnames(uls)

for (d in seq(from = window, to = n_obs, by = step)) {
  # select the 10 most linearly independent underlyings in the
  # 60-day window ending at day d (with intercept)
  validuls <- GetLinIndUls(uls[(d - window + 1):d, ], 10, TRUE)
  # logical + numeric: TRUE counts as 1 in the running total
  validulstotal <- validulstotal + validuls
  validulsnames <- names(validuls)[validuls]
  print(validulsnames)
}
validulstotal