# ---------------------------------------------------------------------------
# Reading data into R and further regression examples
#
# Lecture script, meant to be stepped through interactively:
#   1. import DAXwithComp.txt and recover the DAX composition by regressing
#      the index on its constituents,
#   2. import SPX.txt, repair the two-digit-year dates and estimate the
#      average yearly growth rate of the S&P 500 with a log-linear regression.
#
# Calls that are *meant* to fail (to demonstrate a pitfall) are wrapped in
# try() so that the script can also be run with source() without stopping.
# ---------------------------------------------------------------------------

help("read.table")

# Import the data into R: adjust the directory below to your machine.
# NOTE: use forward slashes "/" in the path, not the backslashes "\" that
# the Windows file explorer shows.
data_dir <- "C:/Users/detlef/HSRM/Vorlesungen/WS201920/Oekonometrie"

daxwithcomp <- read.table(
  file.path(data_dir, "DAXwithComp.txt"),
  header = TRUE,
  sep = ";"
)

# look at everything:
daxwithcomp

# only the first or last 6 rows:
head(daxwithcomp)
tail(daxwithcomp)

# 20 instead of 6 rows:
head(daxwithcomp, 20)
tail(daxwithcomp, 20)

# size:
dim(daxwithcomp)

# compact overview of the numeric data:
summary(daxwithcomp)

# column names, if any:
names(daxwithcomp)

# information about the object / data structure:
class(daxwithcomp)
str(daxwithcomp)
mode(daxwithcomp)  # ..hm, one would rather expect "data.frame" here..

# let's take the last 4 years of data, 2011 - 2014:
daxwithcomp[1558, ]
daxwithcomp[1559, ]
daxwithcomp <- daxwithcomp[-seq_len(1558), ]
head(daxwithcomp)
tail(daxwithcomp)
dim(daxwithcomp)

# remove all rows with NA's:
daxwithcomp <- na.omit(daxwithcomp)
dim(daxwithcomp)

# let's check some underlyings:
head(daxwithcomp)
dax <- daxwithcomp[, 1]
alv <- daxwithcomp[, 3]
bas <- daxwithcomp[, 4]
plot(dax, type = "l")
plot(alv, type = "l")
plot(bas, type = "l")

# normalize to 100 at start to compare all three:
ndax <- dax / dax[1] * 100
nalv <- alv / alv[1] * 100
nbas <- bas / bas[1] * 100

plot(ndax, type = "l", ylim = range(ndax, nalv, nbas))
lines(nalv, col = "red")
lines(nbas, col = "green")

# Now let's determine the DAX composition through regression:
uls <- daxwithcomp[, 2:31]  # 'uls' for 'underlyings'
str(uls)                    # uls is a data frame
try(lm(dax ~ uls))          # doesn't work: a data frame is not a valid term

# let's change uls from data frame to matrix:
uls <- as.matrix(uls)
res <- lm(dax ~ uls)        # this works
summary(res)

plot(dax, type = "l")
lines(fitted(res), col = 2)

# there is no constant term in the DAX,
# thus: regression without intercept
res <- lm(dax ~ -1 + uls)
summary(res)

plot(dax, type = "l")
lines(fitted(res), col = 2)


# ---------------------------------------------------------------------------
# S&P 500 regression
# ---------------------------------------------------------------------------

# Import the data into R. stringsAsFactors = TRUE is set explicitly so that
# the Date column is a factor on every R version (the default changed to
# FALSE in R 4.0.0); the type inspection below relies on that.
spx <- read.table(
  file.path(data_dir, "SPX.txt"),
  header = TRUE,
  sep = ";",
  stringsAsFactors = TRUE
)
spx  # quite large..
head(spx)
tail(spx)
names(spx)

# technical information:
class(spx)
str(spx)   # is of type "data.frame"
mode(spx)

# extract columns:
days <- spx$Date
index <- spx$Adj.Close

plot(index, type = "l")
plot(log(index), type = "l")

# let's try to get the actual dates onto the x-axis:
plot(days, index)  # takes some time until we get something
                   # which is not what we want..

str(days)    # is "Factor", should probably be something like "Date"..
class(days)
mode(days)

str(index)
class(index)
mode(index)

#-----------------------------------------------------------
# In the following we take a closer look at date formatting,
# see also the 4 pdf pages "Date Formatting in R" on the
# lecture homepage:

try(as.Date(days))  # doesn't work: the format has to be given explicitly
days <- as.Date(days, format = "%d-%m-%y")
str(days)           # ok, is "Date" now
class(days)
mode(days)

head(days)
tail(days)
plot(days, index)   # technically we have Date format now,
                    # but 1950 has turned into 2050..

# we fix this by hand:
# dates can be added and subtracted, so let's try the following:
days[1]
startdate <- as.Date("1950-01-03")
startdate
wrongstartdate <- days[1]
wrongstartdate
days[1] - wrongstartdate + startdate
days[2] - wrongstartdate + startdate
# ok, that seems to work.

# we have to correct only entries with year >= 2050.
# extract the 4-digit year from the date; convert it to integer so that
# the comparison below is numeric and not a comparison of strings:
years <- as.integer(format(days, "%Y"))
years
head(years)
tail(years)
length(years)

# let's try this. ifelse() drops the "Date" class, so the result is kept in
# a separate numeric variable instead of overwriting 'days':
days_num <- ifelse(
  years >= 2050,
  as.Date(days - wrongstartdate + startdate),
  days
)
head(days_num)

# internally, date values are given by integers equal to the number of
# days since January 1st 1970, with negative numbers for earlier dates.
# days_num is represented by these integers. Let's try to see the actual
# dates again:
class(days_num)
try(as.Date(days_num))  # fails on older R versions, which require 'origin'
days2 <- format(days_num, format = "%d-%m-%Y")
days2                   # also does not give dates..
refDate <- as.Date("1970-01-01")
refDate
class(refDate)
days3 <- refDate + days_num
head(days3)
tail(days3)
# ok, finally we've made it...
# a quicker solution that never loses the class would be logical indexing:
#   days[years >= 2050] <- days[years >= 2050] - wrongstartdate + startdate

days <- days3

# End of the closer look at date formatting.
#-----------------------------------------------------------


# now we should get a nice plot:
plot(days, index)
plot(days, index, type = "l")
plot(days, log(index))  # alright, looks all good now
plot(days, log(index), type = "l")


# ---------------------------------------------------------------------------
# Determining the growth rate r
# ---------------------------------------------------------------------------

# in order to do the regression, we take the logarithm to obtain:
#
#   log(SP500_t) = log(S_0) + r*(t-t_0)
#
# thus we can do a simple linear regression with 1 regressor being
# the vector x = t_k - t_0 and y = log(SP500_{t_k}):

logindex <- log(index)
# t-t_0 as a year fraction; converted to plain numbers so that the result
# does not carry a misleading "days" unit:
times <- as.numeric(days - days[1], units = "days") / 365.25
head(times)
tail(times)  # looks good, 65, almost 66 years.

# the fact that we calculate t-t_0 as a year fraction means that
# r has the meaning of a yearly growth rate.

# now the actual regression, just 1 line of code:
res <- lm(logindex ~ times)
res

# beta0 = log(S_0) and beta1 = r:
coef(res)
r <- coef(res)[2]
S0 <- exp(coef(res)[1])
r   # a growth rate of 6.9% per year (on the lecture data)
S0  # comparable to spx[1,]
spx[1, ]

# let's look at the fit:
plot(times, logindex)
points(times, fitted(res), col = "red")

plot(times, index)
points(times, exp(fitted(res)), col = "red")