#----------------#
#   Exercise 1   #
#----------------#

# Help page of the import function (useful in an interactive session).
?read.table

# Import the data into R: adjust `data_dir` to the folder that holds the
# data files.
# NOTE: the path must use forward slashes "/", not backslashes; the
# Windows file explorer displays backslashes.
data_dir <- "C:/Users/detlef/HSRM/Vorlesungen/WS201819/Oekonometrie"

dax <- read.table(
  file.path(data_dir, "DAXwithComp.txt"),
  header = TRUE,
  sep = ";"
)

# Look at everything:
dax

# Only the first or last 6 rows:
head(dax)
tail(dax)

# 20 instead of 6 rows:
head(dax, 20)
tail(dax, 20)

# Compact overview of the numeric data:
summary(dax)

# Information about the object / data structure:
str(dax)
mode(dax)
class(dax)

# Names of the columns:
names(dax)

dax[, 4]    # the 4th column
dax$BAS.DE  # is the same
dax$BAS     # is the same, but only because `$` partially matches the
            # column name; the full name is used below so the result
            # cannot silently change if another "BAS..." column appears.

# Now the plots:
plot(dax$GDAX)
plot(dax$GDAX, type = "l")
plot(dax$BAS.DE, type = "l")

# or:
gdax <- dax$GDAX
bas <- dax$BAS.DE
plot(gdax, type = "l")
plot(bas, type = "l")


#----------------#
#   Exercise 2   #
#----------------#

# -------
# 2a)
# -------

# Import the data into R:
spx <- read.table(
  file.path(data_dir, "SPX.txt"),
  header = TRUE,
  sep = ";"
)

spx  # quite large..
head(spx)
tail(spx)
names(spx)

# Technical information:
str(spx)  # is of data type "data.frame"
class(spx)
mode(spx)

# Extract columns:
days <- spx$Date
index <- spx$Adj.Close

plot(index)
plot(log(index))

# Let's try to get the actual dates onto the x-axis.
# With the dates read in as a factor this takes some time and the result
# is not what we want; with dates read in as plain character strings
# (the read.table default since R 4.0) the call may fail outright, hence
# the try() so that the rest of the script still runs.
try(plot(days, index))

str(days)  # a factor (or character); it should rather be something
           # like "Date"..
class(days)
mode(days)

str(index)
class(index)
mode(index)

#-----------------------------------------------------------
# In the following we take a closer look at date formatting,
# see also the 4 pdf pages "Date Formatting in R" on the
# lecture homepage.

# A plain as.Date(days) does not work for these date strings, so the
# format is stated explicitly (day-month-2-digit-year):
days <- as.Date(days, format = "%d-%m-%y")

str(days)  # ok, is now "Date"
class(days)
mode(days)
head(days)
tail(days)

plot(days, index)  # technically we have Date format now,
                   # but 1950 has turned into 2050..

# We fix this by hand.
# Dates can be added and subtracted, so let's try the following:
days[1]
startdate <- as.Date("1950-01-03")
startdate
wrongstartdate <- days[1]
wrongstartdate
days[1] - wrongstartdate + startdate
days[2] - wrongstartdate + startdate  # ok, that seems to work.

# Only entries with year >= 2050 have to be corrected.
# Extract the 4-digit year from the date:
years <- format(days, "%Y")
years
head(years)
tail(years)
length(years)

# Shift the affected entries back. Two pitfalls are avoided here:
#  * ifelse() drops the "Date" class and returns the bare day counts
#    (dates are stored as the number of days since 1970-01-01, negative
#    for earlier dates), so the dates would have to be rebuilt afterwards.
#    Assigning through a logical index keeps `days` a Date vector.
#  * `years` is a character vector; it is converted to integer so that the
#    comparison with 2050 is numeric rather than a string comparison.
# Entries that could not be parsed (NA) are left untouched.
is_wrong_century <- !is.na(days) & as.integer(years) >= 2050L
days[is_wrong_century] <- startdate + (days[is_wrong_century] - wrongstartdate)

class(days)  # still "Date"
head(days)
tail(days)   # ok, the dates are correct now

# End of the closer look at date formatting.
#-----------------------------------------------------------

# Now we should get a nice plot:
plot(days, index)
plot(days, index, type = "l")
plot(days, log(index))  # alright, looks all good now
plot(days, log(index), type = "l")


# -------
# 2b)
# -------

# In order to do the regression, we take the logarithm to obtain:
#
#   log(SP500_t) = log(S_0) + r*(t-t_0)
#
# Thus we can do a simple linear regression with 1 regressor being
# the vector x = t_k - t_0 and y = log(SP500_{t_k}):

logindex <- log(index)

# t - t_0 as a fraction of years. The date difference is converted to a
# plain number of days first, so `times` is an ordinary numeric vector and
# not a "difftime" that would still be labelled in days after the division.
times <- as.numeric(days - days[1], units = "days") / 365.25
head(times)
tail(times)  # looks good, 65, almost 66 years.

# The fact that we calculate t - t_0 in year fractions means that
# r will have the meaning of a yearly growth rate.

# Now the actual regression, just 1 line of code:
res <- lm(logindex ~ times)
res

# beta0 = log(S_0) and beta1 = r:
coef(res)
r <- coef(res)[2]
S0 <- exp(coef(res)[1])
r   # a growth rate of about 7% per year
S0  # comparable to spx[1,]
spx[1, ]

# Let's look at the fit:
plot(times, logindex)
points(times, fitted(res), col = "red")

plot(times, index)
points(times, exp(fitted(res)), col = "red")


# -------
# 2c)
# -------

summary(res)

# Conjecture: Std. Error is the quantity
#
#   sqrt( hat(s^2) * (X^T*X)^(-1)_{j,j} )

n <- length(times)
n
p <- 2  # number of regressors, including constant

hat_s_squared <- sum(residuals(res)^2) / (n - p)

X <- cbind(rep(1, n), times)   # regressors including the constant
XTXinv <- solve(t(X) %*% X)    # the matrix (X^T*X)^{-1}
XTXinv

XTXinv00 <- XTXinv[1, 1]
XTXinv11 <- XTXinv[2, 2]

stderr0 <- sqrt(hat_s_squared * XTXinv00)
stderr1 <- sqrt(hat_s_squared * XTXinv11)
stderr0
stderr1

summary(res)  # ok, that matches


# -------
# 2d)
# -------

# Two-sided t quantiles for the 90%, 95% and 99% confidence levels:
x90 <- qt(0.95, df = n - p)
x90
x95 <- qt(0.975, df = n - p)
x95
x99 <- qt(0.995, df = n - p)
x99

r <- coef(res)[2]
r

rup90 <- r + x90 * stderr1
rup95 <- r + x95 * stderr1
rup99 <- r + x99 * stderr1

rdown90 <- r - x90 * stderr1
rdown95 <- r - x95 * stderr1
rdown99 <- r - x99 * stderr1

confint90 <- c(rdown90, rup90) * 100  # in percent
confint95 <- c(rdown95, rup95) * 100
confint99 <- c(rdown99, rup99) * 100

confint90
confint95
confint99  # quite narrow intervals,
           # due to large n