## http://cran.r-project.org/doc/contrib/usingR.pdf
## 1. Starting Up
## 1.1 Getting started under Windows
## 1.2 Use of an Editor Script Window
## 1.3 A Short R Session
austpop <- read.table("austpop.txt", header=TRUE)
austpop
> austpop
Year NSW Vic. Qld SA WA Tas. NT ACT Aust.
1 1917 1904 1409 683 440 306 193 5 3 4941
2 1927 2402 1727 873 565 392 211 4 8 6182
3 1937 2693 1853 993 589 457 233 6 11 6836
4 1947 2985 2055 1106 646 502 257 11 17 7579
5 1957 3625 2656 1413 873 688 326 21 38 9640
6 1967 4295 3274 1700 1110 879 375 62 103 11799
7 1977 5002 3837 2130 1286 1204 415 104 214 14192
8 1987 5617 4210 2675 1393 1496 449 158 265 16264
9 1997 6274 4605 3401 1480 1798 474 187 310 18532
names(austpop)
> names(austpop)
[1] "Year" "NSW" "Vic." "Qld" "SA" "WA" "Tas." "NT" "ACT"
[10] "Aust."
-----------------------------python
### austpop <- read.table("austpop.txt", header=TRUE)
file = open('austpop.txt')
header = file.readline().replace('"','').split()
d = [(name, '
### [the remainder of this Python snippet is missing from these notes]
> attach("hills.RData")
> summary(hills)
distance climb time
Min. : 2.000 Min. : 300 Min. : 15.95
1st Qu.: 4.500 1st Qu.: 725 1st Qu.: 28.00
Median : 6.000 Median :1000 Median : 39.75
Mean : 7.529 Mean :1815 Mean : 57.88
3rd Qu.: 8.000 3rd Qu.:2200 3rd Qu.: 68.62
Max. :28.000 Max. :7500 Max. :204.62
> hills
distance climb time
Greenmantle 2.5 650 16.083
Carnethy 6.0 2500 48.350
Craig Dunain 6.0 900 33.650
Ben Rha 7.5 800 45.600
Ben Lomond 8.0 3070 62.267
Goatfell 8.0 2866 73.217
Bens of Jura 16.0 7500 204.617
Cairnpapple 6.0 800 36.367
Scolty 5.0 800 29.750
Traprain 6.0 650 39.750
Lairig Ghru 28.0 2100 192.667
Dollar 5.0 2000 43.050
Lomonds 9.5 2200 65.000
Cairn Table 6.0 500 44.133
Eildon Two 4.5 1500 26.933
Cairngorm 10.0 3000 72.250
Seven Hills 14.0 2200 98.417
Knock Hill 3.0 350 78.650
Black Hill 4.5 1000 17.417
Creag Beag 5.5 600 32.567
Kildcon Hill 3.0 300 15.950
Meall Ant-Suidhe 3.5 1500 27.900
Half Ben Nevis 6.0 2200 47.633
Cow Hill 2.0 900 17.933
N Berwick Law 3.0 600 18.683
Creag Dubh 4.0 2000 26.217
Burnswark 6.0 800 34.433
Largo Law 5.0 950 28.567
Criffel 6.5 1750 50.500
Acmony 5.0 500 20.950
Ben Nevis 10.0 4400 85.583
Knockfarrel 6.0 600 32.383
Two Breweries 18.0 5200 170.250
Cockleroi 4.5 850 28.100
Moffat Chase 20.0 5000 159.833
-----------------------------python
###> attach("hills.RData")
###> summary(hills)
import Rtools
hills = Rtools.readtable_DF('hills.txt')
Rtools.summary(hills)
climb distance time
Min. : 300 Min. : 2 Min. : 15.95
1st Qu.: 725 1st Qu.: 4.5 1st Qu.: 28
Median : 1000 Median : 6 Median : 39.75
Mean : 1815.3 Mean : 7.5286 Mean : 57.876
3rd Qu.: 2200 3rd Qu.: 8 3rd Qu.: 68.625
Max. : 7500 Max. : 28 Max. : 204.62
###> pairs(hills)
#
Rtools.ScatterMatrix(hills)
###> options(digits=3)
###> cor(hills)
### distance climb time
###distance 1.000 0.652 0.920
###climb 0.652 1.000 0.805
###time 0.920 0.805 1.000
Rtools.corrcoefDF(hills)
climb distance time
climb 1 0.6523 0.8052
distance 0.6523 1 0.9196
time 0.8052 0.9196 1
#----------------------------------------------------------------
###> plot(distance ~ stretch,data=elasticband, pch=16)
Rtools.scatterDF(elasticband, 'stretch', 'distance')
###> elastic.lm <- lm(distance~stretch,data=elasticband)
(a_s,b_s,r,tt,stderr)=linregress(array(elasticband['stretch']), array(elasticband['distance']))
(4.5535714285714288,
-63.571428571428584,
0.79699954090878578,
0.031860702945051264,
0.87860816982116141)
### linregress returns: slope, intercept, r, two-tailed p-value, stderr of the estimate
stderr_est = sqrt(np.sum(resid**2)/5.)
16.332045624651993
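stderr_b = stderr_est / sqrt(np.sum((x - x.mean())**2))  ### with x = array(elasticband['stretch']); standard OLS std. error of the slope (R's summary below reports 1.543)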
###> lm(distance ~stretch,data=elasticband)
###Call:
###lm(formula = distance ~ stretch, data = elasticband)
###Coefficients:
###(Intercept) stretch
###-63.571 4.554
###More complete information is available by typing
###> summary(lm(distance~stretch,data=elasticband))
###Call:
###lm(formula = distance ~ stretch, data = elasticband)
###
###Residuals:
### 1 2 3 4 5 6 7
### -2.1071 0.3214 -18.0000 -1.8929 27.7857 -13.3214 7.2143
###
###Coefficients:
### Estimate Std. Error t value Pr(>|t|)
###(Intercept) -63.571 74.332 -0.855 0.4315
###stretch 4.554 1.543 2.951 0.0319 *
###---
###Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
###
###Residual standard error: 16.33 on 5 degrees of freedom
###Multiple R-squared: 0.6352, Adjusted R-squared: 0.5622
###F-statistic: 8.706 on 1 and 5 DF, p-value: 0.03186
import Rtools
Rtools.LinRegDF(elasticband, 'stretch', 'distance', summary=True)
#----------------------------------------------------------------
###2. The following ten observations, taken during the years 1970-79, are on October snow cover for Eurasia.
###(Snow cover is in millions of square kilometers):
###year snow.cover
###1970 6.5
###1971 12.0
###1972 14.9
###1973 10.0
###1974 10.7
###1975 7.9
###1976 21.9
###1977 12.5
###1978 14.5
###1979 9.2
###i. Enter the data into R. [Section 1.3.1 showed one way to do this. To save keystrokes, enter the successive
###years as 1970:1979]
###ii. Plot snow.cover versus year.
###iii. Use the hist() command to plot a histogram of the snow cover values.
###iv. Repeat ii and iii after taking logarithms of snow cover.
### Exercise 2: October snow cover for Eurasia, 1970-79 (millions of square km).
### i. Build the frame from named columns instead of array(zip(...)): zip() is a
### lazy iterator on Python 3, so wrapping it in array() does not yield a 10x2
### table. 'year' is stored as float so both columns share one float dtype, as
### a single numeric array of (year, value) pairs would. The default index is
### already 0..9.
snow = P.DataFrame(
    {'year': [float(y) for y in range(1970, 1980)],
     'snow_cover': [6.5, 12., 14.9, 10., 10.7, 7.9, 21.9, 12.5, 14.5, 9.2]},
    columns=['year', 'snow_cover'])
### ii. Snow cover versus year; iii. histogram of the snow cover values.
### scatterDF is called as (frame, x column, y column) -- the same form used
### for elasticband above, and consistent with histDF(frame, column).
Rtools.scatterDF(snow, 'year', 'snow_cover')
Rtools.histDF(snow, 'snow_cover')
### iv. Repeat both plots on the log scale. This overwrites snow_cover in
### place, so the raw values are no longer in the frame afterwards.
snow['snow_cover'] = log(snow['snow_cover'])
Rtools.scatterDF(snow, 'year', 'snow_cover')
Rtools.histDF(snow, 'snow_cover')