吴裕雄--天生自然 R语言开发学习:高级数据管理(续三)
2020-12-13 06:20
标签:statistic 技术 table cto param raid round variable alc 吴裕雄--天生自然 R语言开发学习:高级数据管理(续三) 标签:statistic 技术 table cto param raid round variable alc 原文地址:https://www.cnblogs.com/tszr/p/11175339.html
#-----------------------------------#
# R in Action (2nd ed): Chapter 5 #
# Advanced data management #
# requires that the reshape2 #
# package has been installed #
# install.packages("reshape2") #
#-----------------------------------#
# Class Roster Dataset
# Build the class roster: one row per student with three exam scores.
# NOTE(review): the assignment operators and the three numeric score vectors
# were stripped from this listing during HTML extraction; the values below are
# restored from the published example -- confirm against the original.
Student <- c("John Davis", "Angela Williams", "Bullwinkle Moose",
             "David Jones", "Janice Markhammer",
             "Cheryl Cushing", "Reuven Ytzrhak",
             "Greg Knox", "Joel England", "Mary Rayburn")
math <- c(502, 600, 412, 358, 495, 512, 410, 625, 573, 522)
science <- c(95, 99, 80, 82, 75, 85, 80, 95, 89, 86)
english <- c(25, 22, 18, 15, 20, 28, 15, 30, 27, 18)
# Keep student names as character strings rather than factors.
roster <- data.frame(Student, math, science, english,
                     stringsAsFactors = FALSE)
# Listing 5.1 - Calculating the mean and standard deviation
# Compare the built-in mean()/sd() with a by-hand computation.
# NOTE(review): the assignments and right-hand sides were stripped during HTML
# extraction; the sample vector is restored from the published example --
# confirm against the original.
x <- c(1, 2, 3, 4, 5, 6, 7, 8)

# Built-in versions
mean(x)
sd(x)

# Manual versions
n <- length(x)
meanx <- sum(x) / n
css <- sum((x - meanx)^2)    # corrected sum of squares
sdx <- sqrt(css / (n - 1))   # sample standard deviation (n - 1 denominator)
meanx
sdx
# Listing 5.2 - Generating pseudo-random numbers from
# a uniform distribution
# Two draws of five uniform values with no seed set in between.
runif(n = 5)
runif(n = 5)

# Resetting the generator to the same seed before each draw
# reproduces the same five values.
set.seed(seed = 1234)
runif(n = 5)
set.seed(seed = 1234)
runif(n = 5)
# Listing 5.3 - Generating data from a multivariate
# normal distribution
library(MASS)
# Draw a sample from a three-variable multivariate normal distribution
# using MASS::mvrnorm(), then label the columns.
# NOTE(review): the mean vector, the first row of sigma, the sample size and
# the mvrnorm() call were stripped during HTML extraction. The off-diagonal
# entries of the first row follow from the symmetry of the surviving rows; the
# remaining values are restored from the published example -- confirm.
# `mean` deliberately keeps the listing's variable name; calls to mean() still
# resolve to the function because R skips non-function bindings when calling.
mean <- c(230.7, 146.7, 3.6)
sigma <- matrix(c(15360.8, 6721.2, -47.1,
                  6721.2, 4700.9, -16.5,
                  -47.1, -16.5, 0.3), nrow = 3, ncol = 3)
set.seed(1234)
mydata <- mvrnorm(500, mean, sigma)
mydata <- as.data.frame(mydata)
names(mydata) <- c("y", "x1", "x2")
dim(mydata)
head(mydata, n = 10)
# Listing 5.4 - Applying functions to data objects
# Apply the same functions to a scalar, a vector and a matrix.
# NOTE(review): the assignments were stripped during HTML extraction; the
# literal values are restored from the published example -- confirm.
a <- 5
sqrt(a)

b <- c(1.243, 5.654, 2.99)
round(b)

# `c` keeps the listing's variable name; calls to c() still resolve to the
# function because R skips non-function bindings when calling.
c <- matrix(runif(12), nrow = 3)
c
log(c)
mean(c)   # a single value: the mean over all 12 elements
# Listing 5.5 - Applying a function to the rows (columns) of a matrix
# Row-wise and column-wise summaries of a matrix with apply().
# NOTE(review): the matrix construction was stripped during HTML extraction;
# restored from the published example (30 normal draws in 6 rows) -- confirm.
mydata <- matrix(rnorm(30), nrow = 6)
mydata
apply(mydata, 1, mean)              # one mean per row
apply(mydata, 2, mean)              # one mean per column
apply(mydata, 2, mean, trim = .4)   # trimmed column means; trim is passed on to mean()
# Listing 5.6 - A solution to the learning example
# Combine three exam scores into one standardized score, assign letter
# grades by quintile, and sort the roster by last then first name.
# NOTE(review): assignments, the scale()/apply()/quantile()/strsplit()/sapply()
# calls, the lower-bound halves of the grade conditions and the numeric score
# vectors were stripped during HTML extraction; they are restored from the
# published example -- confirm against the original.
options(digits = 2)

Student <- c("John Davis", "Angela Williams", "Bullwinkle Moose",
             "David Jones", "Janice Markhammer", "Cheryl Cushing",
             "Reuven Ytzrhak", "Greg Knox", "Joel England",
             "Mary Rayburn")
Math <- c(502, 600, 412, 358, 495, 512, 410, 625, 573, 522)
Science <- c(95, 99, 80, 82, 75, 85, 80, 95, 89, 86)
English <- c(25, 22, 18, 15, 20, 28, 15, 30, 27, 18)
roster <- data.frame(Student, Math, Science, English,
                     stringsAsFactors = FALSE)

# Standardize each exam so the three scales are comparable, then average
# the standardized values per student.
z <- scale(roster[, 2:4])
score <- apply(z, 1, mean)
roster <- cbind(roster, score)

# Cut points at the 80th, 60th, 40th and 20th percentiles of the score.
y <- quantile(score, c(.8, .6, .4, .2))
roster$grade[score >= y[1]] <- "A"
roster$grade[score < y[1] & score >= y[2]] <- "B"
roster$grade[score < y[2] & score >= y[3]] <- "C"
roster$grade[score < y[3] & score >= y[4]] <- "D"
roster$grade[score < y[4]] <- "F"

# Split "First Last" into separate columns and drop the combined column.
name <- strsplit((roster$Student), " ")
Lastname <- sapply(name, "[", 2)
Firstname <- sapply(name, "[", 1)
roster <- cbind(Firstname, Lastname, roster[, -1])

roster <- roster[order(Lastname, Firstname), ]
roster
# Listing 5.7 - A switch example
# Print a canned response for each feeling by dispatching on its name.
# The `feelings <- c(` prefix was stripped during HTML extraction and is
# restored here; the loop body is now braced.
feelings <- c("sad", "afraid")
for (i in feelings) {
  print(
    switch(i,
      happy  = "I am glad you are happy",
      afraid = "There is nothing to fear",
      sad    = "Cheer up",
      angry  = "Calm down now"
    )
  )
}
# Listing 5.8 - mystats(): a user-written function for
# summary statistics
# mystats(): centre and spread of a numeric vector.
#
# Arguments:
#   x          numeric vector to summarize.
#   parametric TRUE  -> mean and standard deviation;
#              FALSE -> median and median absolute deviation.
#   print      if TRUE, also write the two statistics to the console.
#
# Returns a list with elements `center` and `spread`.
#
# The function header, the center/spread assignments and the result list were
# stripped during HTML extraction; they are restored to match the labels used
# in the cat() calls (Mean/SD and Median/MAD).
mystats <- function(x, parametric = TRUE, print = FALSE) {
  if (parametric) {
    center <- mean(x)
    spread <- sd(x)
  } else {
    center <- median(x)
    spread <- mad(x)
  }
  # Both flags are scalars, so plain scalar branching replaces the
  # elementwise `&` the listing used inside if().
  if (print) {
    if (parametric) {
      cat("Mean=", center, "\n", "SD=", spread, "\n")
    } else {
      cat("Median=", center, "\n", "MAD=", spread, "\n")
    }
  }
  list(center = center, spread = spread)
}
# trying it out
# Exercise mystats() on a reproducible normal sample.
# NOTE(review): the rnorm() draw and the second call's leading arguments were
# stripped during HTML extraction; restored from the published example
# (500 draws; non-parametric call with printing) -- confirm.
set.seed(1234)
x <- rnorm(500)
y <- mystats(x)                                      # mean / SD, silent
y <- mystats(x, parametric = FALSE, print = TRUE)    # median / MAD, printed
# mydate: a user-written function using switch
# mydate(): format the current date in one of two styles.
#
# Arguments:
#   type  "long"  -> weekday, month name, day and four-digit year;
#         "short" -> mm-dd-yy.
#         Any other value prints a message via cat() and the function
#         returns cat()'s NULL.
#
# The `<- function(type=` part of the header was stripped during HTML
# extraction and is restored here.
mydate <- function(type = "long") {
  switch(type,
    long = format(Sys.time(), "%A %B %d %Y"),
    short = format(Sys.time(), "%m-%d-%y"),
    # Unnamed last argument is switch()'s fallback for unmatched types.
    cat(type, "is not a recognized type\n")
  )
}
# Request each supported format explicitly.
mydate(type = "long")
mydate(type = "short")
# No argument: the function's default type is used.
mydate()
# A type that matches neither named branch of the switch.
mydate(type = "medium")
# Listing 5.9 - Transposing a dataset
# Take a small corner of mtcars and transpose it.
# NOTE(review): the `<- mtcars[1:5, 1:4` part was stripped during HTML
# extraction; the row/column ranges are restored from the published example
# -- confirm. `cars` keeps the listing's name and masks datasets::cars.
cars <- mtcars[1:5, 1:4]
cars
t(cars)   # rows become columns; the result is a matrix
# Listing 5.10 - Aggregating data
# Mean of every mtcars column within each cylinder-by-gear combination.
# The `<- aggregate(mtcars, by=` part was stripped during HTML extraction and
# is restored here.
options(digits = 3)
# attach() is kept so the script's search path is unchanged for any later
# code; cyl and gear below are resolved through it.
attach(mtcars)
aggdata <- aggregate(mtcars, by = list(cyl, gear),
                     FUN = mean, na.rm = TRUE)
aggdata
# Using the reshape2 package
library(reshape2)
# input data
# Small long-format example: two IDs measured at two times on X1 and X2.
# The `<- read.table(header=TRUE, sep=` part was stripped during HTML
# extraction and is restored here. The data rows must stay at column 0:
# with sep = " ", leading spaces would be read as empty fields.
mydata <- read.table(header = TRUE, sep = " ", text = "
ID Time X1 X2
1 1 5 6
1 2 3 5
2 1 6 1
2 2 2 4
")
# melt data
# Melt to one row per (ID, Time, variable) with the measurement in `value`.
# The `<- melt(mydata, id=c(` part was stripped during HTML extraction and is
# restored here; the argument is spelled out as id.vars rather than relying
# on partial matching of `id`.
md <- melt(mydata, id.vars = c("ID", "Time"))
# reshaping with aggregation
# Casts that collapse several melted rows into each cell: mean() is the
# aggregation function.
dcast(md, ID ~ variable, fun.aggregate = mean)
dcast(md, Time ~ variable, fun.aggregate = mean)
dcast(md, ID ~ Time, fun.aggregate = mean)
# Casts called without an aggregation function.
dcast(md, ID + Time ~ variable)
dcast(md, ID + variable ~ Time)
dcast(md, ID ~ variable + Time)
文章标题:吴裕雄--天生自然 R语言开发学习:高级数据管理(续三)
文章链接:http://soscw.com/essay/32877.html