# Build a sibling-pair data set for generation-3 participants.
#
# Steps:
#   1. Load the full data, derive a family id from the parents' ids, keep
#      generation 3 and attach height/weight from the BMI file.
#   2. For every family with more than one member, enumerate all unordered
#      pairs of members and compute the pairwise mean and SD of several
#      variables.
#   3. Merge the per-variable tables on `pair.id`, attach `family_id`, and
#      save the result as the object `pairs`.

library(foreign)

load("/framshare/fulldata.Rdata")

# Shift the `female` indicator down by one.
# NOTE(review): presumably recodes 1/2 to 0/1 -- confirm against the codebook.
fulldata$female <- fulldata$female - 1

# Family id = mother id and father id joined by "_".
# NOTE(review): paste() turns missing ids into the string "NA", so people with
# unknown parents would all share the family "NA_NA" and be paired with each
# other below -- confirm that parent ids are complete for generation 3.
fulldata$family_id <- paste(fulldata$idmoth, fulldata$idfath, sep = "_")
g3 <- fulldata[fulldata$generation == 3, ]

bmi <- read.dta("~/framshare/bmi.g3.dta")
bmi <- subset(bmi, select = c("shareid", "height", "weight"))
g3 <- merge(g3, bmi, by.x = "id", by.y = "shareid")

# Pairwise means and SDs: yes, for height, for bmi, for education, and for
# age/sex.
# But then we have to do the same pairwise counts of minor alleles and mean
# principal components for that pair so that we can run the analysis
# ultimately...

# Row indices of all unordered pairs (first < second) among `n` rows.
# Pairs are listed as (1,2), (1,3), ..., (1,n), (2,3), ..., (n-1,n).
# Returns two empty integer vectors when n < 2.
pair.index <- function(n) {
  if (n < 2) {
    return(list(first = integer(0), second = integer(0)))
  }
  lead <- seq_len(n - 1)
  list(
    first = rep(lead, times = (n - 1):1),
    second = unlist(lapply(lead, function(i) (i + 1):n))
  )
}

# Compute a pairwise statistic of one variable for every pair of rows in `x`.
#
# Args:
#   x:         data frame with an `id` column and the column named by `var`.
#   fun.nm:    name (string) of the function applied to each pair of values,
#              e.g. "mean" or "sd".
#   var:       name (string) of the column to summarise.
#   extra.arg: unused; kept so existing calls keep working.
#
# Returns:
#   A data frame with one row per pair and columns `pair.id`
#   ("<id1>_<id2>", rows taken in increasing id order), `<var>_1`, `<var>_2`
#   and `<fun.nm>_<var>`. Zero rows when `x` has fewer than two rows.
pair.fun <- function(x, fun.nm, var, extra.arg) {
  if (!var %in% names(x)) {
    stop("pair.fun: column '", var, "' not found in the data", call. = FALSE)
  }
  x <- x[order(x$id), ]
  idx <- pair.index(nrow(x))
  df <- data.frame(
    paste(x$id[idx$first], x$id[idx$second], sep = "_"),
    x[[var]][idx$first],
    x[[var]][idx$second]
  )
  names(df) <- c(
    "pair.id",
    paste(var, "1", sep = "_"),
    paste(var, "2", sep = "_")
  )
  # mode = "function" skips any non-function object that shares the name.
  fun <- get(fun.nm, mode = "function")
  stat.name <- paste(fun.nm, var, sep = "_")
  if (nrow(df) > 0) {
    vals <- matrix(unlist(df[, 2:3]), ncol = 2, byrow = FALSE)
    df[[stat.name]] <- apply(vals, 1, fun)
  } else {
    df[[stat.name]] <- numeric(0)
  }
  df
}

# Keep only families with at least two members (i.e. at least one pair).
fams <- split(g3, g3$family_id)
index <- vapply(fams, nrow, integer(1))
fams <- fams[index > 1]

# The original list named "bmi" twice; the second pass only recomputed and
# overwrote the same tables, so it is listed once here.
# NOTE(review): if the duplicate was meant to be "weight", add it to this list.
pair.vars <- c("bmi", "height", "education", "age", "sex")

out <- list()
for (v in pair.vars) {
  v.mean <- do.call("rbind", lapply(fams, pair.fun, fun.nm = "mean", var = v))
  v.sd <- do.call("rbind", lapply(fams, pair.fun, fun.nm = "sd", var = v))
  # The mean table carries the two raw values; from the SD table keep only
  # `pair.id` and the SD column so the raw values are not duplicated on merge.
  out[[paste(v, "mean", sep = "_")]] <- v.mean
  out[[paste(v, "sd", sep = "_")]] <- v.sd[, c(1, 4)]
}

# Successively outer-merge all data frames in list `L` on column(s) `by`.
# A one-element list is returned as its single element.
list.merge <- function(L, by = "pair.id") {
  Reduce(function(acc, nxt) merge(acc, nxt, by = by, all = TRUE), L)
}

pairs <- list.merge(out)

# Lookup table from pair.id to family_id. Pairs are enumerated exactly as in
# pair.fun (increasing id order, first < second), so every row matches a row
# of `pairs` and no reversed ids such as "b_a" are generated.
fam.lookup <- Map(
  function(x, fam.id) {
    ids <- x$id[order(x$id)]
    idx <- pair.index(length(ids))
    data.frame(
      pair.id = paste(ids[idx$first], ids[idx$second], sep = "_"),
      family_id = rep(fam.id, length(idx$first))
    )
  },
  fams,
  names(fams)
)
fam.lookup <- do.call("rbind", fam.lookup)

pairs <- merge(pairs, fam.lookup, by = "pair.id")

#save(pairs,file="/tmp/pairs_data.Rdata")
save(pairs, file = "/framingham/pairs_data.Rdata")