Batch Processing
1. Repeat the exercise from the Batch Processing Lecture (5 April), but do it using real data sets rather than purely simulated. Check with folks in your lab to see if there are multiple data sets available for analysis, or ask Nick, Lauren, or Emily for suggestions for other data sources. Stick to simple data analyses and graphics, but try to set it up as a batch process that will work on multiple files and save summary results to a common file.
If you can only find a single data set, then simulate a couple of others by following the methods in Homework #6 by selecting appropriate statistical distributions and estimating parameters for those from the real data.
Hopefully, this exercise will contribute to some actual work that you are trying to do in your research!
Note: I do not have any data that I am able to use, so I reused the batch-processing code and re-simulated the data.
# Build a set of random files
##################################################
# function: file_builder
# create a set of random files for regression
# input: file_n = number of files to create
#      : file_folder = name of folder for random files
#        (pasted directly in front of the file name, so it
#        should end with a path separator)
#      : file_size = c(min,max) number of rows in file
#      : file_na = number on average of NA values per column
# output: set of random files named ranFile001.csv, ranFile002.csv, ...
#         the vector of file paths is returned invisibly
#-------------------------------------------------
file_builder <- function(file_n = 10,
                         file_folder = "RandomFiles/",
                         file_size = c(15, 100),
                         file_na = 3) {
  # make sure the target folder exists; stay silent if it already does
  dir.create(file_folder, showWarnings = FALSE, recursive = TRUE)

  # index into the range explicitly: sample(x, 1) would draw from 1:x
  # when min and max are equal and the range collapses to one number
  row_range <- file_size[1]:file_size[2]
  file_paths <- character(file_n)

  for (i in seq_len(file_n)) {
    file_length <- row_range[sample.int(length(row_range), size = 1)] # get number of rows
    var_x <- runif(file_length) # create random x
    var_y <- runif(file_length) # create random y
    df <- data.frame(var_x, var_y) # bind into a data frame

    # determine NA number; cap at the row count so sampling rows
    # without replacement cannot fail on short files
    bad_vals <- min(rpois(n = 1, lambda = file_na), nrow(df))
    df[sample.int(nrow(df), size = bad_vals), 1] <- NA # random NA in var_x
    df[sample.int(nrow(df), size = bad_vals), 2] <- NA # random NA in var_y

    # create label for file name with padded zeroes
    file_label <- paste0(file_folder,
                         "ranFile",
                         formatC(i,
                                 width = 3,
                                 format = "d",
                                 flag = "0"),
                         ".csv")

    # write the metadata header and the data through one connection;
    # this avoids both the stray "" printed by write.table(cat(...))
    # and the "appending column names to file" warning
    con <- file(file_label, open = "wt")
    tryCatch({
      # set up data file and incorporate time stamp and minimal metadata
      writeLines(c("# Simulated random data file for batch processing",
                   paste0("# timestamp: ", as.character(Sys.time())),
                   "# LSC",
                   "# ------------------------",
                   ""),
                 con = con)
      # now add the data frame
      write.table(x = df,
                  file = con,
                  sep = ",",
                  row.names = FALSE)
    }, finally = close(con))

    file_paths[i] <- file_label
  }
  invisible(file_paths)
}
# Run regression model and extract stats
##################################################
# function: reg_stats
# fits linear model, extracts statistics
# input: 2-column data frame (x in column 1, y in column 2);
#        if NULL, a random 10-row data frame is generated
# output: list with slope, p_val, and r2
#         (all NA when the slope cannot be estimated)
#-------------------------------------------------
reg_stats <- function(d = NULL) {
  if (is.null(d)) {
    x_var <- runif(10)
    y_var <- runif(10)
    d <- data.frame(x_var, y_var)
  }

  no_fit <- list(slope = NA_real_, p_val = NA_real_, r2 = NA_real_)

  # lm() stops when there are no usable rows, and a single usable row
  # leaves no slope row in the coefficient table
  if (sum(complete.cases(d[, 1], d[, 2])) < 2) {
    return(no_fit)
  }

  fit_summary <- summary(lm(d[, 2] ~ d[, 1]))
  coefs <- fit_summary$coefficients

  # a constant x makes the slope inestimable and summary() drops its row
  if (nrow(coefs) < 2) {
    return(no_fit)
  }

  list(slope = coefs[2, 1],
       p_val = coefs[2, 4],
       r2 = fit_summary$r.squared)
}
# Body of Script
#--------------------------------------------
# Global variables
file_folder <- "RandomFiles/" # folder holding the simulated data files
n_files <- 100 # number of data files to simulate
file_out <- "StatsSummary.csv" # file that collects the per-file statistics
#--------------------------------------------
# Create 100 random data sets
# showWarnings = FALSE: the folder already existing on a re-run is
# expected and should not raise a warning
dir.create(file_folder, showWarnings = FALSE)
## Warning in dir.create(file_folder): 'RandomFiles' already exists
# pass the folder explicitly so the files land in file_folder even if it
# is changed away from the function's default
file_builder(file_n = n_files, file_folder = file_folder)
## ""
## Warning in write.table(x = df, file = file_label, sep = ",", row.names =
## FALSE, : appending column names to file
## ""
## Warning in write.table(x = df, file = file_label, sep = ",", row.names =
## FALSE, : appending column names to file
## ""
## Warning in write.table(x = df, file = file_label, sep = ",", row.names =
## FALSE, : appending column names to file
## ""
## Warning in write.table(x = df, file = file_label, sep = ",", row.names =
## FALSE, : appending column names to file
## ""
## Warning in write.table(x = df, file = file_label, sep = ",", row.names =
## FALSE, : appending column names to file
## ""
## Warning in write.table(x = df, file = file_label, sep = ",", row.names =
## FALSE, : appending column names to file
## ""
## Warning in write.table(x = df, file = file_label, sep = ",", row.names =
## FALSE, : appending column names to file
## ""
## Warning in write.table(x = df, file = file_label, sep = ",", row.names =
## FALSE, : appending column names to file
## ""
## Warning in write.table(x = df, file = file_label, sep = ",", row.names =
## FALSE, : appending column names to file
## ""
## Warning in write.table(x = df, file = file_label, sep = ",", row.names =
## FALSE, : appending column names to file
## ""
## Warning in write.table(x = df, file = file_label, sep = ",", row.names =
## FALSE, : appending column names to file
## ""
## Warning in write.table(x = df, file = file_label, sep = ",", row.names =
## FALSE, : appending column names to file
## ""
## Warning in write.table(x = df, file = file_label, sep = ",", row.names =
## FALSE, : appending column names to file
## ""
## Warning in write.table(x = df, file = file_label, sep = ",", row.names =
## FALSE, : appending column names to file
## ""
## Warning in write.table(x = df, file = file_label, sep = ",", row.names =
## FALSE, : appending column names to file
## ""
## Warning in write.table(x = df, file = file_label, sep = ",", row.names =
## FALSE, : appending column names to file
## ""
## Warning in write.table(x = df, file = file_label, sep = ",", row.names =
## FALSE, : appending column names to file
## ""
## Warning in write.table(x = df, file = file_label, sep = ",", row.names =
## FALSE, : appending column names to file
## ""
## Warning in write.table(x = df, file = file_label, sep = ",", row.names =
## FALSE, : appending column names to file
## ""
## Warning in write.table(x = df, file = file_label, sep = ",", row.names =
## FALSE, : appending column names to file
## ""
## Warning in write.table(x = df, file = file_label, sep = ",", row.names =
## FALSE, : appending column names to file
## ""
## Warning in write.table(x = df, file = file_label, sep = ",", row.names =
## FALSE, : appending column names to file
## ""
## Warning in write.table(x = df, file = file_label, sep = ",", row.names =
## FALSE, : appending column names to file
## ""
## Warning in write.table(x = df, file = file_label, sep = ",", row.names =
## FALSE, : appending column names to file
## ""
## Warning in write.table(x = df, file = file_label, sep = ",", row.names =
## FALSE, : appending column names to file
## ""
## Warning in write.table(x = df, file = file_label, sep = ",", row.names =
## FALSE, : appending column names to file
## ""
## Warning in write.table(x = df, file = file_label, sep = ",", row.names =
## FALSE, : appending column names to file
## ""
## Warning in write.table(x = df, file = file_label, sep = ",", row.names =
## FALSE, : appending column names to file
## ""
## Warning in write.table(x = df, file = file_label, sep = ",", row.names =
## FALSE, : appending column names to file
## ""
## Warning in write.table(x = df, file = file_label, sep = ",", row.names =
## FALSE, : appending column names to file
## ""
## Warning in write.table(x = df, file = file_label, sep = ",", row.names =
## FALSE, : appending column names to file
## ""
## Warning in write.table(x = df, file = file_label, sep = ",", row.names =
## FALSE, : appending column names to file
## ""
## Warning in write.table(x = df, file = file_label, sep = ",", row.names =
## FALSE, : appending column names to file
## ""
## Warning in write.table(x = df, file = file_label, sep = ",", row.names =
## FALSE, : appending column names to file
## ""
## Warning in write.table(x = df, file = file_label, sep = ",", row.names =
## FALSE, : appending column names to file
## ""
## Warning in write.table(x = df, file = file_label, sep = ",", row.names =
## FALSE, : appending column names to file
## ""
## Warning in write.table(x = df, file = file_label, sep = ",", row.names =
## FALSE, : appending column names to file
## ""
## Warning in write.table(x = df, file = file_label, sep = ",", row.names =
## FALSE, : appending column names to file
## ""
## Warning in write.table(x = df, file = file_label, sep = ",", row.names =
## FALSE, : appending column names to file
## ""
## Warning in write.table(x = df, file = file_label, sep = ",", row.names =
## FALSE, : appending column names to file
## ""
## Warning in write.table(x = df, file = file_label, sep = ",", row.names =
## FALSE, : appending column names to file
## ""
## Warning in write.table(x = df, file = file_label, sep = ",", row.names =
## FALSE, : appending column names to file
## ""
## Warning in write.table(x = df, file = file_label, sep = ",", row.names =
## FALSE, : appending column names to file
## ""
## Warning in write.table(x = df, file = file_label, sep = ",", row.names =
## FALSE, : appending column names to file
## ""
## Warning in write.table(x = df, file = file_label, sep = ",", row.names =
## FALSE, : appending column names to file
## ""
## Warning in write.table(x = df, file = file_label, sep = ",", row.names =
## FALSE, : appending column names to file
## ""
## Warning in write.table(x = df, file = file_label, sep = ",", row.names =
## FALSE, : appending column names to file
## ""
## Warning in write.table(x = df, file = file_label, sep = ",", row.names =
## FALSE, : appending column names to file
## ""
## Warning in write.table(x = df, file = file_label, sep = ",", row.names =
## FALSE, : appending column names to file
## ""
## Warning in write.table(x = df, file = file_label, sep = ",", row.names =
## FALSE, : appending column names to file
## ""
## Warning in write.table(x = df, file = file_label, sep = ",", row.names =
## FALSE, : appending column names to file
## ""
## Warning in write.table(x = df, file = file_label, sep = ",", row.names =
## FALSE, : appending column names to file
## ""
## Warning in write.table(x = df, file = file_label, sep = ",", row.names =
## FALSE, : appending column names to file
## ""
## Warning in write.table(x = df, file = file_label, sep = ",", row.names =
## FALSE, : appending column names to file
## ""
## Warning in write.table(x = df, file = file_label, sep = ",", row.names =
## FALSE, : appending column names to file
## ""
## Warning in write.table(x = df, file = file_label, sep = ",", row.names =
## FALSE, : appending column names to file
## ""
## Warning in write.table(x = df, file = file_label, sep = ",", row.names =
## FALSE, : appending column names to file
## ""
## Warning in write.table(x = df, file = file_label, sep = ",", row.names =
## FALSE, : appending column names to file
## ""
## Warning in write.table(x = df, file = file_label, sep = ",", row.names =
## FALSE, : appending column names to file
## ""
## Warning in write.table(x = df, file = file_label, sep = ",", row.names =
## FALSE, : appending column names to file
## ""
## Warning in write.table(x = df, file = file_label, sep = ",", row.names =
## FALSE, : appending column names to file
## ""
## Warning in write.table(x = df, file = file_label, sep = ",", row.names =
## FALSE, : appending column names to file
## ""
## Warning in write.table(x = df, file = file_label, sep = ",", row.names =
## FALSE, : appending column names to file
## ""
## Warning in write.table(x = df, file = file_label, sep = ",", row.names =
## FALSE, : appending column names to file
## ""
## Warning in write.table(x = df, file = file_label, sep = ",", row.names =
## FALSE, : appending column names to file
## ""
## Warning in write.table(x = df, file = file_label, sep = ",", row.names =
## FALSE, : appending column names to file
## ""
## Warning in write.table(x = df, file = file_label, sep = ",", row.names =
## FALSE, : appending column names to file
## ""
## Warning in write.table(x = df, file = file_label, sep = ",", row.names =
## FALSE, : appending column names to file
## ""
## Warning in write.table(x = df, file = file_label, sep = ",", row.names =
## FALSE, : appending column names to file
## ""
## Warning in write.table(x = df, file = file_label, sep = ",", row.names =
## FALSE, : appending column names to file
## ""
## Warning in write.table(x = df, file = file_label, sep = ",", row.names =
## FALSE, : appending column names to file
## ""
## Warning in write.table(x = df, file = file_label, sep = ",", row.names =
## FALSE, : appending column names to file
## ""
## Warning in write.table(x = df, file = file_label, sep = ",", row.names =
## FALSE, : appending column names to file
## ""
## Warning in write.table(x = df, file = file_label, sep = ",", row.names =
## FALSE, : appending column names to file
## ""
## Warning in write.table(x = df, file = file_label, sep = ",", row.names =
## FALSE, : appending column names to file
## ""
## Warning in write.table(x = df, file = file_label, sep = ",", row.names =
## FALSE, : appending column names to file
## ""
## Warning in write.table(x = df, file = file_label, sep = ",", row.names =
## FALSE, : appending column names to file
## ""
## Warning in write.table(x = df, file = file_label, sep = ",", row.names =
## FALSE, : appending column names to file
## ""
## Warning in write.table(x = df, file = file_label, sep = ",", row.names =
## FALSE, : appending column names to file
## ""
## Warning in write.table(x = df, file = file_label, sep = ",", row.names =
## FALSE, : appending column names to file
## ""
## Warning in write.table(x = df, file = file_label, sep = ",", row.names =
## FALSE, : appending column names to file
## ""
## Warning in write.table(x = df, file = file_label, sep = ",", row.names =
## FALSE, : appending column names to file
## ""
## Warning in write.table(x = df, file = file_label, sep = ",", row.names =
## FALSE, : appending column names to file
## ""
## Warning in write.table(x = df, file = file_label, sep = ",", row.names =
## FALSE, : appending column names to file
## ""
## Warning in write.table(x = df, file = file_label, sep = ",", row.names =
## FALSE, : appending column names to file
## ""
## Warning in write.table(x = df, file = file_label, sep = ",", row.names =
## FALSE, : appending column names to file
## ""
## Warning in write.table(x = df, file = file_label, sep = ",", row.names =
## FALSE, : appending column names to file
## ""
## Warning in write.table(x = df, file = file_label, sep = ",", row.names =
## FALSE, : appending column names to file
## ""
## Warning in write.table(x = df, file = file_label, sep = ",", row.names =
## FALSE, : appending column names to file
## ""
## Warning in write.table(x = df, file = file_label, sep = ",", row.names =
## FALSE, : appending column names to file
## ""
## Warning in write.table(x = df, file = file_label, sep = ",", row.names =
## FALSE, : appending column names to file
## ""
## Warning in write.table(x = df, file = file_label, sep = ",", row.names =
## FALSE, : appending column names to file
## ""
## Warning in write.table(x = df, file = file_label, sep = ",", row.names =
## FALSE, : appending column names to file
## ""
## Warning in write.table(x = df, file = file_label, sep = ",", row.names =
## FALSE, : appending column names to file
## ""
## Warning in write.table(x = df, file = file_label, sep = ",", row.names =
## FALSE, : appending column names to file
## ""
## Warning in write.table(x = df, file = file_label, sep = ",", row.names =
## FALSE, : appending column names to file
## ""
## Warning in write.table(x = df, file = file_label, sep = ",", row.names =
## FALSE, : appending column names to file
## ""
## Warning in write.table(x = df, file = file_label, sep = ",", row.names =
## FALSE, : appending column names to file
## ""
## Warning in write.table(x = df, file = file_label, sep = ",", row.names =
## FALSE, : appending column names to file
## ""
## Warning in write.table(x = df, file = file_label, sep = ",", row.names =
## FALSE, : appending column names to file
# collect the data files to process; only .csv files are listed so that
# stray files in the folder are not fed to the regression
file_names <- list.files(path = file_folder, pattern = "\\.csv$")
# Create data frame to hold file summary statistics
# columns are sized by the files actually found rather than n_files, so
# data.frame() cannot fail on mismatched column lengths
ID <- seq_along(file_names)
file_name <- file_names
slope <- rep(NA_real_, length(file_names))
p_val <- rep(NA_real_, length(file_names))
r2 <- rep(NA_real_, length(file_names))

stats_out <- data.frame(ID, file_name, slope, p_val, r2)
# batch process by looping through individual files
for (i in seq_along(file_names)) {
  # read in next data file; lines starting with "#" are skipped by
  # read.table's default comment.char
  data <- read.table(file = paste0(file_folder, file_names[i]),
                     sep = ",",
                     header = TRUE)

  d_clean <- data[complete.cases(data), ] # get clean cases

  reg_out <- reg_stats(d_clean) # pull regression stats from clean file
  stats_out[i, 3:5] <- unlist(reg_out) # unlist, copy into last 3 columns
}
# set up output file and incorporate time stamp and minimal metadata
# cat() does the file writing itself; wrapping it in write.table() only
# passed cat's NULL return value on, which printed a stray "" to the console
cat("# Summary stats for ",
    "batch processing of regression models", "\n",
    "# timestamp: ", as.character(Sys.time()), "\n",
    "# NJG", "\n",
    "# ------------------------", "\n",
    "\n",
    file = file_out,
    sep = "")
## ""
# now add the data frame
# write through a connection opened in append mode: the table still lands
# below the header already in file_out, but write.table() no longer warns
# about "appending column names to file"
out_con <- file(file_out, open = "at")
write.table(x = stats_out,
            file = out_con,
            row.names = FALSE,
            col.names = TRUE,
            sep = ",")
close(out_con)
## Warning in write.table(x = stats_out, file = file_out, row.names = FALSE, :
## appending column names to file