#use readtable to create data frames of following unzipped files below
x.train <- read.table("UCI HAR Dataset/train/X_train.txt")
subject.train <- read.table("UCI HAR Dataset/train/subject_train.txt")
y.train <- read.table("UCI HAR Dataset/train/y_train.txt")
x.test <- read.table("UCI HAR Dataset/test/X_test.txt")
subject.test <- read.table("UCI HAR Dataset/test/subject_test.txt")
y.test <- read.table("UCI HAR Dataset/test/y_test.txt")
features <- read.table("UCI HAR Dataset/features.txt")
activity.labels <- read.table("UCI HAR Dataset/activity_labels.txt")
colnames(x.test) <- features[,2]
dataset_test <- cbind(subject.test,y.test,x.test)
colnames(dataset_test)[1] <- "subject"
colnames(dataset_test)[2] <- "activity"
test <- select(features, V2)
dataset_test <- select(dataset_test,subject,activity)
[1] Error: Can't bind data because some arguments have the same name
- features is a two column dataframe with the second columns containing the names for x.test
- subject.test is a single column data frame
- y.test is a single column data frame
- x.test is a wide data frame
After naming and binding these data frames I tried to use dplyr::select to select certain frames. However, I get an error returning dataset_test:
"Error: Can't bind data because some arguments have the same name"
However, test does not return an error and properly filters. Why is there the difference in behaviour?
The data I am using can be downloaded online. The data sources correspond to the variable names, except "_" are used instead of "."
dput
> dput(head(x.test[,1:5],2))
structure(list(V1 = c(0.25717778, 0.28602671), V2 = c(-0.02328523,
-0.013163359), V3 = c(-0.014653762, -0.11908252), V4 = c(-0.938404,
-0.97541469), V5 = c(-0.92009078, -0.9674579)), row.names = 1:2, class = "data.frame")
> dput(head(subject.test,2))
structure(list(V1 = c(2L, 2L)), row.names = 1:2, class = "data.frame")
> dput(head(y.test,2))
structure(list(V1 = c(5L, 5L)), row.names = 1:2, class = "data.frame")
> dput(head(features,2))
structure(list(V1 = 1:2, V2 = c("tBodyAcc-mean()-X", "tBodyAcc-mean()-Y"
)), row.names = 1:2, class = "data.frame")
dput
to provide some sample data. More details here – Irrelevancenames(dataset_test)
return? – Myosindput
– Myosin