So here a solution that uses the dir listing from the FTP. It is a little tricky because the FTP gives the date in linux format with either a timestamp or a year. Other than that it does it's job. I'm still not convinced this is reliable though. If packages are copied over to another server all timestmaps might be reset.
recent.packages.ftp <- function(){
setwd(tempdir())
download.file("ftp://cran.r-project.org/pub/R/src/contrib/", destfile=tempfile(), method="wget", extra="--no-htmlify");
#because of --no-htmlify the destfile argument does not work
datastring <- readLines(".listing");
unlink(".listing");
myexpr1 <- "(?<date>[A-Z][a-z]{2} [0-9]{2} [0-9]{2}:[0-9]{2}) (?<name>[a-zA-Z0-9\\.]{2,})_(?<version>[0-9\\.-]*).tar.gz$"
matches <- gregexpr(myexpr1, datastring, perl=TRUE);
packagelines <- as.logical(sapply(regmatches(datastring, matches), length));
#subset proper lines
matches <- matches[packagelines];
datastring <- datastring[packagelines];
N <- length(matches)
#from the ?regexpr manual
parse.one <- function(res, result) {
m <- do.call(rbind, lapply(seq_along(res), function(i) {
if(result[i] == -1) return("")
st <- attr(result, "capture.start")[i, ]
substring(res[i], st, st + attr(result, "capture.length")[i, ] - 1)
}))
colnames(m) <- attr(result, "capture.names")
m
}
#parse all records
mydf <- data.frame(date=rep(NA, N), name=rep(NA, N), version=rep(NA,N))
for(i in 1:N){
mydf[i,] <- parse.one(datastring[i], matches[[i]]);
}
row.names(mydf) <- NULL;
#convert dates
mydf$date <- strptime(mydf$date, format="%b %d %H:%M");
#So linux only displays dates for packages of less then six months old.
#However strptime will assume the current year for packages that don't have a timestamp
#Therefore for dates that are in the future, we subtract a year. We can use some margin for timezones.
infuture <- (mydf$date > Sys.time() + 31*24*60*60);
mydf$date[infuture] <- mydf$date[infuture] - 365*24*60*60;
#sort and return
mydf <- mydf[order(mydf$date),];
row.names(mydf) <- NULL;
return(mydf);
}
html
table usingXML::readHTMLTable
. is this what you were looking for? – Gorilla