diff --git a/NAMESPACE b/NAMESPACE index 1acd5bf..9f4fea7 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -6,6 +6,7 @@ export(ls.socrata) export(read.socrata) export(read.socrataGEO) export(validateUrl) +export(write.socrata) importFrom(geojsonio,geojson_read) importFrom(httr,GET) importFrom(httr,add_headers) diff --git a/R/errorHandling.R b/R/errorHandling.R index 0d50253..6baa2e0 100644 --- a/R/errorHandling.R +++ b/R/errorHandling.R @@ -6,11 +6,18 @@ # but one that is not compatible with RSocrata. # See \url{https://github.com/Chicago/RSocrata/issues/16} # -# @param url - SOPA url +# @param url - SODA url +# @param optional email - The email to the Socrata account with read access to the dataset +# @param optional password - The password associated with the email to the Socrata account #' @importFrom httr stop_for_status GET add_headers -errorHandling <- function(url = "", app_token = NULL) { - rsp <- httr::GET(url, httr::add_headers("X-App-Token" = app_token)) - +errorHandling <- function(url = "", app_token = NULL, email = NULL, password = NULL) { + + if(is.null(email) && is.null(password)){ + rsp <- httr::GET(url, httr::add_headers("X-App-Token" = app_token)) + } else { # email and password are not NULL + rsp <- httr::GET(url, httr::add_headers("X-App-Token" = app_token), httr::authenticate(email, password)) + } + if (rsp$status_code == 200) { invisible("OK. Your request was successful.") diff --git a/R/metadata.R b/R/metadata.R index 2567fe5..a79a056 100644 --- a/R/metadata.R +++ b/R/metadata.R @@ -5,6 +5,8 @@ #' \code{http://DOMAIN/api/views/FOUR-FOUR/columns.json}, which is used here. #' #' @param url - A Socrata resource URL, or a Socrata "human-friendly" URL! 
+#' @param email - (optional) The email to the Socrata account with read access to the dataset +#' @param password - (optional) The password associated with the email to the Socrata account #' #' @source \url{http://stackoverflow.com/a/29782941} #' @@ -24,13 +26,13 @@ #' @author John Malc \email{cincenko@@outlook.com} #' #' @export -getMetadata <- function(url = "") { +getMetadata <- function(url = "", email = NULL, password = NULL) { urlParsedBase <- httr::parse_url(url) mimeType <- mime::guess_type(urlParsedBase$path) # use function below to get them using =COUNT(*) SODA query - gQRC <- getQueryRowCount(urlParsedBase, mimeType) + gQRC <- getQueryRowCount(urlParsedBase, mimeType, email, password) # create URL for metadata data frame fourByFour <- substr(basename(urlParsedBase$path), 1, 9) @@ -39,8 +41,15 @@ getMetadata <- function(url = "") { # execute it URL <- httr::build_url(urlParsed) - df <- jsonlite::fromJSON(URL) + if(is.null(email) && is.null(password)){ + df <- jsonlite::fromJSON(URL) + } else { # email and password are not NULL + response <- httr::GET(URL, httr::authenticate(email, password)) + response_content <- httr::content(response, as="text") + df <- jsonlite::fromJSON(response_content) + } + # number of rows can be sometimes "cached". If yes, then below we calculate the maximum number of # rows from all non-null and null fields. # If not, then it uses "getQueryRowCount" fnct with SODA =COUNT(*) SODA query. 
@@ -62,7 +71,7 @@ getMetadata <- function(url = "") { # @author Gene Leynes \email{gleynes@@gmail.com} # #' @importFrom httr GET build_url content -getQueryRowCount <- function(urlParsed, mimeType) { +getQueryRowCount <- function(urlParsed, mimeType, email = NULL, password = NULL) { ## Construct the count query based on the URL, if (is.null(urlParsed[['query']])) { ## If there is no query at all, create a simple count @@ -81,7 +90,7 @@ getQueryRowCount <- function(urlParsed, mimeType) { urlParsed[[c('path')]], cntQueryText) ## Execute the query to count the rows - totalRowsResult <- errorHandling(cntUrl, app_token = NULL) + totalRowsResult <- errorHandling(cntUrl, app_token = NULL, email, password) ## Parsing the result depends on the mime type if (mimeType == "application/json") { diff --git a/R/returnData.R b/R/returnData.R index 36f7dea..d18d22a 100644 --- a/R/returnData.R +++ b/R/returnData.R @@ -82,6 +82,8 @@ getContentAsDataFrame <- function(response) { #' @param limit - defaults to the max of 50000. See \url{http://dev.socrata.com/docs/paging.html}. #' @param domain - A Socrata domain, e.g \url{http://data.cityofchicago.org} #' @param fourByFour - a unique 4x4 identifier, e.g. "ydr8-5enu". See more \code{\link{isFourByFour}} +#' @param email - (optional) The email to the Socrata account with read access to the dataset +#' @param password - (optional) The password associated with the email to the Socrata account #' #' @author Hugh J. Devlin, Ph. D. 
\email{Hugh.Devlin@@cityofchicago.org} #' @@ -96,7 +98,7 @@ getContentAsDataFrame <- function(response) { #' #' @export read.socrata <- function(url = NULL, app_token = NULL, limit = 50000, domain = NULL, fourByFour = NULL, - query = NULL) { + query = NULL, email = NULL, password = NULL) { if (is.null(url) == TRUE) { buildUrl <- paste0(domain, "/resource/", fourByFour, ".json") @@ -107,16 +109,16 @@ read.socrata <- function(url = NULL, app_token = NULL, limit = 50000, domain = N validUrl <- validateUrl(url) parsedUrl <- httr::parse_url(validUrl) - response <- errorHandling(validUrl, app_token) + response <- errorHandling(validUrl, app_token, email, password) results <- getContentAsDataFrame(response) dataTypes <- getSodaTypes(response) - rowCount <- as.numeric(getMetadata(cleanQuest(validUrl))[1]) + rowCount <- as.numeric(getMetadata(cleanQuest(validUrl), email, password)[1]) ## More to come? Loop over pages implicitly while (nrow(results) < rowCount) { query_url <- paste0(validUrl, ifelse(is.null(parsedUrl$query), "?", "&"), "$offset=", nrow(results), "&$limit=", limit) - response <- errorHandling(query_url, app_token) + response <- errorHandling(query_url, app_token, email, password) page <- getContentAsDataFrame(response) results <- plyr::rbind.fill(results, page) # accumulate data } diff --git a/R/writeData.R b/R/writeData.R new file mode 100644 index 0000000..e11b0ca --- /dev/null +++ b/R/writeData.R @@ -0,0 +1,75 @@ +#' Methods required for uploading data to Socrata + +#' Send data to Socrata with an httr POST or PUT request +#' +#' Error handling is still a TODO: the httr response is returned as-is, even on failure.
+#' +#' @param url - Socrata Open Data Application Program Interface (SODA) endpoint (JSON only for now) +#' @param json_data_to_upload - JSON encoded data to update your SODA endpoint with +#' @param http_verb - PUT or POST depending on update mode +#' @param email - email associated with Socrata account (will need write access to dataset) +#' @param password - password associated with Socrata account (will need write access to dataset) +#' @param app_token - optional app_token associated with Socrata account +#' @return an httr response object +#' @importFrom httr GET +#' +#' @noRd +checkUpdateResponse <- function(json_data_to_upload, url, http_verb, email, password, app_token = NULL) { + if(http_verb == "POST"){ + response <- httr::POST(url, + body = json_data_to_upload, + httr::authenticate(email, password), + httr::add_headers("X-App-Token" = app_token, + "Content-Type" = "application/json")) #, verbose()) + } else if(http_verb == "PUT"){ + response <- httr::PUT(url, + body = json_data_to_upload, + httr::authenticate(email, password), + httr::add_headers("X-App-Token" = app_token, + "Content-Type" = "application/json")) # , verbose()) + } + + # TODO: error handling + # errorHandling(response) + + return(response) +} + +#' Write to a Socrata dataset (full replace or upsert) +#' +#' @description Method for updating Socrata datasets +#' +#' @param dataframe - dataframe to upload to Socrata +#' @param dataset_json_endpoint - Socrata Open Data Application Program Interface (SODA) endpoint (JSON only for now) +#' @param update_mode - "UPSERT" or "REPLACE"; consult http://dev.socrata.com/publishers/getting-started.html +#' @param email - The email to the Socrata account with write access to the dataset +#' @param password - The password associated with the email to the Socrata account +#' @param app_token - a (non-required) string; SODA API token can be used to query the data +#' portal \url{http://dev.socrata.com/consumers/getting-started.html} +#' +#' @author Mark
Silverberg \email{mark.silverberg@@socrata.com} +#' +#' @importFrom httr parse_url build_url +#' @importFrom plyr rbind.fill +#' +#' @export +write.socrata <- function(dataframe, dataset_json_endpoint, update_mode, email, password, app_token = NULL) { + + # translate update_mode to http_verbs + if(update_mode == "UPSERT"){ + http_verb <- "POST" + } else if(update_mode == "REPLACE") { + http_verb <- "PUT" + } else { + stop("update_mode must be UPSERT or REPLACE") + } + + # convert dataframe to JSON + dataframe_as_json_string <- jsonlite::toJSON(dataframe) + + # do the actual upload + response <- checkUpdateResponse(dataframe_as_json_string, dataset_json_endpoint, http_verb, email, password, app_token) + + return(response) + +} diff --git a/man/getMetadata.Rd b/man/getMetadata.Rd index e2510d2..b80d8bd 100644 --- a/man/getMetadata.Rd +++ b/man/getMetadata.Rd @@ -7,10 +7,14 @@ \url{http://stackoverflow.com/a/29782941} } \usage{ -getMetadata(url = "") +getMetadata(url = "", email = NULL, password = NULL) } \arguments{ \item{url}{- A Socrata resource URL, or a Socrata "human-friendly" URL!} + +\item{email}{- (optional) The email to the Socrata account with read access to the dataset} + +\item{password}{- (optional) The password associated with the email to the Socrata account} } \value{ a list (!) 
containing a number of rows & columns and a data frame of metadata diff --git a/man/read.socrata.Rd b/man/read.socrata.Rd index 6e63489..75eae3a 100644 --- a/man/read.socrata.Rd +++ b/man/read.socrata.Rd @@ -5,7 +5,7 @@ \title{Get a full Socrata data set as an R data frame} \usage{ read.socrata(url = NULL, app_token = NULL, limit = 50000, domain = NULL, - fourByFour = NULL, query = NULL) + fourByFour = NULL, query = NULL, email = NULL, password = NULL) } \arguments{ \item{url}{- A Socrata resource URL, or a Socrata "human-friendly" URL, @@ -26,15 +26,21 @@ portal \url{http://dev.socrata.com/consumers/getting-started.html}} \item{query}{- Based on query language called the "Socrata Query Language" ("SoQL"), see \url{http://dev.socrata.com/docs/queries.html}.} + +\item{email}{- (optional) The email to the Socrata account with read access to the dataset} + +\item{password}{- (optional) The password associated with the email to the Socrata account} } \description{ Manages throttling and POSIX date-time conversions. We support only .json suffix. } \examples{ +\dontrun{ df_1 <- read.socrata(url = "http://soda.demo.socrata.com/resource/4334-bgaj.csv") df_2 <- read.socrata(domain = "http://data.cityofchicago.org/", fourByFour = "ydr8-5enu") df_3 <- read.socrata(url = "http://data.cityofchicago.org/resource/ydr8-5enu.json") } +} \author{ Hugh J. Devlin, Ph. D. 
\email{Hugh.Devlin@cityofchicago.org} } diff --git a/man/write.socrata.Rd b/man/write.socrata.Rd new file mode 100644 index 0000000..d3ada7c --- /dev/null +++ b/man/write.socrata.Rd @@ -0,0 +1,30 @@ +% Generated by roxygen2 (4.1.1): do not edit by hand +% Please edit documentation in R/writeData.R +\name{write.socrata} +\alias{write.socrata} +\title{Write to a Socrata dataset (full replace or upsert)} +\usage{ +write.socrata(dataframe, dataset_json_endpoint, update_mode, email, password, + app_token = NULL) +} +\arguments{ +\item{dataframe}{- dataframe to upload to Socrata} + +\item{dataset_json_endpoint}{- Socrata Open Data Application Program Interface (SODA) endpoint (JSON only for now)} + +\item{update_mode}{- "UPSERT" or "REPLACE"; consult http://dev.socrata.com/publishers/getting-started.html} + +\item{email}{- The email to the Socrata account with write access to the dataset} + +\item{password}{- The password associated with the email to the Socrata account} + +\item{app_token}{- a (non-required) string; SODA API token can be used to query the data +portal \url{http://dev.socrata.com/consumers/getting-started.html}} +} +\description{ +Method for updating Socrata datasets +} +\author{ +Mark Silverberg \email{mark.silverberg@socrata.com} +} + diff --git a/tests/testthat/test-readPrivateDataset.R b/tests/testthat/test-readPrivateDataset.R new file mode 100644 index 0000000..7ba03e5 --- /dev/null +++ b/tests/testthat/test-readPrivateDataset.R @@ -0,0 +1,26 @@ +context("Test reading private Socrata dataset with email and password") + +privateResourceToReadCsvUrl <- "https://soda.demo.socrata.com/resource/a9g2-feh2.csv" +privateResourceToReadJsonUrl <- "https://soda.demo.socrata.com/resource/a9g2-feh2.json" +socrataEmail <- Sys.getenv("SOCRATA_EMAIL", "") +socrataPassword <- Sys.getenv("SOCRATA_PASSWORD", "") + +test_that("read Socrata CSV that requires a login", { + # should error when no email and password are sent with the request + expect_error(read.socrata(url
= privateResourceToReadCsvUrl)) + # try again, this time with email and password in the request + df <- read.socrata(url = privateResourceToReadCsvUrl, email = socrataEmail, password = socrataPassword) + # tests + expect_equal(2, ncol(df), label="columns") + expect_equal(3, nrow(df), label="rows") +}) + +test_that("read Socrata JSON that requires a login", { + # should error when no email and password are sent with the request + expect_error(read.socrata(url = privateResourceToReadJsonUrl)) + # try again, this time with email and password in the request + df <- read.socrata(url = privateResourceToReadJsonUrl, email = socrataEmail, password = socrataPassword) + # tests + expect_equal(2, ncol(df), label="columns") + expect_equal(3, nrow(df), label="rows") +}) diff --git a/tests/testthat/test-writeData.R b/tests/testthat/test-writeData.R new file mode 100644 index 0000000..011f861 --- /dev/null +++ b/tests/testthat/test-writeData.R @@ -0,0 +1,44 @@ +context("write Socrata datasets") + +socrataEmail <- Sys.getenv("SOCRATA_EMAIL", "") +socrataPassword <- Sys.getenv("SOCRATA_PASSWORD", "") + +test_that("add a row to a dataset", { + datasetToAddToUrl <- "https://soda.demo.socrata.com/resource/xh6g-yugi.json" + + # populate df_in with two columns, each with a random number + x <- sample(-1000:1000, 1) + y <- sample(-1000:1000, 1) + df_in <- data.frame(x,y) + + # write to dataset + write.socrata(df_in,datasetToAddToUrl,"UPSERT",socrataEmail,socrataPassword) + + # read from dataset and store last (most recent) row for comparisons / tests + df_out <- read.socrata(url = datasetToAddToUrl, email = socrataEmail, password = socrataPassword) + df_out_last_row <- tail(df_out, n=1) + + expect_equal(df_in$x, as.numeric(df_out_last_row$x), label = "x value") + expect_equal(df_in$y, as.numeric(df_out_last_row$y), label = "y value") +}) + + +test_that("fully replace a dataset", { + datasetToReplaceUrl <- "https://soda.demo.socrata.com/resource/kc76-ybeq.json" + + # populate df_in with 
two columns of random numbers + x <- sample(-1000:1000, 5) + y <- sample(-1000:1000, 5) + df_in <- data.frame(x,y) + + # write to dataset + write.socrata(df_in,datasetToReplaceUrl,"REPLACE",socrataEmail,socrataPassword) + + # read from dataset for comparisons / tests + df_out <- read.socrata(url = datasetToReplaceUrl, email = socrataEmail, password = socrataPassword) + + expect_equal(ncol(df_in), ncol(df_out), label="columns") + expect_equal(nrow(df_in), nrow(df_out), label="rows") + expect_equal(df_in$x, as.numeric(df_out$x), label = "x values") + expect_equal(df_in$y, as.numeric(df_out$y), label = "y values") +})