Skip to content
Closed
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ export(ls.socrata)
export(read.socrata)
export(read.socrataGEO)
export(validateUrl)
export(write.socrata)
importFrom(geojsonio,geojson_read)
importFrom(httr,GET)
importFrom(httr,add_headers)
Expand Down
15 changes: 11 additions & 4 deletions R/errorHandling.R
Original file line number Diff line number Diff line change
Expand Up @@ -6,11 +6,18 @@
# but one that is not compatible with RSocrata.
# See \url{https://github.com/Chicago/RSocrata/issues/16}
#
# @param url - SOPA url
# @param url - SODA url
# @param email - (optional) The email to the Socrata account with read access to the dataset
# @param password - (optional) The password associated with the email to the Socrata account
#' @importFrom httr stop_for_status GET add_headers
errorHandling <- function(url = "", app_token = NULL) {
rsp <- httr::GET(url, httr::add_headers("X-App-Token" = app_token))

errorHandling <- function(url = "", app_token = NULL, email = NULL, password = NULL) {

if(is.null(email) && is.null(password)){
rsp <- httr::GET(url, httr::add_headers("X-App-Token" = app_token))
} else { # email and password are not NULL
rsp <- httr::GET(url, httr::add_headers("X-App-Token" = app_token), httr::authenticate(email, password))
}

if (rsp$status_code == 200) {
invisible("OK. Your request was successful.")

Expand Down
19 changes: 14 additions & 5 deletions R/metadata.R
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@
#' \code{http://DOMAIN/api/views/FOUR-FOUR/columns.json}, which is used here.
#'
#' @param url - A Socrata resource URL, or a Socrata "human-friendly" URL!
#' @param email - (optional) The email to the Socrata account with read access to the dataset
#' @param password - (optional) The password associated with the email to the Socrata account
#'
#' @source \url{http://stackoverflow.com/a/29782941}
#'
Expand All @@ -24,13 +26,13 @@
#' @author John Malc \email{cincenko@@outlook.com}
#'
#' @export
getMetadata <- function(url = "") {
getMetadata <- function(url = "", email = NULL, password = NULL) {

urlParsedBase <- httr::parse_url(url)
mimeType <- mime::guess_type(urlParsedBase$path)

# use function below to get them using =COUNT(*) SODA query
gQRC <- getQueryRowCount(urlParsedBase, mimeType)
gQRC <- getQueryRowCount(urlParsedBase, mimeType, email, password)

# create URL for metadata data frame
fourByFour <- substr(basename(urlParsedBase$path), 1, 9)
Expand All @@ -39,8 +41,15 @@ getMetadata <- function(url = "") {

# execute it
URL <- httr::build_url(urlParsed)
df <- jsonlite::fromJSON(URL)

if(is.null(email) && is.null(password)){
df <- jsonlite::fromJSON(URL)
} else { # email and password are not NULL
response <- httr::GET(URL, httr::authenticate(email, password))
response_content <- httr::content(response, as="text")
df <- jsonlite::fromJSON(response_content)
}

# number of rows can be sometimes "cached". If yes, then below we calculate the maximum number of
# rows from all non-null and null fields.
# If not, then it uses "getQueryRowCount" fnct with SODA =COUNT(*) SODA query.
Expand All @@ -62,7 +71,7 @@ getMetadata <- function(url = "") {
# @author Gene Leynes \email{gleynes@@gmail.com}
#
#' @importFrom httr GET build_url content
getQueryRowCount <- function(urlParsed, mimeType) {
getQueryRowCount <- function(urlParsed, mimeType, email = NULL, password = NULL) {
## Construct the count query based on the URL,
if (is.null(urlParsed[['query']])) {
## If there is no query at all, create a simple count
Expand All @@ -81,7 +90,7 @@ getQueryRowCount <- function(urlParsed, mimeType) {
urlParsed[[c('path')]], cntQueryText)

## Execute the query to count the rows
totalRowsResult <- errorHandling(cntUrl, app_token = NULL)
totalRowsResult <- errorHandling(cntUrl, app_token = NULL, email, password)

## Parsing the result depends on the mime type
if (mimeType == "application/json") {
Expand Down
10 changes: 6 additions & 4 deletions R/returnData.R
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,8 @@ getContentAsDataFrame <- function(response) {
#' @param limit - defaults to the max of 50000. See \url{http://dev.socrata.com/docs/paging.html}.
#' @param domain - A Socrata domain, e.g \url{http://data.cityofchicago.org}
#' @param fourByFour - a unique 4x4 identifier, e.g. "ydr8-5enu". See more \code{\link{isFourByFour}}
#' @param email - (optional) The email to the Socrata account with read access to the dataset
#' @param password - (optional) The password associated with the email to the Socrata account
#'
#' @author Hugh J. Devlin, Ph. D. \email{Hugh.Devlin@@cityofchicago.org}
#'
Expand All @@ -96,7 +98,7 @@ getContentAsDataFrame <- function(response) {
#'
#' @export
read.socrata <- function(url = NULL, app_token = NULL, limit = 50000, domain = NULL, fourByFour = NULL,
query = NULL) {
query = NULL, email = NULL, password = NULL) {

if (is.null(url) == TRUE) {
buildUrl <- paste0(domain, "/resource/", fourByFour, ".json")
Expand All @@ -107,16 +109,16 @@ read.socrata <- function(url = NULL, app_token = NULL, limit = 50000, domain = N
validUrl <- validateUrl(url)
parsedUrl <- httr::parse_url(validUrl)

response <- errorHandling(validUrl, app_token)
response <- errorHandling(validUrl, app_token, email, password)
results <- getContentAsDataFrame(response)
dataTypes <- getSodaTypes(response)

rowCount <- as.numeric(getMetadata(cleanQuest(validUrl))[1])
rowCount <- as.numeric(getMetadata(cleanQuest(validUrl), email, password)[1])

## More to come? Loop over pages implicitly
while (nrow(results) < rowCount) {
query_url <- paste0(validUrl, ifelse(is.null(parsedUrl$query), "?", "&"), "$offset=", nrow(results), "&$limit=", limit)
response <- errorHandling(query_url, app_token)
response <- errorHandling(query_url, app_token, email, password)
page <- getContentAsDataFrame(response)
results <- plyr::rbind.fill(results, page) # accumulate data
}
Expand Down
74 changes: 74 additions & 0 deletions R/writeData.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
#' Methods required for uploading data to Socrata

#' Send a JSON update request to a Socrata endpoint
#'
#' Issues an authenticated HTTP POST or PUT with a JSON body and returns the
#' raw response. The response status is NOT inspected here, so callers must
#' check it themselves.
#'
#' @param json_data_to_upload - JSON encoded data to update your SODA endpoint with
#' @param url - Socrata Open Data Application Program Interface (SODA) endpoint (JSON only for now)
#' @param http_verb - "POST" or "PUT" depending on update mode
#' @param email - email associated with Socrata account (will need write access to dataset)
#' @param password - password associated with Socrata account (will need write access to dataset)
#' @param app_token - optional app_token associated with Socrata account
#' @return an httr response object
#' @importFrom httr POST PUT authenticate add_headers
#'
#' @noRd
checkUpdateResponse <- function(json_data_to_upload, url, http_verb, email, password, app_token = NULL) {
  # Reject unsupported verbs up front. Without this check an unknown verb
  # skipped both branches and failed with "object 'response' not found".
  if (!is.character(http_verb) || length(http_verb) != 1L || is.na(http_verb) ||
      !(http_verb %in% c("POST", "PUT"))) {
    stop("http_verb must be POST or PUT", call. = FALSE)
  }

  # Both verbs take exactly the same arguments, so pick the function once.
  send_request <- switch(http_verb, POST = httr::POST, PUT = httr::PUT)

  response <- send_request(url,
                           body = json_data_to_upload,
                           httr::authenticate(email, password),
                           httr::add_headers("X-App-Token" = app_token,
                                             "Content-Type" = "application/json"))

  # TODO: error handling (the response status is returned unchecked)

  return(response)
}


#' Write a data frame to a Socrata dataset
#'
#' @description Method for updating Socrata datasets
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

title is missing

#'
#' @param dataframe - dataframe to upload to Socrata
#' @param dataset_json_endpoint - Socrata Open Data Application Program Interface (SODA) endpoint (JSON only for now)
#' @param update_mode - "UPSERT" or "REPLACE"; consult http://dev.socrata.com/publishers/getting-started.html
#' @param email - The email to the Socrata account with write access to the dataset
#' @param password - The password associated with the email to the Socrata account
#' @param app_token - a (non-required) string; SODA API token can be used to query the data
#' portal \url{http://dev.socrata.com/consumers/getting-started.html}
#'
#' @author Mark Silverberg \email{mark.silverberg@@socrata.com}
#'
#' @importFrom httr parse_url build_url
#' @importFrom plyr rbind.fill
#'
#' @export
write.socrata <- function(dataframe, dataset_json_endpoint, update_mode, email, password, app_token = NULL) {

  # Map the requested update mode onto the HTTP method used for the upload:
  # UPSERT is sent as POST, REPLACE as PUT, anything else is rejected.
  if (update_mode == "UPSERT") {
    verb <- "POST"
  } else {
    if (update_mode != "REPLACE") {
      stop("update_mode must be UPSERT or REPLACE")
    }
    verb <- "PUT"
  }

  # Serialise the data frame to JSON, then hand everything to the request
  # helper; its response is passed back to the caller unchanged.
  payload <- jsonlite::toJSON(dataframe)
  checkUpdateResponse(payload, dataset_json_endpoint, verb, email, password, app_token)
}
6 changes: 5 additions & 1 deletion man/getMetadata.Rd
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,14 @@
\url{http://stackoverflow.com/a/29782941}
}
\usage{
getMetadata(url = "")
getMetadata(url = "", email = NULL, password = NULL)
}
\arguments{
\item{url}{- A Socrata resource URL, or a Socrata "human-friendly" URL!}

\item{email}{- (optional) The email to the Socrata account with read access to the dataset}

\item{password}{- (optional) The password associated with the email to the Socrata account}
}
\value{
a list (!) containing a number of rows & columns and a data frame of metadata
Expand Down
8 changes: 7 additions & 1 deletion man/read.socrata.Rd
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
\title{Get a full Socrata data set as an R data frame}
\usage{
read.socrata(url = NULL, app_token = NULL, limit = 50000, domain = NULL,
fourByFour = NULL, query = NULL)
fourByFour = NULL, query = NULL, email = NULL, password = NULL)
}
\arguments{
\item{url}{- A Socrata resource URL, or a Socrata "human-friendly" URL,
Expand All @@ -26,15 +26,21 @@ portal \url{http://dev.socrata.com/consumers/getting-started.html}}

\item{query}{- Based on query language called the "Socrata Query Language" ("SoQL"), see
\url{http://dev.socrata.com/docs/queries.html}.}

\item{email}{- (optional) The email to the Socrata account with read access to the dataset}

\item{password}{- (optional) The password associated with the email to the Socrata account}
}
\description{
Manages throttling and POSIX date-time conversions. We support only .json suffix.
}
\examples{
\dontrun{
df_1 <- read.socrata(url = "http://soda.demo.socrata.com/resource/4334-bgaj.csv")
df_2 <- read.socrata(domain = "http://data.cityofchicago.org/", fourByFour = "ydr8-5enu")
df_3 <- read.socrata(url = "http://data.cityofchicago.org/resource/ydr8-5enu.json")
}
}
\author{
Hugh J. Devlin, Ph. D. \email{Hugh.Devlin@cityofchicago.org}
}
Expand Down
29 changes: 29 additions & 0 deletions man/write.socrata.Rd
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
% Generated by roxygen2 (4.1.1): do not edit by hand
% Please edit documentation in R/writeData.R
\name{write.socrata}
\alias{write.socrata}
\usage{
write.socrata(dataframe, dataset_json_endpoint, update_mode, email, password,
app_token = NULL)
}
\arguments{
\item{dataframe}{- dataframe to upload to Socrata}

\item{dataset_json_endpoint}{- Socrata Open Data Application Program Interface (SODA) endpoint (JSON only for now)}

\item{update_mode}{- "UPSERT" or "REPLACE"; consult http://dev.socrata.com/publishers/getting-started.html}

\item{email}{- The email to the Socrata account with write access to the dataset}

\item{password}{- The password associated with the email to the Socrata account}

\item{app_token}{- a (non-required) string; SODA API token can be used to query the data
portal \url{http://dev.socrata.com/consumers/getting-started.html}}
}
\description{
Method for updating Socrata datasets
}
\author{
Mark Silverberg \email{mark.silverberg@socrata.com}
}

26 changes: 26 additions & 0 deletions tests/testthat/test-readPrivateDataset.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
context("Test reading private Socrata dataset with email and password")

# The same dataset addressed once per download format; the tests below expect
# it to be unreadable without a login.
privateResourceToReadCsvUrl <- "https://soda.demo.socrata.com/resource/a9g2-feh2.csv"
privateResourceToReadJsonUrl <- "https://soda.demo.socrata.com/resource/a9g2-feh2.json"
# Credentials are taken from the environment; both default to "" when unset.
socrataEmail <- Sys.getenv("SOCRATA_EMAIL", "")
socrataPassword <- Sys.getenv("SOCRATA_PASSWORD", "")

# Skip (rather than fail) the authenticated part of a test when no credentials
# are configured in the environment.
skipWithoutCredentials <- function() {
  if (!nzchar(socrataEmail) || !nzchar(socrataPassword)) {
    skip("SOCRATA_EMAIL and SOCRATA_PASSWORD are not set")
  }
}

test_that("read Socrata CSV that requires a login", {
  # should error when no email and password are sent with the request
  expect_error(read.socrata(url = privateResourceToReadCsvUrl))
  skipWithoutCredentials()
  # try again, this time with email and password in the request
  df <- read.socrata(url = privateResourceToReadCsvUrl, email = socrataEmail, password = socrataPassword)
  # actual value first, expected value second, so failure messages read correctly
  expect_equal(ncol(df), 2, label = "columns")
  expect_equal(nrow(df), 3, label = "rows")
})

test_that("read Socrata JSON that requires a login", {
  # should error when no email and password are sent with the request
  expect_error(read.socrata(url = privateResourceToReadJsonUrl))
  skipWithoutCredentials()
  # try again, this time with email and password in the request
  df <- read.socrata(url = privateResourceToReadJsonUrl, email = socrataEmail, password = socrataPassword)
  # actual value first, expected value second, so failure messages read correctly
  expect_equal(ncol(df), 2, label = "columns")
  expect_equal(nrow(df), 3, label = "rows")
})
44 changes: 44 additions & 0 deletions tests/testthat/test-writeData.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
context("write Socrata datasets")

# Credentials are taken from the environment; both default to "" when unset.
socrataEmail <- Sys.getenv("SOCRATA_EMAIL", "")
socrataPassword <- Sys.getenv("SOCRATA_PASSWORD", "")

# Skip (rather than fail) a test when no credentials are configured, since
# every request in this file is authenticated.
skipWithoutCredentials <- function() {
  if (!nzchar(socrataEmail) || !nzchar(socrataPassword)) {
    skip("SOCRATA_EMAIL and SOCRATA_PASSWORD are not set")
  }
}

test_that("add a row to a dataset", {
  skipWithoutCredentials()
  datasetToAddToUrl <- "https://soda.demo.socrata.com/resource/xh6g-yugi.json"

  # populate df_in with two columns, each with a random number
  x <- sample(-1000:1000, 1)
  y <- sample(-1000:1000, 1)
  df_in <- data.frame(x, y)

  # write to dataset
  write.socrata(df_in, datasetToAddToUrl, "UPSERT", socrataEmail, socrataPassword)

  # read from dataset and store last row for comparisons / tests
  # NOTE(review): assumes the upserted row comes back as the last row - confirm
  df_out <- read.socrata(url = datasetToAddToUrl, email = socrataEmail, password = socrataPassword)
  df_out_last_row <- tail(df_out, n = 1)

  # actual value first, expected value second
  expect_equal(as.numeric(df_out_last_row$x), df_in$x, label = "x value")
  expect_equal(as.numeric(df_out_last_row$y), df_in$y, label = "y value")
})


test_that("fully replace a dataset", {
  skipWithoutCredentials()
  datasetToReplaceUrl <- "https://soda.demo.socrata.com/resource/kc76-ybeq.json"

  # populate df_in with two columns of random numbers
  x <- sample(-1000:1000, 5)
  y <- sample(-1000:1000, 5)
  df_in <- data.frame(x, y)

  # write to dataset
  write.socrata(df_in, datasetToReplaceUrl, "REPLACE", socrataEmail, socrataPassword)

  # read from dataset for comparisons / tests
  df_out <- read.socrata(url = datasetToReplaceUrl, email = socrataEmail, password = socrataPassword)

  # actual value first, expected value second
  expect_equal(ncol(df_out), ncol(df_in), label = "columns")
  expect_equal(nrow(df_out), nrow(df_in), label = "rows")
  expect_equal(as.numeric(df_out$x), df_in$x, label = "x values")
  expect_equal(as.numeric(df_out$y), df_in$y, label = "y values")
})