diff --git a/.travis.yml b/.travis.yml index 9fe215a..275106d 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,6 +1,5 @@ language: python python: - - "3.4" - "3.5" # command to install dependencies install: "pip install -r requirements.txt" @@ -9,10 +8,8 @@ env: - DATABASE_URL="postgres://yaddqlhbmweddl:SxBfLvKcO9Vj2b3tcFLYvLcv9m@ec2-54-243-47-46.compute-1.amazonaws.com:5432/d520svb6jevb35" COOKIE_SECRET="password" # command to run tests script: cd json_api && pytest - +# TODO: unless we can get sauce working properly, then remove addons: sauce_connect: username: "brainspell" access_key: "56abf217-04be-441a-b624-1f889a4e237f" - - diff --git a/Dockerfile b/Dockerfile index a8e68d6..1add4aa 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,5 +1,6 @@ FROM python:3 ADD . /brainspell-neo +ADD . /database_dumps/brainspell.pgsql WORKDIR /brainspell-neo EXPOSE 5000 ENV PATH /opt/conda/envs/brainspell/bin:$PATH diff --git a/docker-compose.yml b/docker-compose.yml index 3201cbf..52de6a1 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -1,8 +1,14 @@ version: '2' + services: + db: + image: postgres web: build: . - ports: - - "5000:5000" + command: python3 json_api/brainspell.py volumes: - - .:/brainspell-neo \ No newline at end of file + - .:/brainspell-neo + ports: + - "5000:5000" + depends_on: + - db \ No newline at end of file diff --git a/ideas.txt b/ideas.txt index adc5755..09bd3cc 100644 --- a/ideas.txt +++ b/ideas.txt @@ -1,27 +1,14 @@ Consider: -- add-article-manual endpoint, which allows users to add articles that aren’t on PubMed. (potentially make PMID optional) -- Make "add article" UI. -- Maybe an endpoint to get the titles (or all of the information) for a set of PMIDs. - A validation for articles before they're sent to the bulk-add endpoint. - A cron job to automatically update DOIs. - -Github Functionality -- Check in search and random whether an article is already in a collection -- Store Collection names and articles associated in database - - Use that to generate Widgets -- Integrate User pages -- Store collections in userId in the database -- Store the PMID’s in each collection as well. -- Integrate the two login systems -- Add notes in each Article in the collection +GitHub: +- Potentially add notes in each article in the collection - Paginate the collections page -- The bar should be a database request +- The bar should be a database request (what does this mean?) Potentially reimplement: - Tables: reimplement using jQuery Datatables - - Brain Browser: a tool for visualization of translucent images that can take advantage of multi-core systems - -- Continuous Activation Graphic: +- Continuous Activation Graphic - Brainsprite: a tool that uses PNG files to show activation with depth \ No newline at end of file diff --git a/json_api/.DS_Store b/json_api/.DS_Store new file mode 100644 index 0000000..5008ddf Binary files /dev/null and b/json_api/.DS_Store differ diff --git a/json_api/article_helpers.py b/json_api/article_helpers.py index e569cbc..140318a 100644 --- a/json_api/article_helpers.py +++ b/json_api/article_helpers.py @@ -6,15 +6,28 @@ import Bio from Bio import Entrez, Medline from Bio.Entrez import efetch, esearch, parse, read +from torngithub import json_encode from models import * -from search_helpers import get_article_object Entrez.email = "neel@berkeley.edu" # BEGIN: article helper functions +def get_article_object(query): + """ Get a single article PeeWee object. """ + + search = Articles.select().where(Articles.pmid == query) + return search.execute() + + +def get_all_articles(): + """ Get all article objects in the database. """ + + return Articles.select().execute() + + def update_authors(pmid, authors): """ Update the authors for an article. """ @@ -79,10 +92,9 @@ def toggle_vote(pmid, topic, username, direction): direction, "name") - query = Articles.update( + Articles.update( metadata=metadata).where( - Articles.pmid == pmid) - query.execute() + Articles.pmid == pmid).execute() def vote_stereotaxic_space(pmid, space, username): @@ -107,10 +119,9 @@ def vote_stereotaxic_space(pmid, space, username): "type": space }) - query = Articles.update( + Articles.update( metadata=target).where( - Articles.pmid == pmid) - query.execute() + Articles.pmid == pmid).execute() def vote_number_of_subjects(pmid, subjects, username): @@ -135,26 +146,24 @@ def vote_number_of_subjects(pmid, subjects, username): "value": subjects }) - query = Articles.update( + Articles.update( metadata=target).where( - Articles.pmid == pmid) - query.execute() + Articles.pmid == pmid).execute() -def add_user_tag(user_tag, id): +def add_user_tag(user_tag, pmid): """ Add a custom user tag to the database. """ main_target = next( Articles.select( Articles.metadata).where( - Articles.pmid == id).execute()) + Articles.pmid == pmid).execute()) target = eval(main_target.metadata) if target.get("user"): target["user"].append(user_tag) else: target["user"] = [user_tag] - query = Articles.update(metadata=target).where(Articles.pmid == id) - query.execute() + Articles.update(metadata=target).where(Articles.pmid == pmid).execute() def get_number_of_articles(): @@ -170,45 +179,48 @@ def add_pmid_article_to_database(article_id): Given a PMID, use external APIs to get the necessary article data in order to add the article to our database. """ - - pmid = str(article_id) - handle = efetch("pubmed", id=[pmid], rettype="medline", retmode="text") - records = list(Medline.parse(handle)) - records = records[0] - article_info = {} - article_info["title"] = records.get("TI") - article_info["PMID"] = pmid - article_info["authors"] = ', '.join(records.get("AU")) - article_info["abstract"] = records.get("AB") - article_info["DOI"] = getDOI(records.get("AID")) - article_info["experiments"] = "" - article["metadata"] = str({"meshHeadings": []}) - article["reference"] = None - identity = "" - try: - article_info["experiments"] = { - "locations": eval( - urllib.request.urlopen( - "http://neurosynth.org/api/studies/peaks/" + - str(pmid) + - "/").read().decode())["data"]} - k = article_info["experiments"]["locations"] - for i in range(len(k)): - if len(k[i]) == 4: - identity = k[0] - k[i] = k[i][1:] - k[i] = ",".join([str(x) for x in (k[i])]) - except BaseException: - pass - article_info["id"] = identity - article_info["experiments"] = [article_info["experiments"]] - Articles.create(abstract=article_info["abstract"], - authors=article_info["authors"], - doi=article_info["DOI"], - experiments=article_info["experiments"], - pmid=article_info["PMID"], - title=article_info["title"]) - return article_info + if len(list(get_article_object(article_id))) == 0: + pmid = str(article_id) + handle = efetch("pubmed", id=[pmid], rettype="medline", retmode="text") + records = list(Medline.parse(handle)) + records = records[0] + if "TI" not in records: + return False # catch bad PMIDs + article_info = {} + article_info["title"] = records["TI"] + article_info["PMID"] = pmid + article_info["authors"] = ', '.join(records["AU"]) + article_info["abstract"] = records["AB"] + article_info["DOI"] = getDOI(records["AID"]) + article_info["experiments"] = "" + article_info["metadata"] = str({"meshHeadings": []}) + article_info["reference"] = None + identity = "" + try: + article_info["experiments"] = { + "locations": eval( + urllib.request.urlopen( + "http://neurosynth.org/api/studies/peaks/" + + str(pmid) + + "/").read().decode())["data"]} + k = article_info["experiments"]["locations"] + for i in range(len(k)): + if len(k[i]) == 4: + identity = k[0] + k[i] = k[i][1:] + k[i] = ",".join([str(x) for x in (k[i])]) + except BaseException: + pass + article_info["id"] = identity + article_info["experiments"] = [article_info["experiments"]] + Articles.insert(abstract=article_info["abstract"], + authors=article_info["authors"], + doi=article_info["DOI"], + experiments=article_info["experiments"], + pmid=article_info["PMID"], + title=article_info["title"]).execute() + return True + return False def getDOI(lst): @@ -362,7 +374,7 @@ def add_coordinate_row(pmid, exp, coords, row_number=-1): else: elem["locations"].insert(row_number, row_list) Articles.update( - experiments=experiments).where( + experiments=json_encode(experiments)).where( Articles.pmid == pmid).execute() @@ -398,9 +410,7 @@ def add_table_through_text_box(pmid, values): def update_table_vote(tag_name, direction, table_num, pmid, column, username): """ Update the vote on an experiment tag for a given user. """ - article_obj = Articles.select( - Articles.experiments).where( - Articles.pmid == pmid).execute() + article_obj = get_article_object(pmid) article_obj = next(article_obj) article_obj = eval(article_obj.experiments) @@ -418,7 +428,6 @@ def update_table_vote(tag_name, direction, table_num, pmid, column, username): article_obj[table_num] = table_obj - query = Articles.update( + Articles.update( experiments=article_obj).where( - Articles.pmid == pmid) - query.execute() + Articles.pmid == pmid).execute() diff --git a/json_api/base_handler.py b/json_api/base_handler.py index f80c7cc..9314bf4 100644 --- a/json_api/base_handler.py +++ b/json_api/base_handler.py @@ -93,7 +93,8 @@ def process(self, response, args) the "asynchronous" boolean, decorating your "process" function with @tornado.gen.coroutine, and calling self.finish_async when your function finishes execution (MANDATORY). Then, any blocking code - should be decorated with @run_on_executor. + should be decorated with @run_on_executor. (Make sure that you import + run_on_executor with `from tornado.concurrent import run_on_executor`.) asynchronous :: True | False diff --git a/json_api/github_collections.py b/json_api/github_collections.py index ad1c743..80da6ea 100644 --- a/json_api/github_collections.py +++ b/json_api/github_collections.py @@ -6,6 +6,7 @@ import hashlib import os +import re from base64 import b64encode import tornado @@ -16,8 +17,8 @@ from tornado.httputil import url_concat from torngithub import json_decode, json_encode +from article_helpers import get_article_object from base_handler import * -from search_helpers import * from user_account_helpers import * # BEGIN: read environment variables @@ -132,7 +133,7 @@ def get_user_repos(http_client, access_token): access_token=access_token) for i in range(2, max_pages + 1)] for repo in repos_list: - data.extend(res.body) + data.extend(repo.body) raise tornado.gen.Return(data) diff --git a/json_api/json_api.py b/json_api/json_api.py index e6628b5..218631c 100644 --- a/json_api/json_api.py +++ b/json_api/json_api.py @@ -1,5 +1,9 @@ # JSON API classes +import statistics + +from tornado.concurrent import run_on_executor + import brainspell from article_helpers import * from base_handler import * @@ -22,6 +26,76 @@ def process(self, response, args): return response +# BEGIN: statistics API endpoints + + +class CollectionSignificanceEndpointHandler(BaseHandler): + """ + Take one or two collections, and calculate the significance of the peaks in the first collection at each (x, y, z) coordinate + with respect to the second collection, or with respect to the rest of the database. + + Return a dictionary of coordinates in the collection, mapped to their p-values, with the insignificant results filtered out. + """ + + parameters = { + "collection_name": { + "type": str + }, + "other_collection": { + "type": str, + "default": "null", + "description": "Another collection to run this significance test against. If not specified, then the test will be run against the entire database." + }, + "width": { + "type": int, + "default": 5, + "description": "The width for each coordinate that we'll check for significance." + }, + "threshold": { + "type": float, + "default": .001, + "description": "The threshold that we'll use to account for multiple comparisons using Benjamini–Hochberg" + } + } + + endpoint_type = Endpoint.PUSH_API + + collection_does_not_exist = "According to Brainspell's database, that user doesn't own a collection with the name {0}. Try syncing with GitHub if this isn't accurate. (/json/collections, set force_github_refresh to 1)" + + asynchronous = True + + @run_on_executor + def get_significance(self, pmids, other_pmids, width, threshold): + return statistics.significance_from_collections( + pmids, other_pmids, width, threshold) + + @tornado.gen.coroutine + def process(self, response, args): + user_collections = get_brainspell_collections_from_api_key(args["key"]) + # ensure that collection exists + if args["collection_name"] in user_collections: + pmids = user_collections[args["collection_name"]]["pmids"] + other_pmids = None + if args["other_collection"] != "null": + if args["other_collection"] in user_collections: + other_pmids = user_collections[args["other_collection"]]["pmids"] + else: + response["success"] = 0 + response["description"] = self.collection_does_not_exist.format( + args["other_collection"]) + return response + # at this point, we can assume that we have either one set of PMIDs + # and None, or two sets of PMIDs + response["significance_grid"] = yield self.get_significance(pmids, other_pmids, width=args["width"], threshold=args["threshold"]) + else: + # collection doesn't exist + response["success"] = 0 + response["description"] = self.collection_does_not_exist.format( + args["collection_name"]) + + self.finish_async(response) + + # BEGIN: search API endpoints @@ -132,10 +206,14 @@ def process(self, response, args): return response -class AddArticleFromSearchPageEndpointHandler(BaseHandler): - """ Add an article to our database via PMID (for use on the search page) """ +class AddArticleEndpointHandler(BaseHandler): + """ + Fetch PubMed and Neurosynth data using a user-specified PMID, and add + the article to our database. Do not add repeats. + """ + parameters = { - "new_pmid": { + "pmid": { "type": str } } @@ -143,7 +221,10 @@ class AddArticleFromSearchPageEndpointHandler(BaseHandler): endpoint_type = Endpoint.PUSH_API def process(self, response, args): - add_pmid_article_to_database(args["new_pmid"]) + success = add_pmid_article_to_database(args["pmid"]) + response["success"] = success + if success == 0: + response["description"] = "Either that PMID is not valid, or the article already exists in our database." return response @@ -212,31 +293,6 @@ def process(self, response, args): return response -class AddArticleEndpointHandler(BaseHandler): - """ - Fetch PubMed and Neurosynth data using a user-specified PMID, and add - the article to our database. - """ - - parameters = { - "pmid": { - "type": str - } - } - - endpoint_type = Endpoint.PUSH_API - - def process(self, response, args): - article_obj = getArticleData(args["pmid"]) - request = Articles.insert(abstract=article_obj["abstract"], - doi=article_obj["DOI"], - authors=article_obj["authors"], - experiments=article_obj["coordinates"], - title=article_obj["title"]) - request.execute() - return response - - class SetArticleAuthorsEndpointHandler(BaseHandler): """ Edit the authors of an article. """ diff --git a/json_api/search_helpers.py b/json_api/search_helpers.py index 84b2dca..bfe0727 100644 --- a/json_api/search_helpers.py +++ b/json_api/search_helpers.py @@ -101,13 +101,6 @@ def formatted_search(query, start, param=None, experiments=False): *fields).where(match).limit(numberResults).offset(start).execute() -def get_article_object(query): - """ Get a single article PeeWee object. """ - - search = Articles.select().where(Articles.pmid == query) - return search.execute() - - def generate_circle(coordinate): # Coordinate of form "-26,54,14" """ Specify a range around a given coordinate to search the database. """ diff --git a/json_api/static/html/account.html b/json_api/static/html/account.html index 917ac16..856205c 100644 --- a/json_api/static/html/account.html +++ b/json_api/static/html/account.html @@ -3,9 +3,13 @@ {% block header %}
Your open, curated classification of neuroimaging literature.
+Loading... This might take a while.
| + + | +
| + + + + + | +
| + + | +
| + + | +