Compare commits

..

13 Commits
dev ... main

Author SHA1 Message Date
Ladd Hoffman 24c183912a remove old stuff 2024-04-04 12:10:00 -05:00
Ladd Hoffman ad382b5caf Merge branch 'semantic-scholar-import' 2024-04-04 12:02:42 -05:00
Ladd Hoffman c80f2ee79b sematic scholar api key support 2024-04-04 11:56:58 -05:00
Ladd Hoffman 846eb73cea Add link to new repo 2023-07-28 18:23:23 +00:00
Ladd Hoffman 1f3d8a7d1e Merge branch 'dev' into 'main'
Moved forum prototype code to https://gitlab.com/dao-governance-framework/forum-logic

See merge request dao-governance-framework/dao-governance-framework!11
2023-07-10 18:36:26 +00:00
Ladd Hoffman 4d53f5c70e Merge branch 'dev' into 'main'
Improved graph editing

See merge request dao-governance-framework/science-publishing-dao!10
2023-07-10 15:08:35 +00:00
Ladd Hoffman ae5ab09e16 Merge branch 'dev' into 'main'
Basic graph editing

See merge request dao-governance-framework/science-publishing-dao!9
2023-07-04 00:33:47 +00:00
Ladd Hoffman 82e026f327 Merge branch 'dev' into 'main'
Add some basic introductory text to the home page

See merge request dao-governance-framework/science-publishing-dao!8
2023-06-30 22:03:04 +00:00
Ladd Hoffman 8bb188ff13 Merge branch 'dev' into 'main'
Dev

See merge request dao-governance-framework/science-publishing-dao!7
2023-06-28 21:14:40 +00:00
Ladd Hoffman ce4f78aa97 Merge branch 'dev' into 'main'
Move params to validation pool

See merge request dao-governance-framework/science-publishing-dao!6
2023-04-23 01:25:39 +00:00
Ladd Hoffman 68d04117c9 Successfully consuming data 2022-11-07 17:31:37 -06:00
Ladd Hoffman ff7d6134f1 semantic-scholar-client: Extend Readme 2022-07-15 10:57:27 -05:00
Ladd Hoffman 43462e84ea semantic-scholar-client: Initial commit 2022-07-15 10:48:33 -05:00
8 changed files with 2197 additions and 1 deletions

View File

@ -4,4 +4,4 @@
| Name | Description | | Name | Description |
| --- | --- | | --- | --- |
| [forum-logic](https://gitlab.com/dao-governance-framework/forum-logic) | Javascript prototyping forum architecture | | [semantic-scholar-client](./semantic-scholar-client) | Rust utility for reading data from the [Semantic Scholar API](https://api.semanticscholar.org/api-docs) |

View File

@ -0,0 +1 @@
SEMANTIC_SCHOLAR_API_KEY=

2
semantic-scholar-client/.gitignore vendored Normal file
View File

@ -0,0 +1,2 @@
/target
.env

1939
semantic-scholar-client/Cargo.lock generated Normal file

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,17 @@
[package]
name = "semantic-scholar-client"
version = "0.1.0"
edition = "2021"
default-run = "import"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies]
async-recursion = "1.0.0"
clap = { version = "3.2.11", features = ["derive"] }
dotenv = "0.15.0"
mongodb = "2.2.2"
reqwest = { version = "0.11.11", features = ["json"] }
serde = { version = "1.0.139", features = ["derive"] }
serde_json = "1.0.82"
tokio = { version = "1.20.0", features = ["full"] }

View File

@ -0,0 +1,25 @@
# `semantic-scholar-client`
This utility fetches data from the Semantic Scholar API.
Initial proof of concept here writes the result to stdout.
Work in progress to pipe this data into an operating database.
### Usage
* (Optional) Copy `.env.example` to `.env` and set the value of `SEMANTIC_SCHOLAR_API_KEY`
* Run the program
cargo run -- --paper-id <paper_id> --depth <depth>
* `paper_id` values are in accordance with [Semantic Scholar API](https://api.semanticscholar.org/api-docs/).
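* `depth` is the depth counter assigned to the starting paper. Each level of citing papers increments it, and traversal stops once it exceeds the built-in maximum of 3, so `--depth 0` traverses the most levels and any value above 3 fetches nothing.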
### Notes
Ideas for followup work:
- Consider strategies for deciding where to terminate a given traversal
- Provide an HTTP/WebSocket interface that can be used to talk to this process during its operation.
This can enable us to pipe the data to other tasks, to monitor, to start/stop, and even to make configuration changes.
- Rate limit requests

View File

@ -0,0 +1,153 @@
// During development, allowing dead code
#![allow(dead_code)]
use async_recursion::async_recursion;
use clap::Parser;
use dotenv::dotenv;
use serde::Deserialize;
use std::cmp::min;
use std::error::Error;
use std::fmt::Write;
/// Result alias used by the fallible functions in this file; the error is any boxed `Error`.
type DataResult<T> = Result<T, Box<dyn Error>>;
/// Root URL of the Semantic Scholar graph API; request paths are appended to it.
const BASE_URL: &str = "https://api.semanticscholar.org/graph/v1";
/// Largest depth value at which `get_citations` still issues a request;
/// calls with a greater depth return immediately with no results.
const MAX_DEPTH: u32 = 3;
// Command-line arguments, parsed with clap's derive API.
// Plain `//` comments are used for the notes added here because clap turns
// `///` doc comments on this struct and its fields into help text.
#[derive(Parser, Debug)]
#[clap(author, version, about, long_about = None)]
struct Args {
// `main` passes this value to `get_citations` as the starting depth counter.
/// How deep to traverse citation graph from the starting paper
#[clap(short, long, value_parser)]
depth: u32,
// `main` passes this value to `get_citations` as the first paper to look up.
/// Starting paper. We will traverse papers that cite this one
#[clap(short, long, value_parser)]
paper_id: String,
// Write the results to MongoDB
// #[clap(short, long, value_parser)]
// write_to_mongo: bool,
}
/// An author record.
///
/// Derives `Debug` like the other data structs in this file, plus `Clone`,
/// `PartialEq` and `Eq` so values can be copied and compared once the
/// traversal starts collecting them.
#[derive(Debug, Clone, PartialEq, Eq)]
struct Author {
    /// The author's name.
    name: String,
}

/// List of authors; created in `main` and threaded through `get_citations`.
type Authors = Vec<Author>;
/// A paper together with its citations, deserialized from camelCase JSON
/// (`paperId`, `title`, `citations`).
///
/// Not constructed anywhere in the visible code.
#[derive(Deserialize, Debug)]
#[serde(rename_all = "camelCase")]
struct Paper {
/// Identifier of the paper (JSON key `paperId`).
paper_id: String,
/// Title of the paper; may be absent.
title: Option<String>,
/// Citation entries for this paper.
citations: Vec<Citation>,
}
/// The paper on the citing side of a `Citation` entry, deserialized from
/// camelCase JSON (`paperId`, `title`).
///
/// Both fields are optional; `get_citations` skips entries where either one
/// is missing.
#[derive(Deserialize, Debug)]
#[serde(rename_all = "camelCase")]
struct CitingPaper {
/// Identifier of the citing paper (JSON key `paperId`).
paper_id: Option<String>,
/// Title of the citing paper.
title: Option<String>,
}
/// One entry of a citations response: wraps the paper that does the citing
/// (JSON key `citingPaper`).
#[derive(Deserialize, Debug)]
#[serde(rename_all = "camelCase")]
struct Citation {
/// The paper that cites the paper being queried.
citing_paper: CitingPaper,
}
/// Generic struct to wrap the common API response pattern `{data: [...]}`.
///
/// `get_citations` treats a response whose `message` is present as an error
/// report from the API and ignores `data` in that case.
///
/// NOTE(review): a stray `code: Option<String>,` line previously sat inside
/// this comment. If the API's error payload carries a `code` value, it may have
/// been intended as a struct field — confirm before adding it.
#[derive(Deserialize, Debug)]
struct ApiListResponse<T> {
/// The list payload; absent when the response carries no `data` key.
data: Option<Vec<T>>,
/// Error text; when present the response is handled as a failure.
message: Option<String>,
}
// TODO: Cache results in a (separate but local) database such as Redis
// TODO: Store results in a (separate but local) database such as Postgres
#[async_recursion]
async fn get_citations(
client: &reqwest::Client,
paper_id: String,
depth: u32,
authors: &mut Vec<Author>,
) -> DataResult<Vec<Citation>> {
// Bound recursion to some depth
if depth > MAX_DEPTH {
return Ok(vec![]);
}
// Build the URL
let mut url = String::new();
write!(&mut url, "{}/paper/{}/citations", BASE_URL, paper_id)?;
let mut req = client.get(url);
let api_key = std::env::var("SEMANTIC_SCHOLAR_API_KEY");
if api_key.is_ok() {
req = req.header("x-api-key", api_key.unwrap());
}
let resp = req.send().await?.text().await?;
let resp_deserialized_attempt =
serde_json::from_str::<ApiListResponse<Citation>>(resp.as_str());
if let Err(err) = resp_deserialized_attempt {
println!("depth {} paper {} error {}", depth, paper_id, err);
return Ok(vec![]);
}
let resp_deserialized: ApiListResponse<Citation> = resp_deserialized_attempt.unwrap();
if resp_deserialized.message.is_some() {
println!(
"depth {} paper {} error {}",
depth,
paper_id,
resp_deserialized.message.unwrap()
);
return Ok(vec![]);
}
for Citation {
citing_paper:
CitingPaper {
paper_id: citing_paper_id,
title,
},
} in resp_deserialized.data.unwrap()
{
if let (Some(citing_paper_id), Some(title)) = (citing_paper_id, title) {
let short_len = min(50, title.len());
let (short_title, _) = title.split_at(short_len);
println!(
"depth {} paper {} cites {} title {}",
depth, citing_paper_id, paper_id, short_title
);
get_citations(&client, citing_paper_id, depth + 1, authors).await?;
}
}
Ok(vec![])
}
#[tokio::main]
async fn main() -> Result<(), Box<dyn Error>> {
let Args { depth, paper_id } = Args::parse();
dotenv().ok();
let mut authors = Authors::new();
let client: reqwest::Client = reqwest::Client::new();
get_citations(&client, paper_id, depth, &mut authors).await?;
Ok(())
}

View File

@ -0,0 +1,59 @@
use mongodb::{Client, options::ClientOptions};
// Connection string for a MongoDB instance on localhost.
// NOTE(review): the username and password are embedded in source. Consider
// reading the address from the environment before using this outside local
// testing.
const MONGO_DB_ADDRESS: &str = "mongodb://docker:mongopw@localhost:55000";
/// Smoke test for the MongoDB driver.
///
/// Inserts two sample books into the `books` collection of database `db2`, then
/// prints every database name, each followed by the names of its collections.
///
/// # Errors
/// Propagates any failure from parsing the connection string, building the
/// client, inserting the documents, or listing databases and collections.
#[tokio::main]
async fn main() -> Result<(), Box<dyn std::error::Error>> {
    // Turn the connection string into driver options and build a client handle from them.
    let options = ClientOptions::parse(MONGO_DB_ADDRESS).await?;
    let mongo = Client::with_options(options)?;

    // Exercise a typed insert. The block keeps `Book` and its serde imports local.
    {
        use serde::{Deserialize, Serialize};

        #[derive(Debug, Serialize, Deserialize)]
        struct Book {
            title: String,
            author: String,
        }

        let sample_books: Vec<Book> = [
            ("The Grapes of Wrath", "John Steinbeck"),
            ("To Kill a Mockingbird", "Harper Lee"),
        ]
        .into_iter()
        .map(|(title, author)| Book {
            title: title.to_string(),
            author: author.to_string(),
        })
        .collect();

        // Referencing the database and collection is enough to use them here;
        // the typed handle means no manual conversion to BSON is needed.
        mongo
            .database("db2")
            .collection::<Book>("books")
            .insert_many(sample_books, None)
            .await?;
    }

    // Walk every database in the deployment and print its collections beneath it.
    for db_name in mongo.list_database_names(None, None).await? {
        println!("{}", db_name);
        let collection_names = mongo
            .database(&db_name)
            .list_collection_names(None)
            .await?;
        for collection_name in collection_names {
            println!("- {}", collection_name);
        }
    }
    Ok(())
}