Semantic Scholar API key support
This commit is contained in:
		
							parent
							
								
									68d04117c9
								
							
						
					
					
						commit
						c80f2ee79b
					
				| 
						 | 
				
			
			@ -0,0 +1 @@
 | 
			
		|||
SEMANTIC_SCHOLAR_API_KEY=
 | 
			
		||||
| 
						 | 
				
			
			@ -1 +1,2 @@
 | 
			
		|||
/target
 | 
			
		||||
.env
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -266,6 +266,12 @@ dependencies = [
 | 
			
		|||
 "subtle",
 | 
			
		||||
]
 | 
			
		||||
 | 
			
		||||
[[package]]
 | 
			
		||||
name = "dotenv"
 | 
			
		||||
version = "0.15.0"
 | 
			
		||||
source = "registry+https://github.com/rust-lang/crates.io-index"
 | 
			
		||||
checksum = "77c90badedccf4105eca100756a0b1289e191f6fcbdadd3cee1d2f614f97da8f"
 | 
			
		||||
 | 
			
		||||
[[package]]
 | 
			
		||||
name = "encoding_rs"
 | 
			
		||||
version = "0.8.31"
 | 
			
		||||
| 
						 | 
				
			
			@ -1206,6 +1212,7 @@ version = "0.1.0"
 | 
			
		|||
dependencies = [
 | 
			
		||||
 "async-recursion",
 | 
			
		||||
 "clap",
 | 
			
		||||
 "dotenv",
 | 
			
		||||
 "mongodb",
 | 
			
		||||
 "reqwest",
 | 
			
		||||
 "serde",
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -2,12 +2,14 @@
 | 
			
		|||
name = "semantic-scholar-client"
 | 
			
		||||
version = "0.1.0"
 | 
			
		||||
edition = "2021"
 | 
			
		||||
default-run = "import"
 | 
			
		||||
 | 
			
		||||
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
 | 
			
		||||
 | 
			
		||||
[dependencies]
 | 
			
		||||
async-recursion = "1.0.0"
 | 
			
		||||
clap = { version = "3.2.11", features = ["derive"] }
 | 
			
		||||
dotenv = "0.15.0"
 | 
			
		||||
mongodb = "2.2.2"
 | 
			
		||||
reqwest = { version = "0.11.11", features = ["json"] }
 | 
			
		||||
serde = { version = "1.0.139", features = ["derive"] }
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -8,6 +8,9 @@ Work in progress to pipe this data into an operating database.
 | 
			
		|||
 | 
			
		||||
### Usage
 | 
			
		||||
 | 
			
		||||
* (Optional) Copy `.env.example` to `.env` and set the value of `SEMANTIC_SCHOLAR_API_KEY`
 | 
			
		||||
* Run the program
 | 
			
		||||
 | 
			
		||||
    cargo run -- --paper-id <paper_id> --depth <depth>
 | 
			
		||||
 | 
			
		||||
* `paper_id` values are in accordance with [Semantic Scholar API](https://api.semanticscholar.org/api-docs/).
 | 
			
		||||
| 
						 | 
				
			
			@ -19,3 +22,4 @@ Ideas for followup work:
 | 
			
		|||
- Consider strategies for deciding where to terminate a given traversal
 | 
			
		||||
- Provide an HTTP/WebSocket interface that can be used to talk to this process during its operation.
 | 
			
		||||
  This can enable us to pipe the data to other tasks, to monitor, to start/stop, and even to make configuration changes.
 | 
			
		||||
- Rate limit requests
 | 
			
		||||
| 
						 | 
				
			
			@ -3,10 +3,11 @@
 | 
			
		|||
 | 
			
		||||
use async_recursion::async_recursion;
 | 
			
		||||
use clap::Parser;
 | 
			
		||||
use std::cmp::min;
 | 
			
		||||
use std::fmt::Write;
 | 
			
		||||
use std::error::Error;
 | 
			
		||||
use dotenv::dotenv;
 | 
			
		||||
use serde::Deserialize;
 | 
			
		||||
use std::cmp::min;
 | 
			
		||||
use std::error::Error;
 | 
			
		||||
use std::fmt::Write;
 | 
			
		||||
 | 
			
		||||
type DataResult<T> = Result<T, Box<dyn Error>>;
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			@ -23,14 +24,13 @@ struct Args {
 | 
			
		|||
    /// Starting paper. We will traverse papers that cite this one
 | 
			
		||||
    #[clap(short, long, value_parser)]
 | 
			
		||||
    paper_id: String,
 | 
			
		||||
 | 
			
		||||
    // Write the results to MongoDB
 | 
			
		||||
    // #[clap(short, long, value_parser)]
 | 
			
		||||
    // write_to_mongo: bool,
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
struct Author {
 | 
			
		||||
    name: String
 | 
			
		||||
    name: String,
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
type Authors = Vec<Author>;
 | 
			
		||||
| 
						 | 
				
			
			@ -56,22 +56,28 @@ struct CitingPaper {
 | 
			
		|||
#[derive(Deserialize, Debug)]
 | 
			
		||||
#[serde(rename_all = "camelCase")]
 | 
			
		||||
struct Citation {
 | 
			
		||||
    citing_paper: CitingPaper
 | 
			
		||||
    citing_paper: CitingPaper,
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/**
 | 
			
		||||
   code: Option<String>,
 | 
			
		||||
* Generic struct to wrap the common API response pattern {data: [...]}
 | 
			
		||||
*/
 | 
			
		||||
#[derive(Deserialize, Debug)]
 | 
			
		||||
struct ApiListResponse<T> {
 | 
			
		||||
    data: Vec<T>
 | 
			
		||||
    data: Option<Vec<T>>,
 | 
			
		||||
    message: Option<String>,
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
// TODO: Cache results in a (separate but local) database such as Redis
 | 
			
		||||
// TODO: Store results in a (separate but local) database such as Postgres
 | 
			
		||||
#[async_recursion]
 | 
			
		||||
async fn get_citations(paper_id: String, depth: u32, authors: &mut Vec<Author>) -> DataResult<Vec<Citation>> {
 | 
			
		||||
async fn get_citations(
 | 
			
		||||
    client: &reqwest::Client,
 | 
			
		||||
    paper_id: String,
 | 
			
		||||
    depth: u32,
 | 
			
		||||
    authors: &mut Vec<Author>,
 | 
			
		||||
) -> DataResult<Vec<Citation>> {
 | 
			
		||||
    // Bound recursion to some depth
 | 
			
		||||
    if depth > MAX_DEPTH {
 | 
			
		||||
        return Ok(vec![]);
 | 
			
		||||
| 
						 | 
				
			
			@ -81,12 +87,15 @@ async fn get_citations(paper_id: String, depth: u32, authors: &mut Vec<Author>)
 | 
			
		|||
    let mut url = String::new();
 | 
			
		||||
    write!(&mut url, "{}/paper/{}/citations", BASE_URL, paper_id)?;
 | 
			
		||||
 | 
			
		||||
    let resp = reqwest::get(url)
 | 
			
		||||
        .await?
 | 
			
		||||
        .text()
 | 
			
		||||
        .await?;
 | 
			
		||||
    let mut req = client.get(url);
 | 
			
		||||
    let api_key = std::env::var("SEMANTIC_SCHOLAR_API_KEY");
 | 
			
		||||
    if api_key.is_ok() {
 | 
			
		||||
        req = req.header("x-api-key", api_key.unwrap());
 | 
			
		||||
    }
 | 
			
		||||
    let resp = req.send().await?.text().await?;
 | 
			
		||||
 | 
			
		||||
    let resp_deserialized_attempt = serde_json::from_str::<ApiListResponse<Citation>>(resp.as_str());
 | 
			
		||||
    let resp_deserialized_attempt =
 | 
			
		||||
        serde_json::from_str::<ApiListResponse<Citation>>(resp.as_str());
 | 
			
		||||
 | 
			
		||||
    if let Err(err) = resp_deserialized_attempt {
 | 
			
		||||
        println!("depth {} paper {} error {}", depth, paper_id, err);
 | 
			
		||||
| 
						 | 
				
			
			@ -95,53 +104,50 @@ async fn get_citations(paper_id: String, depth: u32, authors: &mut Vec<Author>)
 | 
			
		|||
 | 
			
		||||
    let resp_deserialized: ApiListResponse<Citation> = resp_deserialized_attempt.unwrap();
 | 
			
		||||
 | 
			
		||||
    for Citation{citing_paper: CitingPaper{paper_id: citing_paper_id, title}} in resp_deserialized.data {
 | 
			
		||||
        if let (Some(citing_paper_id), Some(title)) = (citing_paper_id, title) {
 | 
			
		||||
            let short_len = min(50, title.len());
 | 
			
		||||
            let (short_title, _) = title.split_at(short_len);
 | 
			
		||||
            println!("depth {} paper {} cites {} title {}", depth, citing_paper_id, paper_id, short_title);
 | 
			
		||||
 | 
			
		||||
            get_citations(citing_paper_id, depth + 1, authors).await?;
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    Ok(vec![])
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
async fn get_paper_info(paper_id: String, depth: u32, authors: &mut Authors) -> DataResult<Vec<Paper>> {
 | 
			
		||||
    // Build the URL
 | 
			
		||||
    let mut url = String::new();
 | 
			
		||||
    // Probably also want these fields: year, publicationDate, journal
 | 
			
		||||
    const fields: &str = "title, authors, citations";
 | 
			
		||||
    write!(&mut url, "{}/paper/{}?fields={}", BASE_URL, paper_id, fields)?;
 | 
			
		||||
 | 
			
		||||
    let resp = reqwest::get(url)
 | 
			
		||||
        .await?
 | 
			
		||||
        .text()
 | 
			
		||||
        .await?;
 | 
			
		||||
 | 
			
		||||
    let resp_deserialized_attempt = serde_json::from_str::<ApiListResponse<Paper>>(resp.as_str());
 | 
			
		||||
 | 
			
		||||
    if let Err(err) = resp_deserialized_attempt {
 | 
			
		||||
        println!("depth {} paper {} error {}", depth, paper_id, err);
 | 
			
		||||
    if resp_deserialized.message.is_some() {
 | 
			
		||||
        println!(
 | 
			
		||||
            "depth {} paper {} error {}",
 | 
			
		||||
            depth,
 | 
			
		||||
            paper_id,
 | 
			
		||||
            resp_deserialized.message.unwrap()
 | 
			
		||||
        );
 | 
			
		||||
        return Ok(vec![]);
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    let resp_deserialized: ApiListResponse<Paper> = resp_deserialized_attempt.unwrap();
 | 
			
		||||
    for Citation {
 | 
			
		||||
        citing_paper:
 | 
			
		||||
            CitingPaper {
 | 
			
		||||
                paper_id: citing_paper_id,
 | 
			
		||||
                title,
 | 
			
		||||
            },
 | 
			
		||||
    } in resp_deserialized.data.unwrap()
 | 
			
		||||
    {
 | 
			
		||||
        if let (Some(citing_paper_id), Some(title)) = (citing_paper_id, title) {
 | 
			
		||||
            let short_len = min(50, title.len());
 | 
			
		||||
            let (short_title, _) = title.split_at(short_len);
 | 
			
		||||
            println!(
 | 
			
		||||
                "depth {} paper {} cites {} title {}",
 | 
			
		||||
                depth, citing_paper_id, paper_id, short_title
 | 
			
		||||
            );
 | 
			
		||||
 | 
			
		||||
            get_citations(&client, citing_paper_id, depth + 1, authors).await?;
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    Ok(vec![])
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
#[tokio::main]
 | 
			
		||||
async fn main() -> Result<(), Box<dyn Error>> {
 | 
			
		||||
    let Args{
 | 
			
		||||
        depth,
 | 
			
		||||
        paper_id,
 | 
			
		||||
        // write_to_mongo,
 | 
			
		||||
    } = Args::parse();
 | 
			
		||||
    let Args { depth, paper_id } = Args::parse();
 | 
			
		||||
 | 
			
		||||
    dotenv().ok();
 | 
			
		||||
 | 
			
		||||
    let mut authors = Authors::new();
 | 
			
		||||
 | 
			
		||||
    get_citations(paper_id, depth, &mut authors).await?;
 | 
			
		||||
    let client: reqwest::Client = reqwest::Client::new();
 | 
			
		||||
 | 
			
		||||
    get_citations(&client, paper_id, depth, &mut authors).await?;
 | 
			
		||||
 | 
			
		||||
    Ok(())
 | 
			
		||||
}
 | 
			
		||||
		Loading…
	
		Reference in New Issue