From 43462e84ea1412bd89127b751496e7241e3eb98c Mon Sep 17 00:00:00 2001 From: Ladd Hoffman Date: Fri, 15 Jul 2022 10:48:33 -0500 Subject: [PATCH 1/4] semantic-scholar-client: Initial commit --- README.md | 93 +-- semantic-scholar-client/.gitignore | 1 + semantic-scholar-client/Cargo.lock | 1162 +++++++++++++++++++++++++++ semantic-scholar-client/Cargo.toml | 14 + semantic-scholar-client/README.md | 4 + semantic-scholar-client/src/main.rs | 110 +++ 6 files changed, 1295 insertions(+), 89 deletions(-) create mode 100644 semantic-scholar-client/.gitignore create mode 100644 semantic-scholar-client/Cargo.lock create mode 100644 semantic-scholar-client/Cargo.toml create mode 100644 semantic-scholar-client/README.md create mode 100644 semantic-scholar-client/src/main.rs diff --git a/README.md b/README.md index ea2a204..687c602 100644 --- a/README.md +++ b/README.md @@ -1,92 +1,7 @@ # Science Publishing DAO +## Subprojects - -## Getting started - -To make it easy for you to get started with GitLab, here's a list of recommended next steps. - -Already a pro? Just edit this README.md and make it your own. Want to make it easy? [Use the template at the bottom](#editing-this-readme)! 
- -## Add your files - -- [ ] [Create](https://docs.gitlab.com/ee/user/project/repository/web_editor.html#create-a-file) or [upload](https://docs.gitlab.com/ee/user/project/repository/web_editor.html#upload-a-file) files -- [ ] [Add files using the command line](https://docs.gitlab.com/ee/gitlab-basics/add-file.html#add-a-file-using-the-command-line) or push an existing Git repository with the following command: - -``` -cd existing_repo -git remote add origin https://gitlab.com/dao-governance-framework/science-publishing-dao.git -git branch -M main -git push -uf origin main -``` - -## Integrate with your tools - -- [ ] [Set up project integrations](https://gitlab.com/dao-governance-framework/science-publishing-dao/-/settings/integrations) - -## Collaborate with your team - -- [ ] [Invite team members and collaborators](https://docs.gitlab.com/ee/user/project/members/) -- [ ] [Create a new merge request](https://docs.gitlab.com/ee/user/project/merge_requests/creating_merge_requests.html) -- [ ] [Automatically close issues from merge requests](https://docs.gitlab.com/ee/user/project/issues/managing_issues.html#closing-issues-automatically) -- [ ] [Enable merge request approvals](https://docs.gitlab.com/ee/user/project/merge_requests/approvals/) -- [ ] [Automatically merge when pipeline succeeds](https://docs.gitlab.com/ee/user/project/merge_requests/merge_when_pipeline_succeeds.html) - -## Test and Deploy - -Use the built-in continuous integration in GitLab. 
- -- [ ] [Get started with GitLab CI/CD](https://docs.gitlab.com/ee/ci/quick_start/index.html) -- [ ] [Analyze your code for known vulnerabilities with Static Application Security Testing(SAST)](https://docs.gitlab.com/ee/user/application_security/sast/) -- [ ] [Deploy to Kubernetes, Amazon EC2, or Amazon ECS using Auto Deploy](https://docs.gitlab.com/ee/topics/autodevops/requirements.html) -- [ ] [Use pull-based deployments for improved Kubernetes management](https://docs.gitlab.com/ee/user/clusters/agent/) -- [ ] [Set up protected environments](https://docs.gitlab.com/ee/ci/environments/protected_environments.html) - -*** - -# Editing this README - -When you're ready to make this README your own, just edit this file and use the handy template below (or feel free to structure it however you want - this is just a starting point!). Thank you to [makeareadme.com](https://www.makeareadme.com/) for this template. - -## Suggestions for a good README -Every project is different, so consider which of these sections apply to yours. The sections used in the template are suggestions for most open source projects. Also keep in mind that while a README can be too long and detailed, too long is better than too short. If you think your README is too long, consider utilizing another form of documentation rather than cutting out information. - -## Name -Choose a self-explaining name for your project. - -## Description -Let people know what your project can do specifically. Provide context and add a link to any reference visitors might be unfamiliar with. A list of Features or a Background subsection can also be added here. If there are alternatives to your project, this is a good place to list differentiating factors. - -## Badges -On some READMEs, you may see small images that convey metadata, such as whether or not all the tests are passing for the project. You can use Shields to add some to your README. Many services also have instructions for adding a badge. 
- -## Visuals -Depending on what you are making, it can be a good idea to include screenshots or even a video (you'll frequently see GIFs rather than actual videos). Tools like ttygif can help, but check out Asciinema for a more sophisticated method. - -## Installation -Within a particular ecosystem, there may be a common way of installing things, such as using Yarn, NuGet, or Homebrew. However, consider the possibility that whoever is reading your README is a novice and would like more guidance. Listing specific steps helps remove ambiguity and gets people to using your project as quickly as possible. If it only runs in a specific context like a particular programming language version or operating system or has dependencies that have to be installed manually, also add a Requirements subsection. - -## Usage -Use examples liberally, and show the expected output if you can. It's helpful to have inline the smallest example of usage that you can demonstrate, while providing links to more sophisticated examples if they are too long to reasonably include in the README. - -## Support -Tell people where they can go to for help. It can be any combination of an issue tracker, a chat room, an email address, etc. - -## Roadmap -If you have ideas for releases in the future, it is a good idea to list them in the README. - -## Contributing -State if you are open to contributions and what your requirements are for accepting them. - -For people who want to make changes to your project, it's helpful to have some documentation on how to get started. Perhaps there is a script that they should run or some environment variables that they need to set. Make these steps explicit. These instructions could also be useful to your future self. - -You can also document commands to lint the code or run tests. These steps help to ensure high code quality and reduce the likelihood that the changes inadvertently break something. 
Having instructions for running tests is especially helpful if it requires external setup, such as starting a Selenium server for testing in a browser. - -## Authors and acknowledgment -Show your appreciation to those who have contributed to the project. - -## License -For open source projects, say how it is licensed. - -## Project status -If you have run out of energy or time for your project, put a note at the top of the README saying that development has slowed down or stopped completely. Someone may choose to fork your project or volunteer to step in as a maintainer or owner, allowing your project to keep going. You can also make an explicit request for maintainers. +| Name | Description | +| --- | --- | +| [semantic-scholar-client](./semantic-scholar-client) | Rust utility for reading data from the [Semantic Scholar API](https://api.semanticscholar.org/api-docs) | diff --git a/semantic-scholar-client/.gitignore b/semantic-scholar-client/.gitignore new file mode 100644 index 0000000..ea8c4bf --- /dev/null +++ b/semantic-scholar-client/.gitignore @@ -0,0 +1 @@ +/target diff --git a/semantic-scholar-client/Cargo.lock b/semantic-scholar-client/Cargo.lock new file mode 100644 index 0000000..da8f256 --- /dev/null +++ b/semantic-scholar-client/Cargo.lock @@ -0,0 +1,1162 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. 
+version = 3 + +[[package]] +name = "async-recursion" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2cda8f4bcc10624c4e85bc66b3f452cca98cfa5ca002dc83a16aad2367641bea" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "atty" +version = "0.2.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d9b39be18770d11421cdb1b9947a45dd3f37e93092cbf377614828a319d5fee8" +dependencies = [ + "hermit-abi", + "libc", + "winapi", +] + +[[package]] +name = "autocfg" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa" + +[[package]] +name = "base64" +version = "0.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "904dfeac50f3cdaba28fc6f57fdcddb75f49ed61346676a78c4ffe55877802fd" + +[[package]] +name = "bitflags" +version = "1.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" + +[[package]] +name = "bumpalo" +version = "3.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "37ccbd214614c6783386c1af30caf03192f17891059cecc394b4fb119e363de3" + +[[package]] +name = "bytes" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c4872d67bab6358e59559027aa3b9157c53d9358c51423c17554809a8858e0f8" + +[[package]] +name = "cc" +version = "1.0.73" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2fff2a6927b3bb87f9595d67196a70493f627687a71d87a0d692242c33f58c11" + +[[package]] +name = "cfg-if" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" + +[[package]] +name = "clap" +version = "3.2.11" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "d646c7ade5eb07c4aa20e907a922750df0c448892513714fd3e4acbc7130829f" +dependencies = [ + "atty", + "bitflags", + "clap_derive", + "clap_lex", + "indexmap", + "once_cell", + "strsim", + "termcolor", + "textwrap", +] + +[[package]] +name = "clap_derive" +version = "3.2.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "759bf187376e1afa7b85b959e6a664a3e7a95203415dba952ad19139e798f902" +dependencies = [ + "heck", + "proc-macro-error", + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "clap_lex" +version = "0.2.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2850f2f5a82cbf437dd5af4d49848fbdfc27c157c3d010345776f952765261c5" +dependencies = [ + "os_str_bytes", +] + +[[package]] +name = "core-foundation" +version = "0.9.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "194a7a9e6de53fa55116934067c844d9d749312f75c6f6d0980e8c252f8c2146" +dependencies = [ + "core-foundation-sys", + "libc", +] + +[[package]] +name = "core-foundation-sys" +version = "0.8.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5827cebf4670468b8772dd191856768aedcb1b0278a04f989f7766351917b9dc" + +[[package]] +name = "encoding_rs" +version = "0.8.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9852635589dc9f9ea1b6fe9f05b50ef208c85c834a562f0c6abb1c475736ec2b" +dependencies = [ + "cfg-if", +] + +[[package]] +name = "fastrand" +version = "1.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c3fcf0cee53519c866c09b5de1f6c56ff9d647101f81c1964fa632e148896cdf" +dependencies = [ + "instant", +] + +[[package]] +name = "fnv" +version = "1.0.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1" + +[[package]] +name = "foreign-types" +version = "0.3.2" 
+source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f6f339eb8adc052cd2ca78910fda869aefa38d22d5cb648e6485e4d3fc06f3b1" +dependencies = [ + "foreign-types-shared", +] + +[[package]] +name = "foreign-types-shared" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "00b0228411908ca8685dba7fc2cdd70ec9990a6e753e89b6ac91a84c40fbaf4b" + +[[package]] +name = "form_urlencoded" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5fc25a87fa4fd2094bffb06925852034d90a17f0d1e05197d4956d3555752191" +dependencies = [ + "matches", + "percent-encoding", +] + +[[package]] +name = "futures-channel" +version = "0.3.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c3083ce4b914124575708913bca19bfe887522d6e2e6d0952943f5eac4a74010" +dependencies = [ + "futures-core", +] + +[[package]] +name = "futures-core" +version = "0.3.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0c09fd04b7e4073ac7156a9539b57a484a8ea920f79c7c675d05d289ab6110d3" + +[[package]] +name = "futures-sink" +version = "0.3.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "21163e139fa306126e6eedaf49ecdb4588f939600f0b1e770f4205ee4b7fa868" + +[[package]] +name = "futures-task" +version = "0.3.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "57c66a976bf5909d801bbef33416c41372779507e7a6b3a5e25e4749c58f776a" + +[[package]] +name = "futures-util" +version = "0.3.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d8b7abd5d659d9b90c8cba917f6ec750a74e2dc23902ef9cd4cc8c8b22e6036a" +dependencies = [ + "futures-core", + "futures-task", + "pin-project-lite", + "pin-utils", +] + +[[package]] +name = "h2" +version = "0.3.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"37a82c6d637fc9515a4694bbf1cb2457b79d81ce52b3108bdeea58b07dd34a57" +dependencies = [ + "bytes", + "fnv", + "futures-core", + "futures-sink", + "futures-util", + "http", + "indexmap", + "slab", + "tokio", + "tokio-util", + "tracing", +] + +[[package]] +name = "hashbrown" +version = "0.12.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "607c8a29735385251a339424dd462993c0fed8fa09d378f259377df08c126022" + +[[package]] +name = "heck" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2540771e65fc8cb83cd6e8a237f70c319bd5c29f78ed1084ba5d50eeac86f7f9" + +[[package]] +name = "hermit-abi" +version = "0.1.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "62b467343b94ba476dcb2500d242dadbb39557df889310ac77c5d99100aaac33" +dependencies = [ + "libc", +] + +[[package]] +name = "http" +version = "0.2.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "75f43d41e26995c17e71ee126451dd3941010b0514a81a9d11f3b341debc2399" +dependencies = [ + "bytes", + "fnv", + "itoa", +] + +[[package]] +name = "http-body" +version = "0.4.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d5f38f16d184e36f2408a55281cd658ecbd3ca05cce6d6510a176eca393e26d1" +dependencies = [ + "bytes", + "http", + "pin-project-lite", +] + +[[package]] +name = "httparse" +version = "1.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "496ce29bb5a52785b44e0f7ca2847ae0bb839c9bd28f69acac9b99d461c0c04c" + +[[package]] +name = "httpdate" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c4a1e36c821dbe04574f602848a19f742f4fb3c98d40449f11bcad18d6b17421" + +[[package]] +name = "hyper" +version = "0.14.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "02c929dc5c39e335a03c405292728118860721b10190d98c2a0f0efd5baafbac" +dependencies = [ + "bytes", + 
"futures-channel", + "futures-core", + "futures-util", + "h2", + "http", + "http-body", + "httparse", + "httpdate", + "itoa", + "pin-project-lite", + "socket2", + "tokio", + "tower-service", + "tracing", + "want", +] + +[[package]] +name = "hyper-tls" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d6183ddfa99b85da61a140bea0efc93fdf56ceaa041b37d553518030827f9905" +dependencies = [ + "bytes", + "hyper", + "native-tls", + "tokio", + "tokio-native-tls", +] + +[[package]] +name = "idna" +version = "0.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "418a0a6fab821475f634efe3ccc45c013f742efe03d853e8d3355d5cb850ecf8" +dependencies = [ + "matches", + "unicode-bidi", + "unicode-normalization", +] + +[[package]] +name = "indexmap" +version = "1.9.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "10a35a97730320ffe8e2d410b5d3b69279b98d2c14bdb8b70ea89ecf7888d41e" +dependencies = [ + "autocfg", + "hashbrown", +] + +[[package]] +name = "instant" +version = "0.1.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7a5bbe824c507c5da5956355e86a746d82e0e1464f65d862cc5e71da70e94b2c" +dependencies = [ + "cfg-if", +] + +[[package]] +name = "ipnet" +version = "2.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "879d54834c8c76457ef4293a689b2a8c59b076067ad77b15efafbb05f92a592b" + +[[package]] +name = "itoa" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "112c678d4050afce233f4f2852bb2eb519230b3cf12f33585275537d7e41578d" + +[[package]] +name = "js-sys" +version = "0.3.58" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c3fac17f7123a73ca62df411b1bf727ccc805daa070338fda671c86dac1bdc27" +dependencies = [ + "wasm-bindgen", +] + +[[package]] +name = "lazy_static" +version = "1.4.0" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" + +[[package]] +name = "libc" +version = "0.2.126" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "349d5a591cd28b49e1d1037471617a32ddcda5731b99419008085f72d5a53836" + +[[package]] +name = "lock_api" +version = "0.4.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "327fa5b6a6940e4699ec49a9beae1ea4845c6bab9314e4f84ac68742139d8c53" +dependencies = [ + "autocfg", + "scopeguard", +] + +[[package]] +name = "log" +version = "0.4.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "abb12e687cfb44aa40f41fc3978ef76448f9b6038cad6aef4259d3c095a2382e" +dependencies = [ + "cfg-if", +] + +[[package]] +name = "matches" +version = "0.1.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a3e378b66a060d48947b590737b30a1be76706c8dd7b8ba0f2fe3989c68a853f" + +[[package]] +name = "memchr" +version = "2.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2dffe52ecf27772e601905b7522cb4ef790d2cc203488bbd0e2fe85fcb74566d" + +[[package]] +name = "mime" +version = "0.3.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2a60c7ce501c71e03a9c9c0d35b861413ae925bd979cc7a4e30d060069aaac8d" + +[[package]] +name = "mio" +version = "0.8.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "57ee1c23c7c63b0c9250c339ffdc69255f110b298b901b9f6c82547b7b87caaf" +dependencies = [ + "libc", + "log", + "wasi", + "windows-sys", +] + +[[package]] +name = "native-tls" +version = "0.2.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fd7e2f3618557f980e0b17e8856252eee3c97fa12c54dff0ca290fb6266ca4a9" +dependencies = [ + "lazy_static", + "libc", + "log", + "openssl", + "openssl-probe", + "openssl-sys", + "schannel", + "security-framework", + 
"security-framework-sys", + "tempfile", +] + +[[package]] +name = "num_cpus" +version = "1.13.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "19e64526ebdee182341572e50e9ad03965aa510cd94427a4549448f285e957a1" +dependencies = [ + "hermit-abi", + "libc", +] + +[[package]] +name = "once_cell" +version = "1.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "18a6dbe30758c9f83eb00cbea4ac95966305f5a7772f3f42ebfc7fc7eddbd8e1" + +[[package]] +name = "openssl" +version = "0.10.41" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "618febf65336490dfcf20b73f885f5651a0c89c64c2d4a8c3662585a70bf5bd0" +dependencies = [ + "bitflags", + "cfg-if", + "foreign-types", + "libc", + "once_cell", + "openssl-macros", + "openssl-sys", +] + +[[package]] +name = "openssl-macros" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b501e44f11665960c7e7fcf062c7d96a14ade4aa98116c004b2e37b5be7d736c" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "openssl-probe" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ff011a302c396a5197692431fc1948019154afc178baf7d8e37367442a4601cf" + +[[package]] +name = "openssl-sys" +version = "0.9.75" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e5f9bd0c2710541a3cda73d6f9ac4f1b240de4ae261065d309dbe73d9dceb42f" +dependencies = [ + "autocfg", + "cc", + "libc", + "pkg-config", + "vcpkg", +] + +[[package]] +name = "os_str_bytes" +version = "6.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "21326818e99cfe6ce1e524c2a805c189a99b5ae555a35d19f9a284b427d86afa" + +[[package]] +name = "parking_lot" +version = "0.12.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3742b2c103b9f06bc9fff0a37ff4912935851bee6d36f3c02bcc755bcfec228f" +dependencies = [ + "lock_api", + 
"parking_lot_core", +] + +[[package]] +name = "parking_lot_core" +version = "0.9.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "09a279cbf25cb0757810394fbc1e359949b59e348145c643a939a525692e6929" +dependencies = [ + "cfg-if", + "libc", + "redox_syscall", + "smallvec", + "windows-sys", +] + +[[package]] +name = "percent-encoding" +version = "2.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d4fd5641d01c8f18a23da7b6fe29298ff4b55afcccdf78973b24cf3175fee32e" + +[[package]] +name = "pin-project-lite" +version = "0.2.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e0a7ae3ac2f1173085d398531c705756c94a4c56843785df85a60c1a0afac116" + +[[package]] +name = "pin-utils" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184" + +[[package]] +name = "pkg-config" +version = "0.3.25" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1df8c4ec4b0627e53bdf214615ad287367e482558cf84b109250b37464dc03ae" + +[[package]] +name = "proc-macro-error" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "da25490ff9892aab3fcf7c36f08cfb902dd3e71ca0f9f9517bea02a73a5ce38c" +dependencies = [ + "proc-macro-error-attr", + "proc-macro2", + "quote", + "syn", + "version_check", +] + +[[package]] +name = "proc-macro-error-attr" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a1be40180e52ecc98ad80b184934baf3d0d29f979574e439af5a55274b35f869" +dependencies = [ + "proc-macro2", + "quote", + "version_check", +] + +[[package]] +name = "proc-macro2" +version = "1.0.40" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dd96a1e8ed2596c337f8eae5f24924ec83f5ad5ab21ea8e455d3566c69fbcaf7" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "quote" 
+version = "1.0.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3bcdf212e9776fbcb2d23ab029360416bb1706b1aea2d1a5ba002727cbcab804" +dependencies = [ + "proc-macro2", +] + +[[package]] +name = "redox_syscall" +version = "0.2.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "62f25bc4c7e55e0b0b7a1d43fb893f4fa1361d0abe38b9ce4f323c2adfe6ef42" +dependencies = [ + "bitflags", +] + +[[package]] +name = "remove_dir_all" +version = "0.5.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3acd125665422973a33ac9d3dd2df85edad0f4ae9b00dafb1a05e43a9f5ef8e7" +dependencies = [ + "winapi", +] + +[[package]] +name = "reqwest" +version = "0.11.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b75aa69a3f06bbcc66ede33af2af253c6f7a86b1ca0033f60c580a27074fbf92" +dependencies = [ + "base64", + "bytes", + "encoding_rs", + "futures-core", + "futures-util", + "h2", + "http", + "http-body", + "hyper", + "hyper-tls", + "ipnet", + "js-sys", + "lazy_static", + "log", + "mime", + "native-tls", + "percent-encoding", + "pin-project-lite", + "serde", + "serde_json", + "serde_urlencoded", + "tokio", + "tokio-native-tls", + "tower-service", + "url", + "wasm-bindgen", + "wasm-bindgen-futures", + "web-sys", + "winreg", +] + +[[package]] +name = "ryu" +version = "1.0.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f3f6f92acf49d1b98f7a81226834412ada05458b7364277387724a237f062695" + +[[package]] +name = "schannel" +version = "0.1.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "88d6731146462ea25d9244b2ed5fd1d716d25c52e4d54aa4fb0f3c4e9854dbe2" +dependencies = [ + "lazy_static", + "windows-sys", +] + +[[package]] +name = "scopeguard" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d29ab0c6d3fc0ee92fe66e2d99f700eab17a8d57d1c1d3b748380fb20baa78cd" + +[[package]] 
+name = "security-framework" +version = "2.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2dc14f172faf8a0194a3aded622712b0de276821addc574fa54fc0a1167e10dc" +dependencies = [ + "bitflags", + "core-foundation", + "core-foundation-sys", + "libc", + "security-framework-sys", +] + +[[package]] +name = "security-framework-sys" +version = "2.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0160a13a177a45bfb43ce71c01580998474f556ad854dcbca936dd2841a5c556" +dependencies = [ + "core-foundation-sys", + "libc", +] + +[[package]] +name = "semantic-scholar-client" +version = "0.1.0" +dependencies = [ + "async-recursion", + "clap", + "reqwest", + "serde", + "serde_json", + "tokio", +] + +[[package]] +name = "serde" +version = "1.0.139" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0171ebb889e45aa68b44aee0859b3eede84c6f5f5c228e6f140c0b2a0a46cad6" +dependencies = [ + "serde_derive", +] + +[[package]] +name = "serde_derive" +version = "1.0.139" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dc1d3230c1de7932af58ad8ffbe1d784bd55efd5a9d84ac24f69c72d83543dfb" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "serde_json" +version = "1.0.82" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "82c2c1fdcd807d1098552c5b9a36e425e42e9fbd7c6a37a8425f390f781f7fa7" +dependencies = [ + "itoa", + "ryu", + "serde", +] + +[[package]] +name = "serde_urlencoded" +version = "0.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d3491c14715ca2294c4d6a88f15e84739788c1d030eed8c110436aafdaa2f3fd" +dependencies = [ + "form_urlencoded", + "itoa", + "ryu", + "serde", +] + +[[package]] +name = "signal-hook-registry" +version = "1.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e51e73328dc4ac0c7ccbda3a494dfa03df1de2f46018127f60c693f2648455b0" 
+dependencies = [ + "libc", +] + +[[package]] +name = "slab" +version = "0.4.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "eb703cfe953bccee95685111adeedb76fabe4e97549a58d16f03ea7b9367bb32" + +[[package]] +name = "smallvec" +version = "1.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2fd0db749597d91ff862fd1d55ea87f7855a744a8425a64695b6fca237d1dad1" + +[[package]] +name = "socket2" +version = "0.4.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "66d72b759436ae32898a2af0a14218dbf55efde3feeb170eb623637db85ee1e0" +dependencies = [ + "libc", + "winapi", +] + +[[package]] +name = "strsim" +version = "0.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "73473c0e59e6d5812c5dfe2a064a6444949f089e20eec9a2e5506596494e4623" + +[[package]] +name = "syn" +version = "1.0.98" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c50aef8a904de4c23c788f104b7dddc7d6f79c647c7c8ce4cc8f73eb0ca773dd" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "tempfile" +version = "3.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5cdb1ef4eaeeaddc8fbd371e5017057064af0911902ef36b39801f67cc6d79e4" +dependencies = [ + "cfg-if", + "fastrand", + "libc", + "redox_syscall", + "remove_dir_all", + "winapi", +] + +[[package]] +name = "termcolor" +version = "1.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bab24d30b911b2376f3a13cc2cd443142f0c81dda04c118693e35b3835757755" +dependencies = [ + "winapi-util", +] + +[[package]] +name = "textwrap" +version = "0.15.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b1141d4d61095b28419e22cb0bbf02755f5e54e0526f97f1e3d1d160e60885fb" + +[[package]] +name = "tinyvec" +version = "1.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"87cc5ceb3875bb20c2890005a4e226a4651264a5c75edb2421b52861a0a0cb50" +dependencies = [ + "tinyvec_macros", +] + +[[package]] +name = "tinyvec_macros" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cda74da7e1a664f795bb1f8a87ec406fb89a02522cf6e50620d016add6dbbf5c" + +[[package]] +name = "tokio" +version = "1.20.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "57aec3cfa4c296db7255446efb4928a6be304b431a806216105542a67b6ca82e" +dependencies = [ + "autocfg", + "bytes", + "libc", + "memchr", + "mio", + "num_cpus", + "once_cell", + "parking_lot", + "pin-project-lite", + "signal-hook-registry", + "socket2", + "tokio-macros", + "winapi", +] + +[[package]] +name = "tokio-macros" +version = "1.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9724f9a975fb987ef7a3cd9be0350edcbe130698af5b8f7a631e23d42d052484" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "tokio-native-tls" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f7d995660bd2b7f8c1568414c1126076c13fbb725c40112dc0120b78eb9b717b" +dependencies = [ + "native-tls", + "tokio", +] + +[[package]] +name = "tokio-util" +version = "0.7.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cc463cd8deddc3770d20f9852143d50bf6094e640b485cb2e189a2099085ff45" +dependencies = [ + "bytes", + "futures-core", + "futures-sink", + "pin-project-lite", + "tokio", + "tracing", +] + +[[package]] +name = "tower-service" +version = "0.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b6bc1c9ce2b5135ac7f93c72918fc37feb872bdc6a5533a8b85eb4b86bfdae52" + +[[package]] +name = "tracing" +version = "0.1.35" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a400e31aa60b9d44a52a8ee0343b5b18566b03a8321e0d321f695cf56e940160" +dependencies = [ + "cfg-if", + 
"pin-project-lite", + "tracing-core", +] + +[[package]] +name = "tracing-core" +version = "0.1.28" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7b7358be39f2f274f322d2aaed611acc57f382e8eb1e5b48cb9ae30933495ce7" +dependencies = [ + "once_cell", +] + +[[package]] +name = "try-lock" +version = "0.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "59547bce71d9c38b83d9c0e92b6066c4253371f15005def0c30d9657f50c7642" + +[[package]] +name = "unicode-bidi" +version = "0.3.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "099b7128301d285f79ddd55b9a83d5e6b9e97c92e0ea0daebee7263e932de992" + +[[package]] +name = "unicode-ident" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5bd2fe26506023ed7b5e1e315add59d6f584c621d037f9368fea9cfb988f368c" + +[[package]] +name = "unicode-normalization" +version = "0.1.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "854cbdc4f7bc6ae19c820d44abdc3277ac3e1b2b93db20a636825d9322fb60e6" +dependencies = [ + "tinyvec", +] + +[[package]] +name = "url" +version = "2.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a507c383b2d33b5fc35d1861e77e6b383d158b2da5e14fe51b83dfedf6fd578c" +dependencies = [ + "form_urlencoded", + "idna", + "matches", + "percent-encoding", +] + +[[package]] +name = "vcpkg" +version = "0.2.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "accd4ea62f7bb7a82fe23066fb0957d48ef677f6eeb8215f372f52e48bb32426" + +[[package]] +name = "version_check" +version = "0.9.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f" + +[[package]] +name = "want" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"1ce8a968cb1cd110d136ff8b819a556d6fb6d919363c61534f6860c7eb172ba0" +dependencies = [ + "log", + "try-lock", +] + +[[package]] +name = "wasi" +version = "0.11.0+wasi-snapshot-preview1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423" + +[[package]] +name = "wasm-bindgen" +version = "0.2.81" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7c53b543413a17a202f4be280a7e5c62a1c69345f5de525ee64f8cfdbc954994" +dependencies = [ + "cfg-if", + "wasm-bindgen-macro", +] + +[[package]] +name = "wasm-bindgen-backend" +version = "0.2.81" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5491a68ab4500fa6b4d726bd67408630c3dbe9c4fe7bda16d5c82a1fd8c7340a" +dependencies = [ + "bumpalo", + "lazy_static", + "log", + "proc-macro2", + "quote", + "syn", + "wasm-bindgen-shared", +] + +[[package]] +name = "wasm-bindgen-futures" +version = "0.4.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "de9a9cec1733468a8c657e57fa2413d2ae2c0129b95e87c5b72b8ace4d13f31f" +dependencies = [ + "cfg-if", + "js-sys", + "wasm-bindgen", + "web-sys", +] + +[[package]] +name = "wasm-bindgen-macro" +version = "0.2.81" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c441e177922bc58f1e12c022624b6216378e5febc2f0533e41ba443d505b80aa" +dependencies = [ + "quote", + "wasm-bindgen-macro-support", +] + +[[package]] +name = "wasm-bindgen-macro-support" +version = "0.2.81" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7d94ac45fcf608c1f45ef53e748d35660f168490c10b23704c7779ab8f5c3048" +dependencies = [ + "proc-macro2", + "quote", + "syn", + "wasm-bindgen-backend", + "wasm-bindgen-shared", +] + +[[package]] +name = "wasm-bindgen-shared" +version = "0.2.81" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"6a89911bd99e5f3659ec4acf9c4d93b0a90fe4a2a11f15328472058edc5261be" + +[[package]] +name = "web-sys" +version = "0.3.58" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2fed94beee57daf8dd7d51f2b15dc2bcde92d7a72304cdf662a4371008b71b90" +dependencies = [ + "js-sys", + "wasm-bindgen", +] + +[[package]] +name = "winapi" +version = "0.3.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419" +dependencies = [ + "winapi-i686-pc-windows-gnu", + "winapi-x86_64-pc-windows-gnu", +] + +[[package]] +name = "winapi-i686-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" + +[[package]] +name = "winapi-util" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "70ec6ce85bb158151cae5e5c87f95a8e97d2c0c4b001223f33a334e3ce5de178" +dependencies = [ + "winapi", +] + +[[package]] +name = "winapi-x86_64-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" + +[[package]] +name = "windows-sys" +version = "0.36.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ea04155a16a59f9eab786fe12a4a450e75cdb175f9e0d80da1e17db09f55b8d2" +dependencies = [ + "windows_aarch64_msvc", + "windows_i686_gnu", + "windows_i686_msvc", + "windows_x86_64_gnu", + "windows_x86_64_msvc", +] + +[[package]] +name = "windows_aarch64_msvc" +version = "0.36.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9bb8c3fd39ade2d67e9874ac4f3db21f0d710bee00fe7cab16949ec184eeaa47" + +[[package]] +name = "windows_i686_gnu" +version = "0.36.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"180e6ccf01daf4c426b846dfc66db1fc518f074baa793aa7d9b9aaeffad6a3b6" + +[[package]] +name = "windows_i686_msvc" +version = "0.36.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e2e7917148b2812d1eeafaeb22a97e4813dfa60a3f8f78ebe204bcc88f12f024" + +[[package]] +name = "windows_x86_64_gnu" +version = "0.36.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4dcd171b8776c41b97521e5da127a2d86ad280114807d0b2ab1e462bc764d9e1" + +[[package]] +name = "windows_x86_64_msvc" +version = "0.36.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c811ca4a8c853ef420abd8592ba53ddbbac90410fab6903b3e79972a631f7680" + +[[package]] +name = "winreg" +version = "0.10.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "80d0f4e272c85def139476380b12f9ac60926689dd2e01d4923222f40580869d" +dependencies = [ + "winapi", +] diff --git a/semantic-scholar-client/Cargo.toml b/semantic-scholar-client/Cargo.toml new file mode 100644 index 0000000..a3aad49 --- /dev/null +++ b/semantic-scholar-client/Cargo.toml @@ -0,0 +1,14 @@ +[package] +name = "semantic-scholar-client" +version = "0.1.0" +edition = "2021" + +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html + +[dependencies] +async-recursion = "1.0.0" +clap = { version = "3.2.11", features = ["derive"] } +reqwest = { version = "0.11.11", features = ["json"] } +serde = { version = "1.0.139", features = ["derive"] } +serde_json = "1.0.82" +tokio = { version = "1.20.0", features = ["full"] } diff --git a/semantic-scholar-client/README.md b/semantic-scholar-client/README.md new file mode 100644 index 0000000..b9de209 --- /dev/null +++ b/semantic-scholar-client/README.md @@ -0,0 +1,4 @@ +This utility is able to fetch data fromSemantic Scholar API. + +Initial proof of concept here writes the result to stdout. Work in progress to pipe this data into an operating database. 
+ diff --git a/semantic-scholar-client/src/main.rs b/semantic-scholar-client/src/main.rs new file mode 100644 index 0000000..3796e39 --- /dev/null +++ b/semantic-scholar-client/src/main.rs @@ -0,0 +1,110 @@ +// During development, allowing dead code +#![allow(dead_code)] + +use async_recursion::async_recursion; +use clap::Parser; +use std::fmt::Write; +use std::error::Error; +// use std::time::Duration; +// use std::thread::sleep; +use serde::Deserialize; + +type DataResult = Result>; + +const BASE_URL: &str = "https://api.semanticscholar.org/graph/v1"; +const MAX_DEPTH: u32 = 3; + +#[derive(Parser, Debug)] +#[clap(author, version, about, long_about = None)] +struct Args { + /// URL to query + #[clap(short, long, value_parser)] + paper_id: String, +} + +struct Author { + name: String +} +struct Paper { + authors: Vec +} + +/** + * Occurs within Citation struct + */ +#[derive(Deserialize, Debug)] +#[serde(rename_all = "camelCase")] +struct CitingPaper { + paper_id: String, + title: String, +} + +#[derive(Deserialize, Debug)] +#[serde(rename_all = "camelCase")] +struct Citation { + citing_paper: CitingPaper +} + +/** + * Generic struct to wrap the common API response pattern {data: [...]} + */ +#[derive(Deserialize, Debug)] +struct ApiListResponse { + data: Vec +} + + +// TODO: Cache results in a (separate but local) database such as Redis +// TODO: Store results in a (separate but local) database such as Postgres +#[async_recursion] +async fn get_citations(paper_id: String, depth: u32) -> DataResult> { + // Bound recursion to some depth + if depth > MAX_DEPTH { + return Ok(vec![]); + } + + // Naieve approach to possible rate-limiting + // sleep(Duration::new(1, 0)); + + // Build the URL + let mut url = String::new(); + write!(&mut url, "{}/paper/{}/citations", BASE_URL, paper_id)?; + + let resp = reqwest::get(url) + .await? 
+ .text() + .await?; + // .json::().await?; + + let resp_deserialized_attempt = serde_json::from_str::>(resp.as_str()); + + if let Err(err) = resp_deserialized_attempt { + println!("depth {} paper {} error {}", depth, paper_id, err); + return Ok(vec![]); + } + + let resp_deserialized: ApiListResponse = resp_deserialized_attempt.unwrap(); + + for Citation{citing_paper: CitingPaper{paper_id: cited_id, title}} in resp_deserialized.data { + println!("depth {} paper {} cites {} title {}", depth, paper_id, cited_id, title); + + get_citations(cited_id, depth + 1).await?; + } + + Ok(vec![]) +} + + + +#[tokio::main] +async fn main() -> Result<(), Box> { + let Args{ paper_id } = Args::parse(); + + // let mut authors: Vec = Vec::new(); + + // let citations = + + get_citations(paper_id, 0).await?; + + Ok(()) +} \ No newline at end of file From ff7d6134f15e809b8b5108bd12ff27559ff67b2f Mon Sep 17 00:00:00 2001 From: Ladd Hoffman Date: Fri, 15 Jul 2022 10:57:27 -0500 Subject: [PATCH 2/4] semantic-scholar-client: Extend Readme --- semantic-scholar-client/README.md | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/semantic-scholar-client/README.md b/semantic-scholar-client/README.md index b9de209..2ff88f0 100644 --- a/semantic-scholar-client/README.md +++ b/semantic-scholar-client/README.md @@ -2,3 +2,15 @@ This utility is able to fetch data fromSemantic Scholar API. Initial proof of concept here writes the result to stdout. Work in progress to pipe this data into an operating database. +### Usage + + cargo run -- --paper-id + +`paper_id` is in accordance with [Semantic Scholar API](https://api.semanticscholar.org/api-docs/) rules. + +In its current form the utility outputs the citation graph up to a depth of 3 citations. + +Next steps include +- developing strategies for deciding where to terminate a given traversal +- providing an HTTP and perhaps WebSocket interface that can be used to talk to this process during its operation. 
+ This can enable us to pipe the data to other tasks, to monitor, to start/stop, and even to make configuration changes. \ No newline at end of file From 68d04117c996d8b6e4df7f59454295f2b468e772 Mon Sep 17 00:00:00 2001 From: Ladd Hoffman Date: Mon, 7 Nov 2022 17:31:37 -0600 Subject: [PATCH 3/4] Successfully consuming data --- diagrams/contracts.sq | 101 +++ forum-network-node/README.md | 84 +++ forum-network-node/forum.md | 235 +++++++ notes.md | 4 + semantic-scholar-client/Cargo.lock | 774 +++++++++++++++++++++- semantic-scholar-client/Cargo.toml | 1 + semantic-scholar-client/README.md | 23 +- semantic-scholar-client/src/bin/import.rs | 147 ++++ semantic-scholar-client/src/bin/mongo.rs | 59 ++ semantic-scholar-client/src/main.rs | 110 --- 10 files changed, 1417 insertions(+), 121 deletions(-) create mode 100644 diagrams/contracts.sq create mode 100644 forum-network-node/README.md create mode 100644 forum-network-node/forum.md create mode 100644 notes.md create mode 100644 semantic-scholar-client/src/bin/import.rs create mode 100644 semantic-scholar-client/src/bin/mongo.rs delete mode 100644 semantic-scholar-client/src/main.rs diff --git a/diagrams/contracts.sq b/diagrams/contracts.sq new file mode 100644 index 0000000..01d5e3a --- /dev/null +++ b/diagrams/contracts.sq @@ -0,0 +1,101 @@ +participantgroup #lightblue Voting Contract + participantgroup Methods + boundary "createVote()" as create_vote + boundary "voteResults()" as voting_vote_result + end + participantgroup Data + database "Params" as voting_params + end +end + +participantgroup #lightyellow Vote Contract + participantgroup Methods + boundary "vote()" as vote + end + participantgroup Data + database "Votes" as votes + end +end + +participantgroup #pink Forum Contract + participantgroup Methods + boundary "post()" as post + boundary "voteResult()" as forum_vote_result + end + participantgroup Data + database "Params" as forum_params + end +end + +participantgroup #orange Post\nContract + 
participantgroup Data + database "Posts" as posts + end +end + +participantgroup #lightgreen Operating Accounts + participant "Reputation\nNFT" as rep + participant "Reviewer" as reviewer + participant "Public" as public +end + +activate voting_params +activate forum_params +activate rep + +group Author a post + public -> post : post() + activate public + activate post +post<-forum_params:Read param values + post -> posts : Create post instance;\nInitialize with current\nparam values + activate posts +posts->posts:Reference\nother posts + deactivate post + deactivate public +end + +group Initiate a vote + reviewer -> create_vote : createVote() + activate reviewer + activate create_vote + create_vote<-voting_params:Read params + create_vote -> votes : Create vote instance;\nInitialize with current\nparam values + activate votes + votes -> posts : Reference a post + deactivate create_vote + deactivate reviewer +end + +group Cast a vote + reviewer->vote:vote() + activate vote + activate reviewer + vote<-votes:Read prior votes + rep->vote:Read voter reputations + vote->vote:Evaluate\nterminating\nconditions +end + +alt Voting terminates, according to params + alt Voting param change + posts->vote:Read post contents + vote->voting_vote_result:voteResult() + voting_vote_result ->voting_params : Update\nparams + end + votes->forum_vote_result:voteResult() + activate forum_vote_result + posts ->forum_vote_result : Read post contents + forum_vote_result<-forum_params:Read params + alt Forum param change + forum_vote_result -> forum_params : Update\nparams + end + forum_vote_result<-rep:Read authors reputations + forum_vote_result->rep:Mint reputation for post / authors / references + deactivate forum_vote_result + activate rep + votes->rep:Mint reputation for vote winners + activate rep +end +vote->votes:Update\nvote\nrecord +deactivate vote +deactivate reviewer \ No newline at end of file diff --git a/forum-network-node/README.md b/forum-network-node/README.md new file 
mode 100644 index 0000000..47ebd71 --- /dev/null +++ b/forum-network-node/README.md @@ -0,0 +1,84 @@ +Forum Network Node +--- + +The forum is a collection of posts. + +Each post may reference other posts, attributing some reputation to those posts. + +A validation pool may vote to validate forum posts. + +When such a vote passes, the reputation minted at the creation of the post, gets awarded +- to the post author +- to the voters +- recursively to cited posts + +A post's author should be able to choose how much reputation to stake on it. +- Work evidence post. expect unanimity, decent sized stake, punish losers. +- Upvote on a comment. Low stakes. Constitutes validation of the comment as + - carrying an appropriate stake + - containing appropriate content +- Downvote a comment. Moderate stakes. +- Upvote on a post. Moderate stakes. Constitutes validation of the post as being valuable to the DAO. +- Downvote on a post. Moderate stakes. + +what if we don't support generic upvotes? +instead we would just have posts with weighted citations, and standard validation pool votes on each post. + +It will be important for forum voting to be cheap too, in addition to posting to the forum. +So we probably need off-chain voting as well. + +We probably want each client UI to be validating as many posts as possible. +We can try to do something clever such as monitor the time the user spends with a given post visible on their screen, and if they don't downvote it within a certain time, we can cast an implicit upvote. This would save a lot of manual activity. + +The DAO can choose to implement additional automated filtering of what to show the user, and could automatically downvote unwanted content in order to earn validation pool rewards, punish bad actors, and decrease visibility of the unwanted content. + +The UI can be configured to highlight incoming, unvalidated posts for the user to review and validate. 
+A user might choose to disable this option, setting the threshold higher, so that they only see validated posts. +This is something a user might turn on and off at different times, depending on their present mode of engagement with the forum. +I.e., browsing or actively engaged. + +Approving these posts should be a quick operation on the client side. +The results of such operations should be propagated among network nodes and to other clients as needed, so that the off-chain system maintains an up-to-date view of the status of each post. Periodically, the resulting changes to reputation balances should be written to the blockchain. + +We should identify all viable opportunities to collect payments from users. These payments will be necessary to fund the on-chain operations of this system. + +Fundamentally we expect payments will be associated with incoming work requests from outside. +So a post is made requesting work? +Then the worker makes a post presenting the work output for validation. +Discussion may occur in the forum regarding the work output. +This means associates will make new comment posts descending from the work output. +These comments form a body of discussion. +Comments may declare their agreement or disagreement with other comments. +The validation pool will need to validate the comments. +The comments should not mint new reputation if there is no incoming fee. +Instead, the commenter must stake some of their own reputation. +If the comment is successfully validated, it gains comment reputation. +Then once the parent post is validated, the comment author will get a portion of the reputation minted from the parent post creation. +If the comment is not successfully validated one way or the other, the author's stakes will be returned to them. +If the comment is invalidated, the author will lose their stakes. + +The validation pool eventually votes on the work output. 
+If the vote passes, awards are distributed among +- authors +- cited posts, weighted +- validated comments, weighted by the comment reputation balances + +A governance post, such as a parameter change or a client software upgrade, will function similarly. +The expected fee will be smaller. +The reputation awards should be pretty decent. +The discussion around these governance posts will be particularly important. + +Posts, including comments and governance posts, can reference prior posts. +Suppose a comment references a prior comment from a different thread (different work product parent post). +Then if reputation is awarded to the new comment, some of it should be transitively awarded to the referenced comment. +In this way, a comment can gain reputation over time, which in turn awards reputation to its authors and cited posts. +Thus, the forum is like a living system, where connections with new posts can influence existing posts. + +--- + +We want the Network Node to accept reputation-staking actions from the web client (to make it more general, let's call it the user agent). +This means the user agent must have a way to prove that the user owns a given reputation token. +Ideally this should be done via a zero-knowledge proof, where +- the server sends the client something, +- the client does something with it and sends a response, and +- the server is able to verify based on the client's response that they own the given reputation token. \ No newline at end of file diff --git a/forum-network-node/forum.md b/forum-network-node/forum.md new file mode 100644 index 0000000..a06a9e4 --- /dev/null +++ b/forum-network-node/forum.md @@ -0,0 +1,235 @@ +# Goals +- Enable each individual to express their values by taking actions in the system. +- Enable a group to arrive at a decision through a process of deliberation. + - Reward participants in the deliberation process. +- Enable participants to post contributions for review. 
+ - Also enable discussion during this review process, a.k.a. + Enable participants to post comments on the review and on other comments +- Enable participants to submit arbitrary posts that stand alone or that reference other posts + - This correctly implies recursion. + - Since we don't want loops, we want a DAG (directed acyclic graph). + +# Requirements + +## Use Cases +- Outsiders can submit work requests via the Business contract + - Includes fees + - Incoming request can be reviewed and approved by validators + - Adds a post to the forum + - Assigns the work request to an associate, via Availability contract +- Associate can submit their work results via the Business contract + - Adds a post to the forum + - Associates can carry out discussions on the forum by adding new posts. + - Each post can attribute reputation for or against other posts + - Each post should be validated by associates. + - Eventually a formal vote should occur, in the context of the Business contract. + If the vote passes, reputation should be awarded to the following: + - associate who submitted the work + - associates who voted in favor of the post + - associates whose comments in the discussion earned reputation +- Associates can submit new posts to the forum, outside of any existing post or work request + - Each post should be validated by associates + - These can be referenced by future posts, thereby gaining or losing reputation + - Reputation awards are only distributed when posts are later referenced in a fee-generating discussion + +## Storage Requirements +- Run-time operational data + - Active sessions + - Possible cache of on-chain data to expedite look-ups + - Subscribe to updates? +- Archival data + - Forum posts and their contents + - This is needed in order to display forum contents to clients, + as well as to compute reputation awards when submitting a batch + of forum results to the forum validator contract. 
+ +## Messaging Requirements + +# Contracts + +## Validator Contract + +How generic do we want our validator contract to be? + +So far, what I've thought of: +- Points to a forum post. +- Off-chain computation provides reputation effects arising from the forum attribution DAG. +- Network nodes function as voters here, to vote on the result they believe is correct. + This decision is expected to be determined by the forum client software. + The forum contract must include provision for tracking the forum client post with the highest reputation. + A new forum client post would include reputation stake against the previous version, and if + Like all governance decisions and perhaps many other kinds of decisions, there should be a period of deliberation where participants may express their opinions. + At some point it will transition to a formal vote. This will occur when the off-chain network nodes decide to cast formal votes. + +## Network Node Contract + +- Should require staking reputation to add a new network node +- Should require vote by validation pool to add the new network node +- Should allow vote by validation pool to remove a network node + + +# Options for architecture of off-chain forum components. + +## 1 + +Use the following components: +- Existing storage network +- Existing messaging network +- New forum network nodes + +Effects of this arrangement: +- Pro: Minimize storage and network requirements for the individual network nodes, + since they won't need to talk to each other directly. +- Pro: Gains benefits of whatever features the chosen storage or messaging systems provide. +- Con: Adds infrastructure costs that must be managed. +- Con: Adds requirement to implement integration with chosen storage and messaging systems. 
+ +## 2 + +Use the following components: +- Existing storage network +- New messaging network +- Network nodes talk to each other directly + +Effects of this arrangement: +- Pro: Reduce messaging infrastructure costs by implementing this functionality within our own application. +- Nodes can discover each other by reading from the blockchain. +- Nodes can vote for/against each other with regard to their stakes as network nodes. + Nodes can gossip amongst each other. + Nodes MUST be able to verify peer nodes ownership of reputation tokens. + Nodes SHOULD periodically re-verify their local view of the network, with the view that may be accessed on-chain. + +Notes: +- Since we need our application to be publicly networked anyway, to interact with user agents, + it's not asking a huge amount for them to communicate amongst one another. + +## 3 + +Use the following components: +- New messaging network +- New storage network + +Effects of this arrangement: +- Pro: Integrity of storage can be policed by reputation staking + +Notes: +- We would have to choose a consensus algorithm for our data storage, or adopt an existing self-managed solution + +--- + +# Questions + +## How much on-chain? +Just hashes? Any full content? + +## What forum storage? +IPFS? +Filecoin? +Arweave? +CouchDB? +Custom? + +If we use existing/separate networks for storage and/or messaging, how do we police them? +If they mess up our data, whose reputation is staked? +Perhaps this is implicitly covered by the Validator voting, where off-chain results are compared. +We may want to support multiple storage options. +No matter where archival storage occurs, stored data can and should be verified using hashes stored on-chain. +Nodes should only write to the archive AFTER voting on results. +In the worst case, if archived data loses integrity, it will prevent the forum from processing new transactions. 
+If enough network nodes could agree on a strategy to remediate the data, it might be possible to recover somewhat gracefully. This would depend on the nature and extent of the damage. +The last resort would be to initialize a new forum and abandon the old one. +While this would be disruptive to continuity of operations for the DAO, it would not alter on-chain reputation holdings. + +## What forum messaging? +ZeroMQ? +RabbitMQ? +CouchDB? +Custom? + +## What UI? +The forum contract should serve the forum network node source code. +If we only store a hash, we need a secure mechanism for storing and serving the actual code. +If we store the full code on-chain, we would also need to document a process for network node operators to obtain the code. +For example, by using existing command-line utilities to interface with the blockchain and download the data. +If we store the full code off-chain, we would still need to document the process for network node operators to obtain the code. +We would also need to make sure that once network nodes are up and running, they help pin the content to IPFS. + +The forum network node should then serve the UI to users. This can be served as a web application. +Network node will send HTML, CSS, and Javascript to a browser client. +The browser client must have an extension that allows it to function as a wallet, and it must be able to +provide proof to the forum node, of the user's ownership of reputation tokens. + +This can be accomplished as follows: +1. Web client prepares a message (probably using Casper Signer browser extension). Message includes: + - Public key + - Nonce + - Signature +2. Forum node verifies the signed message. +3. Forum node checks on-chain reputation for the given public key. +4. Forum node authenticates the client's HTTPS session. + +From there, the forum node should be able to take actions on behalf of the client. +Most of these actions will occur initially within the off-chain context. 
+Eventually however, the results of the actions should be encoded in an on-chain validation vote. + +The above step 3, check on-chain reputation for given key, may be prohibitively expensive. +Here's a way we might deal with that. +When the forum node receives the signature from the client, we can store it along with the data representing other forum activities; we can provisionally accept the offered public key from the client, and use it for the purposes of computing +reputation effects from forum activities. It could be verified asynchronously. If it turns out to have an issue, however, +then we would have to remediate our results before finalizing. +Here's another approach. +Each client will need to pay a small fee to register with the forum. +This would cover the cost of the on-chain transaction which is needed in order for the forum node to verify the client's reputation. +Once verified, the forum will store the client's public key. The client will then be able to authenticate with the off-chain forum network. +Certain actions in the forum will involve an associate staking reputation. +The plan is for the off-chain network to keep track of these actions, +and periodically vote on-chain to enact their results. +In order for these reputation stakes to be realized on-chain, +the Forum Validator Contract must be empowered to apply the resulting reputation effects. +Otherwise, the user agent would need to engage directly with the blockchain. + +How shall we fund the forum nodes to deploy the necessary calls to smart contracts? + +Maybe it should be possible to submit a fee in order to fund a given forum node, and +thereby to gain some reputation, and thus receive a share of the fees that the DAO earns. +This would also suggest the need for a votable parameter to tune the proportion of these rewards. + +## Should network node contract voters consist of network nodes, or voting associates? 
+Network nodes should be resistant to DOS attacks by restricting white-listed peers to the list obtained from the network node contract on-chain. +But what if a whitelisted node starts misbehaving? +A network node that notices a problem with another network node can: +- Locally graylist or blacklist the offending peer. +- Attempt to notify its human operator, who may then cast an on-chain vote against the offender. +- Attempt to notify its network peers, who may then graylist or blacklist the offending peer. +- Automatically cast an on-chain vote against the offender. + +Let's consider the possibility of nodes notifying each other of problematic behavior of other nodes. +What if a bad node sends messages to its peers attempting to gray/blacklist a good node? +This suggests that each node should listen for such messages from peers, but should require +some number of them before taking action. +Perhaps the degree of graylisting can build up with additional reports from other peers. + +It should be expected that people will attempt to attack the network nodes. +If we enforce whitelisting by reputation stakes via on-chain network node contract, +we raise the bar considerably for a successful attack. +Remaining threat models: +- A reputation holder may attempt to act against the interests of the DAO. +- A supply chain attack may occur against the network node or user agent +- A man-in-the-middle attack may occur between network nodes, or between user agents and network nodes +- A network node may be compromised. +- A user agent may be compromised. + +Among these threats, the supply chain attack against the network node is the most severe. +The other threats are limited because individual nodes, clients, or accounts must be compromised one by one. +But a supply chain attack may compromise many nodes, clients, or accounts. +Therefore, securing the supply chain is a top priority for this system. + +## What's the desired timing of the process to initiate a new network node? 
+If the initiator already has reputation available to stake, maybe the process shouldn't take very long. +However, it's an action with serious repercussions. If network nodes can be added quickly, +then anyone in control of a disproportionately large amount of reputation for whatever reason, +could potentially quickly add a fleet of new network nodes and execute a 51% attack on the forum. +So, on the order of hours to days seems reasonable to me. Also the answer may depend on the current number of network nodes. +When there are more, it will make sense to add them in batches, and it might be nice to expedite that process to an appropriate degree. +On the other hand, there may not be a legitimate value in adding many new network nodes in the same physical location. +If they're going to be spread out in space, their activation might as well be spread out in time. \ No newline at end of file diff --git a/notes.md b/notes.md new file mode 100644 index 0000000..bcd827b --- /dev/null +++ b/notes.md @@ -0,0 +1,4 @@ + + +```mermaid +``` \ No newline at end of file diff --git a/semantic-scholar-client/Cargo.lock b/semantic-scholar-client/Cargo.lock index da8f256..f2cf698 100644 --- a/semantic-scholar-client/Cargo.lock +++ b/semantic-scholar-client/Cargo.lock @@ -2,6 +2,17 @@ # It is not intended for manual editing. 
version = 3 +[[package]] +name = "ahash" +version = "0.7.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fcb51a0695d8f838b1ee009b3fbf66bda078cd64590202a864a8f3e8c4315c47" +dependencies = [ + "getrandom", + "once_cell", + "version_check", +] + [[package]] name = "async-recursion" version = "1.0.0" @@ -13,6 +24,17 @@ dependencies = [ "syn", ] +[[package]] +name = "async-trait" +version = "0.1.56" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "96cf8829f67d2eab0b2dfa42c5d0ef737e0724e4a82b01b3e292456202b19716" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "atty" version = "0.2.14" @@ -42,6 +64,34 @@ version = "1.3.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" +[[package]] +name = "block-buffer" +version = "0.10.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0bf7fe51849ea569fd452f37822f606a5cabb684dc918707a0193fd4664ff324" +dependencies = [ + "generic-array", +] + +[[package]] +name = "bson" +version = "2.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a24ecf39f5a314493ede1bb015984735d41aa6aedb59cafb95492d40cd893330" +dependencies = [ + "ahash", + "base64", + "hex", + "indexmap", + "lazy_static", + "rand", + "serde", + "serde_bytes", + "serde_json", + "time 0.3.11", + "uuid", +] + [[package]] name = "bumpalo" version = "3.10.0" @@ -66,6 +116,19 @@ version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" +[[package]] +name = "chrono" +version = "0.4.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "670ad68c9088c2a963aaa298cb369688cf3f9465ce5e2d4ca10e6e0098a1ce73" +dependencies = [ + "libc", + "num-integer", + "num-traits", + "time 0.1.44", + "winapi", +] + [[package]] name 
= "clap" version = "3.2.11" @@ -121,6 +184,88 @@ version = "0.8.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5827cebf4670468b8772dd191856768aedcb1b0278a04f989f7766351917b9dc" +[[package]] +name = "cpufeatures" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "59a6001667ab124aebae2a495118e11d30984c3a653e99d86d58971708cf5e4b" +dependencies = [ + "libc", +] + +[[package]] +name = "crypto-common" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2ccfd8c0ee4cce11e45b3fd6f9d5e69e0cc62912aa6a0cb1bf4617b0eba5a12f" +dependencies = [ + "generic-array", + "typenum", +] + +[[package]] +name = "darling" +version = "0.13.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a01d95850c592940db9b8194bc39f4bc0e89dee5c4265e4b1807c34a9aba453c" +dependencies = [ + "darling_core", + "darling_macro", +] + +[[package]] +name = "darling_core" +version = "0.13.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "859d65a907b6852c9361e3185c862aae7fafd2887876799fa55f5f99dc40d610" +dependencies = [ + "fnv", + "ident_case", + "proc-macro2", + "quote", + "strsim", + "syn", +] + +[[package]] +name = "darling_macro" +version = "0.13.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9c972679f83bdf9c42bd905396b6c3588a843a17f0f16dfcfa3e2c5d57441835" +dependencies = [ + "darling_core", + "quote", + "syn", +] + +[[package]] +name = "data-encoding" +version = "2.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3ee2393c4a91429dffb4bedf19f4d6abf27d8a732c8ce4980305d782e5426d57" + +[[package]] +name = "derivative" +version = "2.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fcc3dd5e9e9c0b295d6e1e4d811fb6f157d5ffd784b8d202fc62eac8035a770b" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = 
"digest" +version = "0.10.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f2fb860ca6fafa5552fb6d0e816a69c8e49f0908bf524e30a90d97c85892d506" +dependencies = [ + "block-buffer", + "crypto-common", + "subtle", +] + [[package]] name = "encoding_rs" version = "0.8.31" @@ -130,6 +275,18 @@ dependencies = [ "cfg-if", ] +[[package]] +name = "enum-as-inner" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "21cdad81446a7f7dc43f6a77409efeb9733d2fa65553efef6018ef257c959b73" +dependencies = [ + "heck", + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "fastrand" version = "1.7.0" @@ -185,6 +342,34 @@ version = "0.3.21" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0c09fd04b7e4073ac7156a9539b57a484a8ea920f79c7c675d05d289ab6110d3" +[[package]] +name = "futures-executor" +version = "0.3.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9420b90cfa29e327d0429f19be13e7ddb68fa1cccb09d65e5706b8c7a749b8a6" +dependencies = [ + "futures-core", + "futures-task", + "futures-util", +] + +[[package]] +name = "futures-io" +version = "0.3.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fc4045962a5a5e935ee2fdedaa4e08284547402885ab326734432bed5d12966b" + +[[package]] +name = "futures-macro" +version = "0.3.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "33c1e13800337f4d4d7a316bf45a567dbcb6ffe087f16424852d97e97a91f512" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "futures-sink" version = "0.3.21" @@ -204,9 +389,34 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d8b7abd5d659d9b90c8cba917f6ec750a74e2dc23902ef9cd4cc8c8b22e6036a" dependencies = [ "futures-core", + "futures-io", + "futures-macro", "futures-task", + "memchr", "pin-project-lite", "pin-utils", + "slab", +] + +[[package]] +name = "generic-array" 
+version = "0.14.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fd48d33ec7f05fbfa152300fdad764757cbded343c1aa1cff2fbaf4134851803" +dependencies = [ + "typenum", + "version_check", +] + +[[package]] +name = "getrandom" +version = "0.2.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4eb1a864a501629691edf6c15a593b7a51eebaa1e8468e9ddc623de7c9b58ec6" +dependencies = [ + "cfg-if", + "libc", + "wasi 0.11.0+wasi-snapshot-preview1", ] [[package]] @@ -249,6 +459,32 @@ dependencies = [ "libc", ] +[[package]] +name = "hex" +version = "0.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7f24254aa9a54b5c858eaee2f5bccdb46aaf0e486a595ed5fd8f86ba55232a70" + +[[package]] +name = "hmac" +version = "0.12.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6c49c37c09c17a53d937dfbb742eb3a961d65a994e6bcdcf37e7399d0cc8ab5e" +dependencies = [ + "digest", +] + +[[package]] +name = "hostname" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3c731c3e10504cc8ed35cfe2f1db4c9274c3d35fa486e3b31df46f068ef3e867" +dependencies = [ + "libc", + "match_cfg", + "winapi", +] + [[package]] name = "http" version = "0.2.8" @@ -320,6 +556,12 @@ dependencies = [ "tokio-native-tls", ] +[[package]] +name = "ident_case" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b9e0384b61958566e926dc50660321d12159025e767c18e043daf26b70104c39" + [[package]] name = "idna" version = "0.2.3" @@ -350,6 +592,18 @@ dependencies = [ "cfg-if", ] +[[package]] +name = "ipconfig" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "723519edce41262b05d4143ceb95050e4c614f483e78e9fd9e39a8275a84ad98" +dependencies = [ + "socket2", + "widestring", + "winapi", + "winreg 0.7.0", +] + [[package]] name = "ipnet" version = "2.5.0" @@ -383,6 +637,12 @@ version = "0.2.126" 
source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "349d5a591cd28b49e1d1037471617a32ddcda5731b99419008085f72d5a53836" +[[package]] +name = "linked-hash-map" +version = "0.5.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0717cef1bc8b636c6e1c1bbdefc09e6322da8a9321966e8928ef80d20f7f770f" + [[package]] name = "lock_api" version = "0.4.7" @@ -402,12 +662,36 @@ dependencies = [ "cfg-if", ] +[[package]] +name = "lru-cache" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "31e24f1ad8321ca0e8a1e0ac13f23cb668e6f5466c2c57319f6a5cf1cc8e3b1c" +dependencies = [ + "linked-hash-map", +] + +[[package]] +name = "match_cfg" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ffbee8634e0d45d258acb448e7eaab3fce7a0a467395d4d9f228e3c1f01fb2e4" + [[package]] name = "matches" version = "0.1.9" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a3e378b66a060d48947b590737b30a1be76706c8dd7b8ba0f2fe3989c68a853f" +[[package]] +name = "md-5" +version = "0.10.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "658646b21e0b72f7866c7038ab086d3d5e1cd6271f060fd37defb241949d0582" +dependencies = [ + "digest", +] + [[package]] name = "memchr" version = "2.5.0" @@ -428,10 +712,56 @@ checksum = "57ee1c23c7c63b0c9250c339ffdc69255f110b298b901b9f6c82547b7b87caaf" dependencies = [ "libc", "log", - "wasi", + "wasi 0.11.0+wasi-snapshot-preview1", "windows-sys", ] +[[package]] +name = "mongodb" +version = "2.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "28f3943e379e9dcaaab9dc319c308a8caaf9e7ff083c6838dff740afbba59df7" +dependencies = [ + "async-trait", + "base64", + "bitflags", + "bson", + "chrono", + "derivative", + "futures-core", + "futures-executor", + "futures-util", + "hex", + "hmac", + "lazy_static", + "md-5", + "os_info", + "pbkdf2", + "percent-encoding", + 
"rand", + "rustc_version_runtime", + "rustls", + "rustls-pemfile", + "serde", + "serde_bytes", + "serde_with", + "sha-1", + "sha2", + "socket2", + "stringprep", + "strsim", + "take_mut", + "thiserror", + "tokio", + "tokio-rustls", + "tokio-util", + "trust-dns-proto", + "trust-dns-resolver", + "typed-builder", + "uuid", + "webpki-roots", +] + [[package]] name = "native-tls" version = "0.2.10" @@ -450,6 +780,25 @@ dependencies = [ "tempfile", ] +[[package]] +name = "num-integer" +version = "0.1.45" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "225d3389fb3509a24c93f5c29eb6bde2586b98d9f016636dff58d7c6f7569cd9" +dependencies = [ + "autocfg", + "num-traits", +] + +[[package]] +name = "num-traits" +version = "0.2.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "578ede34cf02f8924ab9447f50c28075b4d3e5b269972345e7e0372b38c6cdcd" +dependencies = [ + "autocfg", +] + [[package]] name = "num_cpus" version = "1.13.1" @@ -460,6 +809,15 @@ dependencies = [ "libc", ] +[[package]] +name = "num_threads" +version = "0.1.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2819ce041d2ee131036f4fc9d6ae7ae125a3a40e97ba64d04fe799ad9dabbb44" +dependencies = [ + "libc", +] + [[package]] name = "once_cell" version = "1.13.0" @@ -511,6 +869,16 @@ dependencies = [ "vcpkg", ] +[[package]] +name = "os_info" +version = "3.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0eca3ecae1481e12c3d9379ec541b238a16f0b75c9a409942daa8ec20dbfdb62" +dependencies = [ + "log", + "winapi", +] + [[package]] name = "os_str_bytes" version = "6.1.0" @@ -540,6 +908,15 @@ dependencies = [ "windows-sys", ] +[[package]] +name = "pbkdf2" +version = "0.10.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "271779f35b581956db91a3e55737327a03aa051e90b1c47aeb189508533adfd7" +dependencies = [ + "digest", +] + [[package]] name = "percent-encoding" version = "2.1.0" @@ 
-564,6 +941,12 @@ version = "0.3.25" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1df8c4ec4b0627e53bdf214615ad287367e482558cf84b109250b37464dc03ae" +[[package]] +name = "ppv-lite86" +version = "0.2.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "eb9f9e6e233e5c4a35559a617bf40a4ec447db2e84c20b55a6f83167b7e57872" + [[package]] name = "proc-macro-error" version = "1.0.4" @@ -597,6 +980,12 @@ dependencies = [ "unicode-ident", ] +[[package]] +name = "quick-error" +version = "1.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a1d01941d82fa2ab50be1e79e6714289dd7cde78eba4c074bc5a4374f650dfe0" + [[package]] name = "quote" version = "1.0.20" @@ -606,6 +995,36 @@ dependencies = [ "proc-macro2", ] +[[package]] +name = "rand" +version = "0.8.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404" +dependencies = [ + "libc", + "rand_chacha", + "rand_core", +] + +[[package]] +name = "rand_chacha" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e6c10a63a0fa32252be49d21e7709d4d4baf8d231c2dbce1eaa8141b9b127d88" +dependencies = [ + "ppv-lite86", + "rand_core", +] + +[[package]] +name = "rand_core" +version = "0.6.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d34f1408f55294453790c48b2f1ebbb1c5b4b7563eb1f418bcfcfdbb06ebb4e7" +dependencies = [ + "getrandom", +] + [[package]] name = "redox_syscall" version = "0.2.13" @@ -658,7 +1077,72 @@ dependencies = [ "wasm-bindgen", "wasm-bindgen-futures", "web-sys", - "winreg", + "winreg 0.10.1", +] + +[[package]] +name = "resolv-conf" +version = "0.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "52e44394d2086d010551b14b53b1f24e31647570cd1deb0379e2c21b329aba00" +dependencies = [ + "hostname", + "quick-error", +] + +[[package]] +name = 
"ring" +version = "0.16.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3053cf52e236a3ed746dfc745aa9cacf1b791d846bdaf412f60a8d7d6e17c8fc" +dependencies = [ + "cc", + "libc", + "once_cell", + "spin", + "untrusted", + "web-sys", + "winapi", +] + +[[package]] +name = "rustc_version" +version = "0.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "138e3e0acb6c9fb258b19b67cb8abd63c00679d2851805ea151465464fe9030a" +dependencies = [ + "semver", +] + +[[package]] +name = "rustc_version_runtime" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d31b7153270ebf48bf91c65ae5b0c00e749c4cfad505f66530ac74950249582f" +dependencies = [ + "rustc_version", + "semver", +] + +[[package]] +name = "rustls" +version = "0.20.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5aab8ee6c7097ed6057f43c187a62418d0c05a4bd5f18b3571db50ee0f9ce033" +dependencies = [ + "log", + "ring", + "sct", + "webpki", +] + +[[package]] +name = "rustls-pemfile" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1ee86d63972a7c661d1536fefe8c3c8407321c3df668891286de28abcd087360" +dependencies = [ + "base64", ] [[package]] @@ -683,6 +1167,16 @@ version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d29ab0c6d3fc0ee92fe66e2d99f700eab17a8d57d1c1d3b748380fb20baa78cd" +[[package]] +name = "sct" +version = "0.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d53dcdb7c9f8158937a7981b48accfd39a43af418591a5d008c7b22b5e1b7ca4" +dependencies = [ + "ring", + "untrusted", +] + [[package]] name = "security-framework" version = "2.6.1" @@ -712,12 +1206,28 @@ version = "0.1.0" dependencies = [ "async-recursion", "clap", + "mongodb", "reqwest", "serde", "serde_json", "tokio", ] +[[package]] +name = "semver" +version = "0.9.0" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "1d7eb9ef2c18661902cc47e535f9bc51b78acd254da71d375c2f6720d9a40403" +dependencies = [ + "semver-parser", +] + +[[package]] +name = "semver-parser" +version = "0.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "388a1df253eca08550bef6c72392cfe7c30914bf41df5269b68cbd6ff8f570a3" + [[package]] name = "serde" version = "1.0.139" @@ -727,6 +1237,15 @@ dependencies = [ "serde_derive", ] +[[package]] +name = "serde_bytes" +version = "0.11.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "212e73464ebcde48d723aa02eb270ba62eff38a9b732df31f33f1b4e145f3a54" +dependencies = [ + "serde", +] + [[package]] name = "serde_derive" version = "1.0.139" @@ -744,6 +1263,7 @@ version = "1.0.82" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "82c2c1fdcd807d1098552c5b9a36e425e42e9fbd7c6a37a8425f390f781f7fa7" dependencies = [ + "indexmap", "itoa", "ryu", "serde", @@ -761,6 +1281,50 @@ dependencies = [ "serde", ] +[[package]] +name = "serde_with" +version = "1.14.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "678b5a069e50bf00ecd22d0cd8ddf7c236f68581b03db652061ed5eb13a312ff" +dependencies = [ + "serde", + "serde_with_macros", +] + +[[package]] +name = "serde_with_macros" +version = "1.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e182d6ec6f05393cc0e5ed1bf81ad6db3a8feedf8ee515ecdd369809bcce8082" +dependencies = [ + "darling", + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "sha-1" +version = "0.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "028f48d513f9678cda28f6e4064755b3fbb2af6acd672f2c209b62323f7aea0f" +dependencies = [ + "cfg-if", + "cpufeatures", + "digest", +] + +[[package]] +name = "sha2" +version = "0.10.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"55deaec60f81eefe3cce0dc50bda92d6d8e88f2a27df7c5033b42afeb1ed2676" +dependencies = [ + "cfg-if", + "cpufeatures", + "digest", +] + [[package]] name = "signal-hook-registry" version = "1.4.0" @@ -792,12 +1356,34 @@ dependencies = [ "winapi", ] +[[package]] +name = "spin" +version = "0.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6e63cff320ae2c57904679ba7cb63280a3dc4613885beafb148ee7bf9aa9042d" + +[[package]] +name = "stringprep" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8ee348cb74b87454fff4b551cbf727025810a004f88aeacae7f85b87f4e9a1c1" +dependencies = [ + "unicode-bidi", + "unicode-normalization", +] + [[package]] name = "strsim" version = "0.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "73473c0e59e6d5812c5dfe2a064a6444949f089e20eec9a2e5506596494e4623" +[[package]] +name = "subtle" +version = "2.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6bdef32e8150c2a081110b42772ffe7d7c9032b606bc226c8260fd97e0976601" + [[package]] name = "syn" version = "1.0.98" @@ -809,6 +1395,12 @@ dependencies = [ "unicode-ident", ] +[[package]] +name = "take_mut" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f764005d11ee5f36500a149ace24e00e3da98b0158b3e2d53a7495660d3f4d60" + [[package]] name = "tempfile" version = "3.3.0" @@ -838,6 +1430,55 @@ version = "0.15.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b1141d4d61095b28419e22cb0bbf02755f5e54e0526f97f1e3d1d160e60885fb" +[[package]] +name = "thiserror" +version = "1.0.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bd829fe32373d27f76265620b5309d0340cb8550f523c1dda251d6298069069a" +dependencies = [ + "thiserror-impl", +] + +[[package]] +name = "thiserror-impl" +version = "1.0.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"0396bc89e626244658bef819e22d0cc459e795a5ebe878e6ec336d1674a8d79a" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "time" +version = "0.1.44" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6db9e6914ab8b1ae1c260a4ae7a49b6c5611b40328a735b21862567685e73255" +dependencies = [ + "libc", + "wasi 0.10.0+wasi-snapshot-preview1", + "winapi", +] + +[[package]] +name = "time" +version = "0.3.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "72c91f41dcb2f096c05f0873d667dceec1087ce5bcf984ec8ffb19acddbb3217" +dependencies = [ + "itoa", + "libc", + "num_threads", + "time-macros", +] + +[[package]] +name = "time-macros" +version = "0.2.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "42657b1a6f4d817cda8e7a0ace261fe0cc946cf3a80314390b22cc61ae080792" + [[package]] name = "tinyvec" version = "1.6.0" @@ -895,6 +1536,17 @@ dependencies = [ "tokio", ] +[[package]] +name = "tokio-rustls" +version = "0.23.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c43ee83903113e03984cb9e5cebe6c04a5116269e900e3ddba8f068a62adda59" +dependencies = [ + "rustls", + "tokio", + "webpki", +] + [[package]] name = "tokio-util" version = "0.7.3" @@ -935,12 +1587,74 @@ dependencies = [ "once_cell", ] +[[package]] +name = "trust-dns-proto" +version = "0.21.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9c31f240f59877c3d4bb3b3ea0ec5a6a0cff07323580ff8c7a605cd7d08b255d" +dependencies = [ + "async-trait", + "cfg-if", + "data-encoding", + "enum-as-inner", + "futures-channel", + "futures-io", + "futures-util", + "idna", + "ipnet", + "lazy_static", + "log", + "rand", + "smallvec", + "thiserror", + "tinyvec", + "tokio", + "url", +] + +[[package]] +name = "trust-dns-resolver" +version = "0.21.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"e4ba72c2ea84515690c9fcef4c6c660bb9df3036ed1051686de84605b74fd558" +dependencies = [ + "cfg-if", + "futures-util", + "ipconfig", + "lazy_static", + "log", + "lru-cache", + "parking_lot", + "resolv-conf", + "smallvec", + "thiserror", + "tokio", + "trust-dns-proto", +] + [[package]] name = "try-lock" version = "0.2.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "59547bce71d9c38b83d9c0e92b6066c4253371f15005def0c30d9657f50c7642" +[[package]] +name = "typed-builder" +version = "0.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "89851716b67b937e393b3daa8423e67ddfc4bbbf1654bcf05488e95e0828db0c" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "typenum" +version = "1.15.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dcf81ac59edc17cc8697ff311e8f5ef2d99fcbd9817b34cec66f90b6c3dfd987" + [[package]] name = "unicode-bidi" version = "0.3.8" @@ -962,6 +1676,12 @@ dependencies = [ "tinyvec", ] +[[package]] +name = "untrusted" +version = "0.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a156c684c91ea7d62626509bce3cb4e1d9ed5c4d978f7b4352658f96a4c26b4a" + [[package]] name = "url" version = "2.2.2" @@ -974,6 +1694,16 @@ dependencies = [ "percent-encoding", ] +[[package]] +name = "uuid" +version = "0.8.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bc5cf98d8186244414c848017f0e2676b3fcb46807f6668a97dfe67359a3c4b7" +dependencies = [ + "getrandom", + "serde", +] + [[package]] name = "vcpkg" version = "0.2.15" @@ -996,6 +1726,12 @@ dependencies = [ "try-lock", ] +[[package]] +name = "wasi" +version = "0.10.0+wasi-snapshot-preview1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1a143597ca7c7793eff794def352d41792a93c481eb1042423ff7ff72ba2c31f" + [[package]] name = "wasi" version = "0.11.0+wasi-snapshot-preview1" @@ -1078,6 +1814,31 @@ dependencies = [ 
"wasm-bindgen", ] +[[package]] +name = "webpki" +version = "0.22.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f095d78192e208183081cc07bc5515ef55216397af48b873e5edcd72637fa1bd" +dependencies = [ + "ring", + "untrusted", +] + +[[package]] +name = "webpki-roots" +version = "0.22.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f1c760f0d366a6c24a02ed7816e23e691f5d92291f94d15e836006fd11b04daf" +dependencies = [ + "webpki", +] + +[[package]] +name = "widestring" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "17882f045410753661207383517a6f62ec3dbeb6a4ed2acce01f0728238d1983" + [[package]] name = "winapi" version = "0.3.9" @@ -1152,6 +1913,15 @@ version = "0.36.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c811ca4a8c853ef420abd8592ba53ddbbac90410fab6903b3e79972a631f7680" +[[package]] +name = "winreg" +version = "0.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0120db82e8a1e0b9fb3345a539c478767c0048d842860994d96113d5b667bd69" +dependencies = [ + "winapi", +] + [[package]] name = "winreg" version = "0.10.1" diff --git a/semantic-scholar-client/Cargo.toml b/semantic-scholar-client/Cargo.toml index a3aad49..baaf309 100644 --- a/semantic-scholar-client/Cargo.toml +++ b/semantic-scholar-client/Cargo.toml @@ -8,6 +8,7 @@ edition = "2021" [dependencies] async-recursion = "1.0.0" clap = { version = "3.2.11", features = ["derive"] } +mongodb = "2.2.2" reqwest = { version = "0.11.11", features = ["json"] } serde = { version = "1.0.139", features = ["derive"] } serde_json = "1.0.82" diff --git a/semantic-scholar-client/README.md b/semantic-scholar-client/README.md index 2ff88f0..b8c6b3f 100644 --- a/semantic-scholar-client/README.md +++ b/semantic-scholar-client/README.md @@ -1,16 +1,21 @@ -This utility is able to fetch data fromSemantic Scholar API. 
+# `semantic-scholar-client` -Initial proof of concept here writes the result to stdout. Work in progress to pipe this data into an operating database. +This utility is able to fetch data from the Semantic Scholar API. + +Initial proof of concept here writes the result to stdout. + +Work in progress to pipe this data into an operating database. ### Usage - cargo run -- --paper-id + cargo run -- --paper-id --depth -`paper_id` is in accordance with [Semantic Scholar API](https://api.semanticscholar.org/api-docs/) rules. +* `paper_id` values are in accordance with [Semantic Scholar API](https://api.semanticscholar.org/api-docs/). +* `depth` is the number of citations to traverse, from the starting paper. -In its current form the utility outputs the citation graph up to a depth of 3 citations. +### Notes -Next steps include -- developing strategies for deciding where to terminate a given traversal -- providing an HTTP and perhaps WebSocket interface that can be used to talk to this process during its operation. - This can enable us to pipe the data to other tasks, to monitor, to start/stop, and even to make configuration changes. \ No newline at end of file +Ideas for followup work: +- Consider strategies for deciding where to terminate a given traversal +- Provide an HTTP/WebSocket interface that can be used to talk to this process during its operation. + This can enable us to pipe the data to other tasks, to monitor, to start/stop, and even to make configuration changes. 
diff --git a/semantic-scholar-client/src/bin/import.rs b/semantic-scholar-client/src/bin/import.rs new file mode 100644 index 0000000..d741bfe --- /dev/null +++ b/semantic-scholar-client/src/bin/import.rs @@ -0,0 +1,147 @@ +// During development, allowing dead code +#![allow(dead_code)] + +use async_recursion::async_recursion; +use clap::Parser; +use std::cmp::min; +use std::fmt::Write; +use std::error::Error; +use serde::Deserialize; + +type DataResult = Result>; + +const BASE_URL: &str = "https://api.semanticscholar.org/graph/v1"; +const MAX_DEPTH: u32 = 3; + +#[derive(Parser, Debug)] +#[clap(author, version, about, long_about = None)] +struct Args { + /// How deep to traverse citation graph from the starting paper + #[clap(short, long, value_parser)] + depth: u32, + + /// Starting paper. We will traverse papers that cite this one + #[clap(short, long, value_parser)] + paper_id: String, + + // Write the results to MongoDB + // #[clap(short, long, value_parser)] + // write_to_mongo: bool, +} + +struct Author { + name: String +} + +type Authors = Vec; + +#[derive(Deserialize, Debug)] +#[serde(rename_all = "camelCase")] +struct Paper { + paper_id: String, + title: Option, + citations: Vec, +} + +/** + * Occurs within Citation struct + */ +#[derive(Deserialize, Debug)] +#[serde(rename_all = "camelCase")] +struct CitingPaper { + paper_id: Option, + title: Option, +} + +#[derive(Deserialize, Debug)] +#[serde(rename_all = "camelCase")] +struct Citation { + citing_paper: CitingPaper +} + +/** + * Generic struct to wrap the common API response pattern {data: [...]} + */ +#[derive(Deserialize, Debug)] +struct ApiListResponse { + data: Vec +} + + +// TODO: Cache results in a (separate but local) database such as Redis +// TODO: Store results in a (separate but local) database such as Postgres +#[async_recursion] +async fn get_citations(paper_id: String, depth: u32, authors: &mut Vec) -> DataResult> { + // Bound recursion to some depth + if depth > MAX_DEPTH { + return 
Ok(vec![]); + } + + // Build the URL + let mut url = String::new(); + write!(&mut url, "{}/paper/{}/citations", BASE_URL, paper_id)?; + + let resp = reqwest::get(url) + .await? + .text() + .await?; + + let resp_deserialized_attempt = serde_json::from_str::>(resp.as_str()); + + if let Err(err) = resp_deserialized_attempt { + println!("depth {} paper {} error {}", depth, paper_id, err); + return Ok(vec![]); + } + + let resp_deserialized: ApiListResponse = resp_deserialized_attempt.unwrap(); + + for Citation{citing_paper: CitingPaper{paper_id: citing_paper_id, title}} in resp_deserialized.data { + if let (Some(citing_paper_id), Some(title)) = (citing_paper_id, title) { + let short_len = min(50, title.len()); + let (short_title, _) = title.split_at(short_len); + println!("depth {} paper {} cites {} title {}", depth, citing_paper_id, paper_id, short_title); + + get_citations(citing_paper_id, depth + 1, authors).await?; + } + } + + Ok(vec![]) +} + +async fn get_paper_info(paper_id: String, depth: u32, authors: &mut Authors) -> DataResult> { + // Build the URL + let mut url = String::new(); + // Probably also want: year,publicationDate,journal", BASE_URL, paper_id)?; + const fields: &str = "title, authors, citations"; + write!(&mut url, "{}/paper/{}?fields={}", BASE_URL, paper_id, fields)?; + + let resp = reqwest::get(url) + .await? 
+ .text() + .await?; + + let resp_deserialized_attempt = serde_json::from_str::>(resp.as_str()); + + if let Err(err) = resp_deserialized_attempt { + println!("depth {} paper {} error {}", depth, paper_id, err); + return Ok(vec![]); + } + + let resp_deserialized: ApiListResponse = resp_deserialized_attempt.unwrap(); + Ok(vec![]) +} + +#[tokio::main] +async fn main() -> Result<(), Box> { + let Args{ + depth, + paper_id, + // write_to_mongo, + } = Args::parse(); + + let mut authors = Authors::new(); + + get_citations(paper_id, depth, &mut authors).await?; + + Ok(()) +} \ No newline at end of file diff --git a/semantic-scholar-client/src/bin/mongo.rs b/semantic-scholar-client/src/bin/mongo.rs new file mode 100644 index 0000000..c45d641 --- /dev/null +++ b/semantic-scholar-client/src/bin/mongo.rs @@ -0,0 +1,59 @@ + +use mongodb::{Client, options::ClientOptions}; + +const MONGO_DB_ADDRESS: &str = "mongodb://docker:mongopw@localhost:55000"; + +#[tokio::main] +async fn main() -> Result<(), Box> { + // Parse a connection string into an options struct. + let client_options = ClientOptions::parse(MONGO_DB_ADDRESS).await?; + + // Get a handle to the deployment. + let client = Client::with_options(client_options)?; + + // Try creating a collection + { + use serde::{Deserialize, Serialize}; + + #[derive(Debug, Serialize, Deserialize)] + struct Book { + title: String, + author: String, + } + + // Reference a (new) database + let db = client.database("db2"); + + // Get a handle to a collection of `Book`. + let typed_collection = db.collection::("books"); + + let books = vec![ + Book { + title: "The Grapes of Wrath".to_string(), + author: "John Steinbeck".to_string(), + }, + Book { + title: "To Kill a Mockingbird".to_string(), + author: "Harper Lee".to_string(), + }, + ]; + + // Insert the books into "mydb.books" collection, no manual conversion to BSON necessary. + typed_collection.insert_many(books, None).await?; + } + + // List the names of the databases in that deployment. 
+ for db_name in client.list_database_names(None, None).await? { + println!("{}", db_name); + // Get a handle to a database. + let db = client.database(db_name.as_str()); + + // List the names of the collections in that database. + for collection_name in db.list_collection_names(None).await? { + println!("- {}", collection_name); + } + } + + + Ok(()) +} \ No newline at end of file diff --git a/semantic-scholar-client/src/main.rs b/semantic-scholar-client/src/main.rs deleted file mode 100644 index 3796e39..0000000 --- a/semantic-scholar-client/src/main.rs +++ /dev/null @@ -1,110 +0,0 @@ -// During development, allowing dead code -#![allow(dead_code)] - -use async_recursion::async_recursion; -use clap::Parser; -use std::fmt::Write; -use std::error::Error; -// use std::time::Duration; -// use std::thread::sleep; -use serde::Deserialize; - -type DataResult = Result>; - -const BASE_URL: &str = "https://api.semanticscholar.org/graph/v1"; -const MAX_DEPTH: u32 = 3; - -#[derive(Parser, Debug)] -#[clap(author, version, about, long_about = None)] -struct Args { - /// URL to query - #[clap(short, long, value_parser)] - paper_id: String, -} - -struct Author { - name: String -} -struct Paper { - authors: Vec -} - -/** - * Occurs within Citation struct - */ -#[derive(Deserialize, Debug)] -#[serde(rename_all = "camelCase")] -struct CitingPaper { - paper_id: String, - title: String, -} - -#[derive(Deserialize, Debug)] -#[serde(rename_all = "camelCase")] -struct Citation { - citing_paper: CitingPaper -} - -/** - * Generic struct to wrap the common API response pattern {data: [...]} - */ -#[derive(Deserialize, Debug)] -struct ApiListResponse { - data: Vec -} - - -// TODO: Cache results in a (separate but local) database such as Redis -// TODO: Store results in a (separate but local) database such as Postgres -#[async_recursion] -async fn get_citations(paper_id: String, depth: u32) -> DataResult> { - // Bound recursion to some depth - if depth > MAX_DEPTH { - return Ok(vec![]); - } - 
- // Naieve approach to possible rate-limiting - // sleep(Duration::new(1, 0)); - - // Build the URL - let mut url = String::new(); - write!(&mut url, "{}/paper/{}/citations", BASE_URL, paper_id)?; - - let resp = reqwest::get(url) - .await? - .text() - .await?; - // .json::().await?; - - let resp_deserialized_attempt = serde_json::from_str::>(resp.as_str()); - - if let Err(err) = resp_deserialized_attempt { - println!("depth {} paper {} error {}", depth, paper_id, err); - return Ok(vec![]); - } - - let resp_deserialized: ApiListResponse = resp_deserialized_attempt.unwrap(); - - for Citation{citing_paper: CitingPaper{paper_id: cited_id, title}} in resp_deserialized.data { - println!("depth {} paper {} cites {} title {}", depth, paper_id, cited_id, title); - - get_citations(cited_id, depth + 1).await?; - } - - Ok(vec![]) -} - - - -#[tokio::main] -async fn main() -> Result<(), Box> { - let Args{ paper_id } = Args::parse(); - - // let mut authors: Vec = Vec::new(); - - // let citations = - - get_citations(paper_id, 0).await?; - - Ok(()) -} \ No newline at end of file From c80f2ee79bbe2c18810a1b6dd4bd2bb9a7844d4f Mon Sep 17 00:00:00 2001 From: Ladd Hoffman Date: Thu, 4 Apr 2024 11:56:58 -0500 Subject: [PATCH 4/4] sematic scholar api key support --- semantic-scholar-client/.env.example | 1 + semantic-scholar-client/.gitignore | 1 + semantic-scholar-client/Cargo.lock | 7 ++ semantic-scholar-client/Cargo.toml | 2 + semantic-scholar-client/README.md | 4 + semantic-scholar-client/src/bin/import.rs | 104 ++++++++++++---------- 6 files changed, 70 insertions(+), 49 deletions(-) create mode 100644 semantic-scholar-client/.env.example diff --git a/semantic-scholar-client/.env.example b/semantic-scholar-client/.env.example new file mode 100644 index 0000000..059442f --- /dev/null +++ b/semantic-scholar-client/.env.example @@ -0,0 +1 @@ +SEMANTIC_SCHOLAR_API_KEY= \ No newline at end of file diff --git a/semantic-scholar-client/.gitignore b/semantic-scholar-client/.gitignore index 
ea8c4bf..fedaa2b 100644 --- a/semantic-scholar-client/.gitignore +++ b/semantic-scholar-client/.gitignore @@ -1 +1,2 @@ /target +.env diff --git a/semantic-scholar-client/Cargo.lock b/semantic-scholar-client/Cargo.lock index f2cf698..e41ebf6 100644 --- a/semantic-scholar-client/Cargo.lock +++ b/semantic-scholar-client/Cargo.lock @@ -266,6 +266,12 @@ dependencies = [ "subtle", ] +[[package]] +name = "dotenv" +version = "0.15.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "77c90badedccf4105eca100756a0b1289e191f6fcbdadd3cee1d2f614f97da8f" + [[package]] name = "encoding_rs" version = "0.8.31" @@ -1206,6 +1212,7 @@ version = "0.1.0" dependencies = [ "async-recursion", "clap", + "dotenv", "mongodb", "reqwest", "serde", diff --git a/semantic-scholar-client/Cargo.toml b/semantic-scholar-client/Cargo.toml index baaf309..1102ca1 100644 --- a/semantic-scholar-client/Cargo.toml +++ b/semantic-scholar-client/Cargo.toml @@ -2,12 +2,14 @@ name = "semantic-scholar-client" version = "0.1.0" edition = "2021" +default-run = "import" # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html [dependencies] async-recursion = "1.0.0" clap = { version = "3.2.11", features = ["derive"] } +dotenv = "0.15.0" mongodb = "2.2.2" reqwest = { version = "0.11.11", features = ["json"] } serde = { version = "1.0.139", features = ["derive"] } diff --git a/semantic-scholar-client/README.md b/semantic-scholar-client/README.md index b8c6b3f..8e1ab37 100644 --- a/semantic-scholar-client/README.md +++ b/semantic-scholar-client/README.md @@ -8,6 +8,9 @@ Work in progress to pipe this data into an operating database. ### Usage +* (Optional) Copy `.env.example` to `.env` and set the value of `SEMANTIC_SCHOLAR_API_KEY` +* Run the program + cargo run -- --paper-id --depth * `paper_id` values are in accordance with [Semantic Scholar API](https://api.semanticscholar.org/api-docs/). 
@@ -19,3 +22,4 @@ Ideas for followup work: - Consider strategies for deciding where to terminate a given traversal - Provide an HTTP/WebSocket interface that can be used to talk to this process during its operation. This can enable us to pipe the data to other tasks, to monitor, to start/stop, and even to make configuration changes. +- Rate limit requests \ No newline at end of file diff --git a/semantic-scholar-client/src/bin/import.rs b/semantic-scholar-client/src/bin/import.rs index d741bfe..7857b4f 100644 --- a/semantic-scholar-client/src/bin/import.rs +++ b/semantic-scholar-client/src/bin/import.rs @@ -3,10 +3,11 @@ use async_recursion::async_recursion; use clap::Parser; -use std::cmp::min; -use std::fmt::Write; -use std::error::Error; +use dotenv::dotenv; use serde::Deserialize; +use std::cmp::min; +use std::error::Error; +use std::fmt::Write; type DataResult = Result>; @@ -23,14 +24,13 @@ struct Args { /// Starting paper. We will traverse papers that cite this one #[clap(short, long, value_parser)] paper_id: String, - // Write the results to MongoDB // #[clap(short, long, value_parser)] // write_to_mongo: bool, } struct Author { - name: String + name: String, } type Authors = Vec; @@ -56,22 +56,28 @@ struct CitingPaper { #[derive(Deserialize, Debug)] #[serde(rename_all = "camelCase")] struct Citation { - citing_paper: CitingPaper + citing_paper: CitingPaper, } /** - * Generic struct to wrap the common API response pattern {data: [...]} - */ + code: Option, +* Generic struct to wrap the common API response pattern {data: [...]} +*/ #[derive(Deserialize, Debug)] struct ApiListResponse { - data: Vec + data: Option>, + message: Option, } - // TODO: Cache results in a (separate but local) database such as Redis // TODO: Store results in a (separate but local) database such as Postgres #[async_recursion] -async fn get_citations(paper_id: String, depth: u32, authors: &mut Vec) -> DataResult> { +async fn get_citations( + client: &reqwest::Client, + paper_id: String, 
+ depth: u32, + authors: &mut Vec, +) -> DataResult> { // Bound recursion to some depth if depth > MAX_DEPTH { return Ok(vec![]); @@ -81,12 +87,15 @@ async fn get_citations(paper_id: String, depth: u32, authors: &mut Vec) let mut url = String::new(); write!(&mut url, "{}/paper/{}/citations", BASE_URL, paper_id)?; - let resp = reqwest::get(url) - .await? - .text() - .await?; + let mut req = client.get(url); + let api_key = std::env::var("SEMANTIC_SCHOLAR_API_KEY"); + if api_key.is_ok() { + req = req.header("x-api-key", api_key.unwrap()); + } + let resp = req.send().await?.text().await?; - let resp_deserialized_attempt = serde_json::from_str::>(resp.as_str()); + let resp_deserialized_attempt = + serde_json::from_str::>(resp.as_str()); if let Err(err) = resp_deserialized_attempt { println!("depth {} paper {} error {}", depth, paper_id, err); @@ -95,53 +104,50 @@ async fn get_citations(paper_id: String, depth: u32, authors: &mut Vec) let resp_deserialized: ApiListResponse = resp_deserialized_attempt.unwrap(); - for Citation{citing_paper: CitingPaper{paper_id: citing_paper_id, title}} in resp_deserialized.data { + if resp_deserialized.message.is_some() { + println!( + "depth {} paper {} error {}", + depth, + paper_id, + resp_deserialized.message.unwrap() + ); + return Ok(vec![]); + } + + for Citation { + citing_paper: + CitingPaper { + paper_id: citing_paper_id, + title, + }, + } in resp_deserialized.data.unwrap() + { if let (Some(citing_paper_id), Some(title)) = (citing_paper_id, title) { let short_len = min(50, title.len()); let (short_title, _) = title.split_at(short_len); - println!("depth {} paper {} cites {} title {}", depth, citing_paper_id, paper_id, short_title); + println!( + "depth {} paper {} cites {} title {}", + depth, citing_paper_id, paper_id, short_title + ); - get_citations(citing_paper_id, depth + 1, authors).await?; + get_citations(&client, citing_paper_id, depth + 1, authors).await?; } } Ok(vec![]) } -async fn get_paper_info(paper_id: String, depth: 
u32, authors: &mut Authors) -> DataResult> { - // Build the URL - let mut url = String::new(); - // Probably also want: year,publicationDate,journal", BASE_URL, paper_id)?; - const fields: &str = "title, authors, citations"; - write!(&mut url, "{}/paper/{}?fields={}", BASE_URL, paper_id, fields)?; - - let resp = reqwest::get(url) - .await? - .text() - .await?; - - let resp_deserialized_attempt = serde_json::from_str::>(resp.as_str()); - - if let Err(err) = resp_deserialized_attempt { - println!("depth {} paper {} error {}", depth, paper_id, err); - return Ok(vec![]); - } - - let resp_deserialized: ApiListResponse = resp_deserialized_attempt.unwrap(); - Ok(vec![]) -} - #[tokio::main] async fn main() -> Result<(), Box> { - let Args{ - depth, - paper_id, - // write_to_mongo, - } = Args::parse(); + let Args { depth, paper_id } = Args::parse(); + + dotenv().ok(); let mut authors = Authors::new(); - get_citations(paper_id, depth, &mut authors).await?; + let client: reqwest::Client = reqwest::Client::new(); + + get_citations(&client, paper_id, depth, &mut authors).await?; Ok(()) -} \ No newline at end of file +}