backend: first cut at semantic scholar import

Ladd Hoffman 2024-04-19 18:08:18 -05:00
parent 626905bad6
commit 90e5f599b8
11 changed files with 395 additions and 101 deletions
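A minimal way to exercise the new import route once the server is running (a hypothetical smoke test; '<paperId>' stands in for a real Semantic Scholar paper ID, and the port assumes the .env default):

const axios = require('axios');

// POST a paper ID to the new route; the handler responds with an empty body
axios.post('http://localhost:3000/importFromSemanticScholar', { paperId: '<paperId>' })
  .then(() => console.log('import complete'))
  .catch((e) => console.error('import failed:', e.message));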

backend/.env

@ -1,2 +1,4 @@
PORT=3000
DATA_DIR="./data"
SEMANTIC_SCHOLAR_API_KEY=
NETWORK="localhost"

backend/contract-addresses.json Normal file

@ -0,0 +1,14 @@
{
"localhost": {
"DAO": "0xD60A1c64B96a133587A75C2771690072F238a549",
"Work1": "0xCF3f16D151052FA7b99a71E79EC3b0e6C793aa0b",
"Onboarding": "0xE148e864A646B8bFc95dcc9acd3dBcB52704EE60",
"Proposals": "0x981234BBBC1ec93200F5BB3a65e2F9711A6109aa"
},
"sepolia": {
"DAO": "0x241514DC94568e98222fBE66662b054b545A61AE",
"Work1": "0xc04152a440d8f79099e2049dc19b07EE7f2F8cc0",
"Onboarding": "0xFa5877940e527559320afc1303c06D0fb7E88907",
"Proposals": "0xeA9AF5fF56ef2bfd9DbC1295F1488302c61B92dF"
}
}
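These addresses feed the new src/contract-config.js helpers introduced below; a quick sketch of a lookup against this file (paths assume the backend root):

const { getContractAddressByNetworkName } = require('./src/contract-config');

// Resolves to '0xD60A1c64B96a133587A75C2771690072F238a549' per the JSON above
const daoAddress = getContractAddressByNetworkName('localhost', 'DAO');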

backend/index.js

@ -1,103 +1,19 @@
const express = require('express');
const { Level } = require('level');
const { recoverPersonalSignature } = require('@metamask/eth-sig-util');
const objectHash = require('object-hash');
const read = require('./src/read');
const write = require('./src/write');
const importFromSS = require('./src/import-from-ss');
require('dotenv').config();
const app = express();
const port = process.env.PORT || 3000;
const dataDir = process.env.DATA_DIR || 'data';
const db = new Level(`${dataDir}/forum`, { valueEncoding: 'json' });
const verifySignature = ({
author, content, signature, embeddedData,
}) => {
let contentToVerify = content;
if (embeddedData && Object.entries(embeddedData).length) {
contentToVerify += `\n\n${JSON.stringify(embeddedData, null, 2)}`;
}
try {
const account = recoverPersonalSignature({ data: contentToVerify, signature });
if (account !== author) {
console.log('error: author does not match signature');
return false;
}
} catch (e) {
console.log('error: failed to recover signature:', e.message);
return false;
}
return true;
};
app.use(express.json());
app.post('/write', async (req, res) => {
const {
body: {
author, content, signature, embeddedData,
},
} = req;
// Check author signature
if (!verifySignature({
author, content, signature, embeddedData,
})) {
res.status(403).end();
return;
}
// Compute content hash
const data = {
author, content, signature, embeddedData,
};
const hash = objectHash(data);
console.log('write', hash);
console.log(data);
// Store content
db.put(hash, data);
// Return hash
res.send(hash);
});
app.get('/read/:hash', async (req, res) => {
const { hash } = req.params;
console.log('read', hash);
// Fetch content
let data;
try {
data = await db.get(req.params.hash);
} catch (e) {
console.log('read error:', e.message, hash);
res.status(e.status).end();
return;
}
data.embeddedData = data.embeddedData || undefined;
console.log(data);
// Verify hash
const derivedHash = objectHash(data);
if (derivedHash !== hash) {
console.log('error: hash mismatch');
res.status(500).end();
return;
}
// Verify signature
if (!verifySignature(data)) {
    console.log('error: signature verification failed');
res.status(500).end();
return;
}
// Return content
res.json(data);
});
app.post('/write', write);
app.get('/read/:hash', read);
app.post('/importFromSemanticScholar', importFromSS);
app.get('*', (req, res) => {
console.log(`404 req.path: ${req.path}`);

backend/package-lock.json

@ -10,8 +10,9 @@
"license": "ISC",
"dependencies": {
"@metamask/eth-sig-util": "^7.0.1",
"axios": "^1.6.7",
"axios": "^1.6.8",
"dotenv": "^16.4.5",
"ethers": "^6.12.0",
"express": "^4.18.2",
"level": "^8.0.1",
"object-hash": "^3.0.0"
@ -34,6 +35,11 @@
"node": ">=0.10.0"
}
},
"node_modules/@adraffy/ens-normalize": {
"version": "1.10.1",
"resolved": "https://registry.npmjs.org/@adraffy/ens-normalize/-/ens-normalize-1.10.1.tgz",
"integrity": "sha512-96Z2IP3mYmF1Xg2cDm8f1gWGf/HUVedQ3FMifV4kG/PQ4yEP51xDtRAEfhVNt5f/uzpNkZHwWQuUcu6D6K+Ekw=="
},
"node_modules/@babel/runtime": {
"version": "7.23.9",
"resolved": "https://registry.npmjs.org/@babel/runtime/-/runtime-7.23.9.tgz",
@ -400,6 +406,11 @@
"resolved": "https://registry.npmjs.org/@types/ms/-/ms-0.7.34.tgz",
"integrity": "sha512-nG96G3Wp6acyAgJqGasjODb+acrI7KltPiRxzHPXnP3NgI28bpQDRv53olbqGXbfcgF5aiiHmO3xpwEpS5Ld9g=="
},
"node_modules/@types/node": {
"version": "18.15.13",
"resolved": "https://registry.npmjs.org/@types/node/-/node-18.15.13.tgz",
"integrity": "sha512-N+0kuo9KgrUQ1Sn/ifDXsvg0TTleP7rIy4zOBGECxAljqvqfqpTfzx0Q1NUedOixRMBfe2Whhb056a42cWs26Q=="
},
"node_modules/@ungap/structured-clone": {
"version": "1.2.0",
"resolved": "https://registry.npmjs.org/@ungap/structured-clone/-/structured-clone-1.2.0.tgz",
@ -456,6 +467,11 @@
"acorn": "^6.0.0 || ^7.0.0 || ^8.0.0"
}
},
"node_modules/aes-js": {
"version": "4.0.0-beta.5",
"resolved": "https://registry.npmjs.org/aes-js/-/aes-js-4.0.0-beta.5.tgz",
"integrity": "sha512-G965FqalsNyrPqgEGON7nIx1e/OVENSgiEIzyC63haUMuvNnwIgIjMs52hlTCKhkBny7A2ORNlfY9Zu+jmGk1Q=="
},
"node_modules/ajv": {
"version": "6.12.6",
"resolved": "https://registry.npmjs.org/ajv/-/ajv-6.12.6.tgz",
@ -705,11 +721,11 @@
}
},
"node_modules/axios": {
"version": "1.6.7",
"resolved": "https://registry.npmjs.org/axios/-/axios-1.6.7.tgz",
"integrity": "sha512-/hDJGff6/c7u0hDkvkGxR/oy6CbCs8ziCsC7SqmhjfozqiJGc8Z11wrv9z9lYfY4K8l+H9TpjcMDX0xOZmx+RA==",
"version": "1.6.8",
"resolved": "https://registry.npmjs.org/axios/-/axios-1.6.8.tgz",
"integrity": "sha512-v/ZHtJDU39mDpyBoFVkETcd/uNdxrWRrg3bKpOKzXFA6Bvqopts6ALSMU3y6ijYxbw2B+wPrIv46egTzJXCLGQ==",
"dependencies": {
"follow-redirects": "^1.15.4",
"follow-redirects": "^1.15.6",
"form-data": "^4.0.0",
"proxy-from-env": "^1.1.0"
}
@ -1701,6 +1717,55 @@
"@scure/bip39": "1.2.2"
}
},
"node_modules/ethers": {
"version": "6.12.0",
"resolved": "https://registry.npmjs.org/ethers/-/ethers-6.12.0.tgz",
"integrity": "sha512-zL5NlOTjML239gIvtVJuaSk0N9GQLi1Hom3ZWUszE5lDTQE/IVB62mrPkQ2W1bGcZwVGSLaetQbWNQSvI4rGDQ==",
"funding": [
{
"type": "individual",
"url": "https://github.com/sponsors/ethers-io/"
},
{
"type": "individual",
"url": "https://www.buymeacoffee.com/ricmoo"
}
],
"dependencies": {
"@adraffy/ens-normalize": "1.10.1",
"@noble/curves": "1.2.0",
"@noble/hashes": "1.3.2",
"@types/node": "18.15.13",
"aes-js": "4.0.0-beta.5",
"tslib": "2.4.0",
"ws": "8.5.0"
},
"engines": {
"node": ">=14.0.0"
}
},
"node_modules/ethers/node_modules/@noble/curves": {
"version": "1.2.0",
"resolved": "https://registry.npmjs.org/@noble/curves/-/curves-1.2.0.tgz",
"integrity": "sha512-oYclrNgRaM9SsBUBVbb8M6DTV7ZHRTKugureoYEncY5c65HOmRzvSiTE3y5CYaPYJA/GVkrhXEoF0M3Ya9PMnw==",
"dependencies": {
"@noble/hashes": "1.3.2"
},
"funding": {
"url": "https://paulmillr.com/funding/"
}
},
"node_modules/ethers/node_modules/@noble/hashes": {
"version": "1.3.2",
"resolved": "https://registry.npmjs.org/@noble/hashes/-/hashes-1.3.2.tgz",
"integrity": "sha512-MVC8EAQp7MvEcm30KWENFjgR+Mkmf+D189XJTkFIlwohU5hcBbn1ZkKq7KVTi2Hme3PMGF390DaL52beVrIihQ==",
"engines": {
"node": ">= 16"
},
"funding": {
"url": "https://paulmillr.com/funding/"
}
},
"node_modules/express": {
"version": "4.18.2",
"resolved": "https://registry.npmjs.org/express/-/express-4.18.2.tgz",
@ -1861,9 +1926,9 @@
"dev": true
},
"node_modules/follow-redirects": {
"version": "1.15.5",
"resolved": "https://registry.npmjs.org/follow-redirects/-/follow-redirects-1.15.5.tgz",
"integrity": "sha512-vSFWUON1B+yAw1VN4xMfxgn5fTUiaOzAJCKBwIIgT/+7CuGy9+r+5gITvP62j3RmaD5Ph65UaERdOSRGUzZtgw==",
"version": "1.15.6",
"resolved": "https://registry.npmjs.org/follow-redirects/-/follow-redirects-1.15.6.tgz",
"integrity": "sha512-wWN62YITEaOpSK584EZXJafH1AGpO8RVgElfkuXbTOrPX4fIfOyEpW/CsiNd8JdYrAoOvafRTOEnvsO++qCqFA==",
"funding": [
{
"type": "individual",
@ -3802,6 +3867,11 @@
"strip-bom": "^3.0.0"
}
},
"node_modules/tslib": {
"version": "2.4.0",
"resolved": "https://registry.npmjs.org/tslib/-/tslib-2.4.0.tgz",
"integrity": "sha512-d6xOpEDfsi2CZVlPQzGeux8XMwLT9hssAsaPYExaQMuYskwb+x1x7J371tWlbBdWHroy99KnVB6qIkUbs5X3UQ=="
},
"node_modules/tweetnacl": {
"version": "1.0.3",
"resolved": "https://registry.npmjs.org/tweetnacl/-/tweetnacl-1.0.3.tgz",
@ -4078,6 +4148,26 @@
"integrity": "sha512-l4Sp/DRseor9wL6EvV2+TuQn63dMkPjZ/sp9XkghTEbV9KlPS1xUsZ3u7/IQO4wxtcFB4bgpQPRcR3QCvezPcQ==",
"dev": true
},
"node_modules/ws": {
"version": "8.5.0",
"resolved": "https://registry.npmjs.org/ws/-/ws-8.5.0.tgz",
"integrity": "sha512-BWX0SWVgLPzYwF8lTzEy1egjhS4S4OEAHfsO8o65WOVsrnSRGaSiUaa9e0ggGlkMTtBlmOpEXiie9RUcBO86qg==",
"engines": {
"node": ">=10.0.0"
},
"peerDependencies": {
"bufferutil": "^4.0.1",
"utf-8-validate": "^5.0.2"
},
"peerDependenciesMeta": {
"bufferutil": {
"optional": true
},
"utf-8-validate": {
"optional": true
}
}
},
"node_modules/yallist": {
"version": "4.0.0",
"resolved": "https://registry.npmjs.org/yallist/-/yallist-4.0.0.tgz",

backend/package.json

@ -10,8 +10,9 @@
"license": "ISC",
"dependencies": {
"@metamask/eth-sig-util": "^7.0.1",
"axios": "^1.6.7",
"axios": "^1.6.8",
"dotenv": "^16.4.5",
"ethers": "^6.12.0",
"express": "^4.18.2",
"level": "^8.0.1",
"object-hash": "^3.0.0"

backend/src/contract-config.js Normal file

@ -0,0 +1,23 @@
const contractAddresses = require('../contract-addresses.json');
const networks = {
localhost: '0x539',
sepolia: '0xaa36a7',
};
const getContractAddressByNetworkName = (networkName, contractName) => {
  const addresses = contractAddresses[networkName];
  if (!addresses) throw new Error(`Network ${networkName} not recognized`);
  const address = addresses[contractName];
  if (!address) throw new Error(`Contract ${contractName} not recognized`);
  return address;
};
const getContractAddressByChainId = (chainId, contractName) => {
  // find() returns undefined for an unknown chain ID; check before destructuring
  const entry = Object.entries(networks).find(([, id]) => id === chainId);
  if (!entry) throw new Error(`Chain ID ${chainId} not recognized`);
  const [network] = entry;
  return getContractAddressByNetworkName(network, contractName);
};
module.exports = {
getContractAddressByChainId,
getContractAddressByNetworkName,
};
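And the chain-ID variant, using the IDs from the networks map above:

const { getContractAddressByChainId } = require('./contract-config');

// '0xaa36a7' is the sepolia entry, so this resolves to the sepolia Proposals address
const proposals = getContractAddressByChainId('0xaa36a7', 'Proposals');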

backend/src/db.js Normal file

@ -0,0 +1,9 @@
const { Level } = require('level');
const dataDir = process.env.DATA_DIR || 'data';
module.exports = {
forum: new Level(`${dataDir}/forum`, { valueEncoding: 'json' }),
authorAddresses: new Level(`${dataDir}/authorAddresses`, { valueEncoding: 'utf8' }),
authorPrivKeys: new Level(`${dataDir}/authorPrivKeys`, { valueEncoding: 'utf8' }),
};
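Level throws on missing keys rather than returning undefined, which is why the import code below wraps get() in try/catch. A sketch of the pattern (getAuthorAddress is a hypothetical helper name):

const { authorAddresses } = require('./db');

const getAuthorAddress = async (authorId) => {
  try {
    return await authorAddresses.get(authorId);
  } catch (e) {
    if (e.code !== 'LEVEL_NOT_FOUND') throw e; // only swallow missing-key errors
    return undefined;
  }
};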

backend/src/import-from-ss.js Normal file

@ -0,0 +1,133 @@
const axios = require('axios');
const ethers = require('ethers');
const crypto = require('crypto');
const objectHash = require('object-hash');
const { getContractAddressByNetworkName } = require('./contract-config');
const { authorAddresses, authorPrivKeys, forum } = require('./db');
// NOTE: getContractAt is a Hardhat-style helper; with standalone ethers this
// would be new ethers.Contract(address, abi, signerOrProvider)
const getContract = (name) => ethers.getContractAt(
  name,
  getContractAddressByNetworkName(process.env.NETWORK, name),
);
const fetchPaperInfo = async (paperId) => {
  // Request the fields used below; the Graph API returns only paperId and
  // title unless fields are named explicitly. Axios puts the payload on
  // response.data, and Semantic Scholar expects the key in an x-api-key header.
  const { data: paper } = await axios.get(`https://api.semanticscholar.org/graph/v1/paper/${paperId}`, {
    params: { fields: 'title,url,authors,references' },
    headers: {
      'x-api-key': process.env.SEMANTIC_SCHOLAR_API_KEY,
    },
  });
  return paper;
};
const getAuthorsInfo = async (paper) => Promise.all(paper.authors.map(async ({ authorId }) => {
// Check if we already have an account for each author
let authorAddress;
let authorPrivKey;
  try {
    authorAddress = await authorAddresses.get(authorId);
  } catch (e) {
    // A missing key just means we have no account for this author yet
    if (e.code !== 'LEVEL_NOT_FOUND') throw e;
  }
if (authorAddress) {
// This should always succeed, so we don't use try/catch here
authorPrivKey = await authorPrivKeys.get(authorAddress);
} else {
// Generate and store a new account
const id = crypto.randomBytes(32).toString('hex');
authorPrivKey = `0x${id}`;
const wallet = new ethers.Wallet(authorPrivKey);
authorAddress = wallet.address;
    await authorAddresses.put(authorId, authorAddress);
await authorPrivKeys.put(authorAddress, authorPrivKey);
}
return {
authorAddress,
authorPrivKey,
};
}));
const generatePost = async (paper) => {
  const authorsInfo = await getAuthorsInfo(paper);
const firstAuthorWallet = new ethers.Wallet(authorsInfo[0].authorPrivKey);
const eachAuthorWeightPercent = Math.floor(100 / authorsInfo.length);
const authors = authorsInfo.map(({ authorAddress }) => ({
weightPercent: eachAuthorWeightPercent,
authorAddress,
}));
  // Make sure author weights sum to 100 by giving the rounding remainder
  // to the first author
  const totalAuthorsWeight = authors.reduce((t, { weightPercent }) => t + weightPercent, 0);
  authors[0].weightPercent += 100 - totalAuthorsWeight;
const content = `Semantic Scholar paper ${paper.paperId}
${paper.title}
HREF ${paper.url}`;
// Note that for now we leave embedded data empty, but the stub is here in case we want to use it
const embeddedData = {};
let contentToSign = content;
if (embeddedData && Object.entries(embeddedData).length) {
contentToSign += `\n\nDATA\n${JSON.stringify(embeddedData, null, 2)}`;
}
const signature = firstAuthorWallet.signMessageSync(contentToSign);
const hash = objectHash({
authors, content, signature, embeddedData,
});
return {
hash, authors, content, signature, embeddedData,
};
};
module.exports = async (req, res) => {
const dao = await getContract('DAO');
const {
body: {
paperId,
},
} = req;
console.log(`importFromSS ${paperId}`);
// Read the paper info from SS
const paper = await fetchPaperInfo(paperId);
  const citations = [];
  if (paper.references && paper.references.length) {
    const eachCitationWeightPercent = Math.floor(30 / paper.references.length);
    // Await each cited paper in turn; forEach with an async callback would
    // leave citations empty when we read it below.
    for (const { paperId: citedPaperId } of paper.references) {
      // We need to fetch each cited paper so we can generate the post we
      // would add to the forum, then sign and hash it. The forum allows
      // dangling citations to support this use case.
      const citedPaper = await fetchPaperInfo(citedPaperId);
      const citedPaperInfo = await generatePost(citedPaper);
      citations.push({
        weightPercent: eachCitationWeightPercent,
        targetPostId: citedPaperInfo.hash,
      });
    }
    // Make sure citation weights sum to exactly 30 (assuming citations are
    // meant to share a fixed 30% of the total weight) by giving the rounding
    // remainder to the first citation
    const totalCitationWeight = citations.reduce((t, { weightPercent }) => t + weightPercent, 0);
    citations[0].weightPercent += 30 - totalCitationWeight;
  }
// Create a post for this paper
const {
hash, authors, content, signature, embeddedData,
} = await generatePost(paper);
// Write the new post to our database
await forum.put(hash, {
authors, content, signature, embeddedData, citations,
});
  // Add the post to the forum (on-chain)
await dao.addPost(authors, hash, citations);
console.log({
authors, content, signature, embeddedData, citations,
});
res.end();
};
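To make the weight balancing in generatePost concrete: with three authors, Math.floor(100 / 3) gives 33 each, and the remainder of 1 goes to the first author:

const authors = [33, 33, 33].map((weightPercent) => ({ weightPercent }));
const total = authors.reduce((t, { weightPercent }) => t + weightPercent, 0); // 99
authors[0].weightPercent += 100 - total; // weights become [34, 33, 33]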

backend/src/read.js Normal file

@ -0,0 +1,47 @@
const objectHash = require('object-hash');
const verifySignature = require('./verify-signature');
const { forum } = require('./db');
module.exports = async (req, res) => {
const { hash } = req.params;
console.log('read', hash);
// Fetch content
let data;
  try {
    data = await forum.get(hash);
  } catch (e) {
    console.log('read error:', e.message, hash);
    // Level v8 reports missing keys with code LEVEL_NOT_FOUND
    res.status(e.code === 'LEVEL_NOT_FOUND' ? 404 : 500).end();
    return;
  }
data.embeddedData = data.embeddedData || undefined;
console.log(data);
const {
authors, content, signature, embeddedData,
} = data;
// Verify hash
const derivedHash = objectHash({
authors, content, signature, embeddedData,
});
if (derivedHash !== hash) {
console.log('error: hash mismatch');
res.status(500).end();
return;
}
// Verify signature
if (!verifySignature(data)) {
    console.log('error: signature verification failed');
res.status(500).end();
return;
}
// Return content
res.json(data);
};

backend/src/verify-signature.js Normal file

@ -0,0 +1,24 @@
const { recoverPersonalSignature } = require('@metamask/eth-sig-util');
const verifySignature = ({
authors, content, signature, embeddedData,
}) => {
let contentToVerify = content;
if (embeddedData && Object.entries(embeddedData).length) {
contentToVerify += `\n\nDATA\n${JSON.stringify(embeddedData, null, 2)}`;
}
try {
const account = recoverPersonalSignature({ data: contentToVerify, signature });
    // recoverPersonalSignature returns a lowercase address while ethers
    // wallets report checksummed ones, so compare case-insensitively
    const authorAddresses = authors.map(({ authorAddress }) => authorAddress.toLowerCase());
    if (!authorAddresses.includes(account.toLowerCase())) {
console.log('error: signer is not among the authors');
return false;
}
} catch (e) {
console.log('error: failed to recover signature:', e.message);
return false;
}
return true;
};
module.exports = verifySignature;
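A round-trip sketch of why the lowercase comparison matters: ethers' personal_sign-style signatures are recoverable with eth-sig-util, but the recovered address comes back lowercase while ethers reports checksummed addresses:

const { recoverPersonalSignature } = require('@metamask/eth-sig-util');
const { Wallet } = require('ethers');

const wallet = Wallet.createRandom();
const content = 'example post content';
const signature = wallet.signMessageSync(content);
const recovered = recoverPersonalSignature({ data: content, signature });
console.log(recovered === wallet.address.toLowerCase()); // true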

backend/src/write.js Normal file

@ -0,0 +1,35 @@
const objectHash = require('object-hash');
const verifySignature = require('./verify-signature');
const { forum } = require('./db');
module.exports = async (req, res) => {
const {
body: {
authors, content, signature, embeddedData, citations,
},
} = req;
// Check author signature
if (!verifySignature({
authors, content, signature, embeddedData,
})) {
res.status(403).end();
return;
}
  // Compute content hash; citations are stored alongside the post but are not
  // part of the hashed content, matching the hash check in read.js
  const data = {
    authors, content, signature, embeddedData, citations,
  };
  const hash = objectHash({
    authors, content, signature, embeddedData,
  });
console.log('write', hash);
console.log(data);
// Store content
await forum.put(hash, data);
// Return hash
res.send(hash);
};
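For reference, a hypothetical client-side call shaped to match this handler (wallet and content as in the verify-signature sketch above):

const axios = require('axios');

const body = {
  authors: [{ authorAddress: wallet.address, weightPercent: 100 }],
  content,
  signature: wallet.signMessageSync(content),
  embeddedData: {},
  citations: [],
};
// The server re-verifies the signature, stores the post, and responds with its hash
axios.post('http://localhost:3000/write', body)
  .then(({ data: hash }) => console.log('stored as', hash));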