Browse Source

Initial commit of the Cellar

consume-entitystore
Puck Meerburg 11 months ago
commit
8b6c04f558

+ 4
- 0
.gitignore View File

@@ -0,0 +1,4 @@
1
+.env
2
+/target
3
+**/*.rs.bk
4
+Cargo.lock

+ 11
- 0
Cargo.toml View File

@@ -0,0 +1,11 @@
1
+[package]
2
+name = "kroeg-cellar"
3
+version = "0.1.0"
4
+authors = ["Puck Meerburg <puck@puckipedia.com>"]
5
+
6
+[dependencies]
7
+diesel = { version = "~1.2", features = ["postgres"] }
8
+jsonld = { path = "../jsonld-rs" }
9
+dotenv = "0.9.0"
10
+serde = "1.0"
11
+serde_json = "1.0"

+ 36
- 0
README.md View File

@@ -0,0 +1,36 @@
1
+# Kroeg Cellar
2
+
3
+(I'd have called this `kroeg-kelder` but that'd probably go too far)
4
+
5
+Simple JSON-LD storage mechanism, storing everything in a database of RDF
6
+ quads.
7
+
8
+## Usage
9
+
10
+This code is made mostly to work inside of Kroeg, but the external API is
11
+ documented with rustdoc, which isn't publicly generated yet. Clone therepo
12
+ then run `cargo doc` to do it yourself.
13
+
14
+## Design
15
+
16
+The database currently stores its data in two tables:
17
+
18
+### `Attribute`
19
+
20
+This table maps an ID number to a URL, allowing for e.g. quick lookups and even
21
+ quicker renaming of objects (just change its value in this table, and
22
+ everything automatically updates!)
23
+
24
+### `Quad`
25
+This table is slightly complicated, as it has to support everything that RDF
26
+ can:
27
+
28
+- `id`: unique ID for the quad.
29
+- `quad_id`, `subject_id`, `predicate_id`: These point into the `Attribute`
30
+   table, fort the corresponding RDF quad attributes.
31
+
32
+- `attribute_id`: used if object points to another ID.
33
+
34
+- `object`, `type_id`, `language`: The value, and type or language of the quad.
35
+  The code only supports `language` or `type_id` being set, both of them being
36
+   set is unsupported.

+ 0
- 0
migrations/.gitkeep View File


+ 6
- 0
migrations/00000000000000_diesel_initial_setup/down.sql View File

@@ -0,0 +1,6 @@
1
+-- This file was automatically created by Diesel to setup helper functions
2
+-- and other internal bookkeeping. This file is safe to edit, any future
3
+-- changes will be added to existing projects as new migrations.
4
+
5
+DROP FUNCTION IF EXISTS diesel_manage_updated_at(_tbl regclass);
6
+DROP FUNCTION IF EXISTS diesel_set_updated_at();

+ 36
- 0
migrations/00000000000000_diesel_initial_setup/up.sql View File

@@ -0,0 +1,36 @@
1
+-- This file was automatically created by Diesel to setup helper functions
2
+-- and other internal bookkeeping. This file is safe to edit, any future
3
+-- changes will be added to existing projects as new migrations.
4
+
5
+
6
+
7
+
8
+-- Sets up a trigger for the given table to automatically set a column called
9
+-- `updated_at` whenever the row is modified (unless `updated_at` was included
10
+-- in the modified columns)
11
+--
12
+-- # Example
13
+--
14
+-- ```sql
15
+-- CREATE TABLE users (id SERIAL PRIMARY KEY, updated_at TIMESTAMP NOT NULL DEFAULT NOW());
16
+--
17
+-- SELECT diesel_manage_updated_at('users');
18
+-- ```
19
+CREATE OR REPLACE FUNCTION diesel_manage_updated_at(_tbl regclass) RETURNS VOID AS $$
20
+BEGIN
21
+    EXECUTE format('CREATE TRIGGER set_updated_at BEFORE UPDATE ON %s
22
+                    FOR EACH ROW EXECUTE PROCEDURE diesel_set_updated_at()', _tbl);
23
+END;
24
+$$ LANGUAGE plpgsql;
25
+
26
+CREATE OR REPLACE FUNCTION diesel_set_updated_at() RETURNS trigger AS $$
27
+BEGIN
28
+    IF (
29
+        NEW IS DISTINCT FROM OLD AND
30
+        NEW.updated_at IS NOT DISTINCT FROM OLD.updated_at
31
+    ) THEN
32
+        NEW.updated_at := current_timestamp;
33
+    END IF;
34
+    RETURN NEW;
35
+END;
36
+$$ LANGUAGE plpgsql;

+ 2
- 0
migrations/2018-05-14-135610_base_database/down.sql View File

@@ -0,0 +1,2 @@
1
+DROP TABLE quad;
2
+DROP TABLE attribute;

+ 23
- 0
migrations/2018-05-14-135610_base_database/up.sql View File

@@ -0,0 +1,23 @@
1
+CREATE TABLE attribute (
2
+    id SERIAL PRIMARY KEY,
3
+    url TEXT UNIQUE NOT NULL
4
+);
5
+
6
+CREATE INDEX attribute_url on attribute (url);
7
+
8
+CREATE TABLE quad (
9
+    id SERIAL PRIMARY KEY,
10
+    quad_id integer not null references attribute,
11
+    subject_id integer not null references attribute,
12
+    predicate_id integer not null references attribute,
13
+
14
+    -- either
15
+    attribute_id integer null references attribute,
16
+
17
+    -- or
18
+    object text null,
19
+    type_id integer null references attribute,
20
+    language char(2) null
21
+);
22
+
23
+CREATE INDEX quad_quad_id on quad (quad_id);

+ 234
- 0
src/lib.rs View File

@@ -0,0 +1,234 @@
1
+#[macro_use]
2
+extern crate diesel;
3
+extern crate jsonld;
4
+
5
+pub mod models;
6
+pub mod schema;
7
+
8
+use diesel::dsl::any;
9
+use diesel::pg::upsert::excluded;
10
+use diesel::pg::PgConnection;
11
+use diesel::prelude::*;
12
+
13
+/// Bad hack for now.
14
+pub use diesel::result::Error;
15
+
16
+use std::collections::{HashMap, HashSet};
17
+use std::fmt;
18
+
19
+use jsonld::rdf::{QuadContents, StringQuad};
20
+
21
+/// A client that talks to a database to store triples keyed by quad.
22
+pub struct QuadClient {
23
+    connection: PgConnection,
24
+    attribute_id: HashMap<String, i32>,
25
+    attribute_url: HashMap<i32, String>,
26
+}
27
+
28
+impl fmt::Debug for QuadClient {
29
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
30
+        write!(f, "QuadClient {{ [...] }}")
31
+    }
32
+}
33
+
34
+impl QuadClient {
35
+    /// Creates a new `QuadClient` from a `PgConnnection`. Multiple clients
36
+    /// can exist safely on one DB without interfering.
37
+    pub fn new(connection: PgConnection) -> QuadClient {
38
+        QuadClient {
39
+            connection: connection,
40
+            attribute_id: HashMap::new(),
41
+            attribute_url: HashMap::new(),
42
+        }
43
+    }
44
+
45
+    /// Gets a handle to the underlying connection.
46
+    pub fn connection(&self) -> &PgConnection {
47
+        &self.connection
48
+    }
49
+
50
+    /// Takes a list of attributes and caches their contents into this client.
51
+    fn read_attributes(&mut self, vals: &Vec<models::Attribute>) {
52
+        for val in vals {
53
+            self.attribute_id.insert(val.url.to_owned(), val.id);
54
+            self.attribute_url.insert(val.id, val.url.to_owned());
55
+        }
56
+    }
57
+
58
+    /// Takes a `Vec` of IRIs, and stores them into the DB where needed,
59
+    /// caching them.
60
+    fn store_attributes(&mut self, vals: &Vec<&str>) -> Result<(), diesel::result::Error> {
61
+        use models::NewAttribute;
62
+        use schema::attribute::dsl::*;
63
+
64
+        let to_write: Vec<_> = vals
65
+            .iter()
66
+            .filter(|f| !self.attribute_id.contains_key(**f))
67
+            .map(|f| NewAttribute { url: f })
68
+            .collect();
69
+
70
+        if to_write.len() == 0 {
71
+            return Ok(());
72
+        }
73
+
74
+        let attribute_results = diesel::insert_into(attribute)
75
+            .values(&to_write)
76
+            .on_conflict(url)
77
+            .do_update()
78
+            .set(url.eq(excluded(url)))
79
+            .load(&self.connection)?;
80
+
81
+        self.read_attributes(&attribute_results);
82
+
83
+        Ok(())
84
+    }
85
+
86
+    /// Takes a `Vec` of database IDs and caches them into the local client.
87
+    fn get_attributes(&mut self, vals: &Vec<i32>) -> Result<(), diesel::result::Error> {
88
+        use schema::attribute::dsl::*;
89
+
90
+        let to_read: Vec<_> = vals
91
+            .iter()
92
+            .filter(|f| !self.attribute_url.contains_key(f))
93
+            .collect();
94
+        if to_read.len() == 0 {
95
+            return Ok(());
96
+        }
97
+
98
+        let attribute_results = attribute
99
+            .filter(id.eq(any(to_read)))
100
+            .load(&self.connection)?;
101
+
102
+        self.read_attributes(&attribute_results);
103
+
104
+        Ok(())
105
+    }
106
+
107
+    /// Gets a single attribute IRI from a database ID.
108
+    pub fn get_attribute_url(&mut self, value: i32) -> Result<String, diesel::result::Error> {
109
+        self.get_attributes(&vec![value])?;
110
+
111
+        Ok(self.attribute_url[&value].to_owned())
112
+    }
113
+
114
+    /// Gets a single database ID from an attribute IRI.
115
+    pub fn get_attribute_id(&mut self, value: &str) -> Result<i32, diesel::result::Error> {
116
+        self.store_attributes(&vec![value])?;
117
+
118
+        Ok(self.attribute_id[value].to_owned())
119
+    }
120
+
121
+    /// Takes a `Vec<Quad>` and ensures that all the database IDs that are used
122
+    /// will be cached.
123
+    fn preload_quads(&mut self, quads: &Vec<models::Quad>) -> Result<(), diesel::result::Error> {
124
+        let mut required_ids = HashSet::new();
125
+
126
+        for quad in quads {
127
+            required_ids.insert(quad.subject_id);
128
+            required_ids.insert(quad.predicate_id);
129
+
130
+            if let Some(qval) = quad.attribute_id {
131
+                required_ids.insert(qval);
132
+            }
133
+
134
+            if let Some(qval) = quad.type_id {
135
+                required_ids.insert(qval);
136
+            }
137
+        }
138
+
139
+        self.get_attributes(&(required_ids.into_iter().collect()))
140
+    }
141
+
142
+    /// Translates a single DB Quad into a `StringQuad`
143
+    fn read_quad(&self, quad: &models::Quad) -> StringQuad {
144
+        let contents = if let Some(attribute_id) = quad.attribute_id {
145
+            QuadContents::Id(self.attribute_url[&attribute_id].to_owned())
146
+        } else if let Some(type_id) = quad.type_id {
147
+            QuadContents::Object(
148
+                self.attribute_url[&type_id].to_owned(),
149
+                quad.object.as_ref().unwrap().to_owned(),
150
+                quad.language.as_ref().map(|f| f.to_owned()),
151
+            )
152
+        } else {
153
+            unreachable!();
154
+        };
155
+
156
+        StringQuad {
157
+            subject_id: self.attribute_url[&quad.subject_id].to_owned(),
158
+            predicate_id: self.attribute_url[&quad.predicate_id].to_owned(),
159
+            contents: contents,
160
+        }
161
+    }
162
+
163
+    /// Reads a list of triples from the database, using a graph ID as key.
164
+    pub fn read_quads(&mut self, quadid: &str) -> Result<Vec<StringQuad>, diesel::result::Error> {
165
+        let quadid = self.get_attribute_id(quadid)?;
166
+
167
+        use schema::quad::dsl::*;
168
+
169
+        let quads: Vec<models::Quad> = quad.filter(quad_id.eq(quadid)).load(&self.connection)?;
170
+
171
+        self.preload_quads(&quads)?;
172
+
173
+        Ok(quads.into_iter().map(|f| self.read_quad(&f)).collect())
174
+    }
175
+
176
+    fn prestore_quads(&mut self, quads: &Vec<StringQuad>) -> Result<(), diesel::result::Error> {
177
+        let mut required_ids: HashSet<&str> = HashSet::new();
178
+
179
+        for quad in quads {
180
+            required_ids.insert(&quad.subject_id);
181
+            required_ids.insert(&quad.predicate_id);
182
+            match quad.contents {
183
+                QuadContents::Id(ref data) => required_ids.insert(&*data),
184
+                QuadContents::Object(ref data, _, _) => required_ids.insert(&*data),
185
+            };
186
+        }
187
+
188
+        self.store_attributes(&(required_ids.into_iter().collect()))
189
+    }
190
+
191
+    fn write_quad(&self, quad_id: i32, quad: StringQuad) -> models::InsertableQuad {
192
+        let (attribute_id, type_id, object, lang) = match quad.contents {
193
+            QuadContents::Id(data) => (Some(self.attribute_id[&data]), None, None, None),
194
+            QuadContents::Object(data, object, lang) => {
195
+                (None, Some(self.attribute_id[&data]), Some(object), lang)
196
+            }
197
+        };
198
+
199
+        models::InsertableQuad {
200
+            quad_id: quad_id,
201
+            subject_id: self.attribute_id[&quad.subject_id],
202
+            predicate_id: self.attribute_id[&quad.predicate_id],
203
+            attribute_id: attribute_id,
204
+            type_id: type_id,
205
+            object: object,
206
+            language: lang,
207
+        }
208
+    }
209
+
210
+    /// Store a list of quads in the DB, keyed by graph ID.
211
+    pub fn write_quads(
212
+        &mut self,
213
+        quadid: &str,
214
+        items: Vec<StringQuad>,
215
+    ) -> Result<(), diesel::result::Error> {
216
+        self.prestore_quads(&items)?;
217
+
218
+        let quadid = self.get_attribute_id(quadid)?;
219
+
220
+        use schema::quad::dsl::*;
221
+
222
+        let items: Vec<_> = items
223
+            .into_iter()
224
+            .map(|f| self.write_quad(quadid, f))
225
+            .collect();
226
+
227
+        diesel::delete(quad.filter(quad_id.eq(quadid))).execute(&self.connection)?;
228
+        diesel::insert_into(quad)
229
+            .values(&items)
230
+            .execute(&self.connection)?;
231
+
232
+        Ok(())
233
+    }
234
+}

+ 41
- 0
src/models.rs View File

@@ -0,0 +1,41 @@
1
+use schema::*;
2
+
3
+#[derive(Queryable, Debug)]
4
+pub struct Attribute {
5
+    pub id: i32,
6
+    pub url: String,
7
+}
8
+
9
+#[derive(Insertable, Debug)]
10
+#[table_name = "attribute"]
11
+pub struct NewAttribute<'a> {
12
+    pub url: &'a str,
13
+}
14
+
15
+#[derive(Queryable, Debug)]
16
+pub struct Quad {
17
+    pub id: i32,
18
+    pub quad_id: i32,
19
+    pub subject_id: i32,
20
+    pub predicate_id: i32,
21
+
22
+    pub attribute_id: Option<i32>,
23
+
24
+    pub object: Option<String>,
25
+    pub type_id: Option<i32>,
26
+    pub language: Option<String>,
27
+}
28
+
29
+#[derive(Insertable, Debug)]
30
+#[table_name = "quad"]
31
+pub struct InsertableQuad {
32
+    pub quad_id: i32,
33
+    pub subject_id: i32,
34
+    pub predicate_id: i32,
35
+
36
+    pub attribute_id: Option<i32>,
37
+
38
+    pub object: Option<String>,
39
+    pub type_id: Option<i32>,
40
+    pub language: Option<String>,
41
+}

+ 21
- 0
src/schema.rs View File

@@ -0,0 +1,21 @@
1
+table! {
2
+    attribute (id) {
3
+        id -> Int4,
4
+        url -> Text,
5
+    }
6
+}
7
+
8
+table! {
9
+    quad (id) {
10
+        id -> Int4,
11
+        quad_id -> Int4,
12
+        subject_id -> Int4,
13
+        predicate_id -> Int4,
14
+        attribute_id -> Nullable<Int4>,
15
+        object -> Nullable<Text>,
16
+        type_id -> Nullable<Int4>,
17
+        language -> Nullable<Text>,
18
+    }
19
+}
20
+
21
+allow_tables_to_appear_in_same_query!(attribute, quad,);

Loading…
Cancel
Save