//! Large file storage support for BonsaiDb.
//!
//! This crate provides support for storing large files in
//! [BonsaiDb](https://bonsaidb.io/). While BonsaiDb's document size limit is 4
//! gigabytes, the requirement that each document is loaded in memory fully can
//! cause higher memory usage when storing larger files.
//!
//! # `FileConfig`
//!
//! The [`FileConfig`] trait allows customizing the [`CollectionName`]s and
//! block size. If you want to use smaller or larger blocks, you can. If you
//! want to store more than one set of files in the same database, you can use
//! two [`FileConfig`] implementors with different [`CollectionName`]s.
//!
//! For most users, the provided implementation [`BonsaiFiles`] will work for
//! them.
//!
//! # Basic Example
//!
//! ```rust
21
#![doc = include_str!("../examples/basic-files.rs")]
22
//! ```
//!
//! # Async Support
//!
//! This crate adds implementations of `tokio::io::AsyncRead` and
//! `tokio::io::AsyncWrite` when the `async` feature flag is enabled.
28
#![cfg_attr(feature = "async", doc = "```rust")]
29
#![cfg_attr(feature = "async", doc = include_str!("../examples/basic-files-async.rs"))]
30
#![cfg_attr(feature = "async", doc = "```")]
31
#![forbid(unsafe_code)]
32
#![warn(
33
    clippy::cargo,
34
    missing_docs,
35
    // clippy::missing_docs_in_private_items,
36
    clippy::pedantic,
37
    future_incompatible,
38
    rust_2018_idioms,
39
)]
40
#![allow(
41
    clippy::missing_errors_doc, // TODO clippy::missing_errors_doc
42
    clippy::option_if_let_else,
43
    clippy::module_name_repetitions,
44
)]
45

            
46
use std::fmt::Debug;
47
use std::marker::PhantomData;
48

            
49
#[cfg(feature = "async")]
50
use bonsaidb_core::async_trait::async_trait;
51
#[cfg(feature = "async")]
52
use bonsaidb_core::connection::AsyncConnection;
53
use bonsaidb_core::connection::Connection;
54
use bonsaidb_core::key::time::TimestampAsNanoseconds;
55
use bonsaidb_core::schema::{
56
    CollectionName, InsertError, Qualified, Schema, SchemaName, Schematic,
57
};
58
pub use bonsaidb_macros::FileConfig;
59
use derive_where::derive_where;
60
use serde::de::DeserializeOwned;
61
use serde::Serialize;
62

            
63
mod schema;
64

            
65
/// Types for accessing files directly from a connection to a database. These
66
/// types perform no permission checking beyond what BonsaiDb normally checks as
67
/// part of accessing/updating the underlying collections.
68
pub mod direct;
69

            
70
/// A configuration for a set of [stored files](direct::File).
71
#[cfg_attr(feature = "async", async_trait)]
72
pub trait FileConfig: Sized + Send + Sync + Unpin + 'static {
73
    /// The type of the `metadata` stored in [`File`](direct::File). If you do
74
    /// not need to store metadata, you can set this type to `()`.
75
    type Metadata: Serialize + DeserializeOwned + Send + Sync + Debug + Clone;
76

            
77
    /// The maximum size for each write to an underlying file. The file will be
78
    /// stored by breaking the data written into chunks no larger than
79
    /// `BLOCK_SIZE`.
80
    const BLOCK_SIZE: usize;
81
    /// Returns the unique collection name to use to store [`File`s][direct::File].
82
    fn files_name() -> CollectionName;
83
    /// Returns the unique collection name to use to store file blocks.
84
    fn blocks_name() -> CollectionName;
85

            
86
    /// Registers the collections for this configuration into `schema`.
87
40
    fn register_collections(schema: &mut Schematic) -> Result<(), bonsaidb_core::Error> {
88
40
        schema.define_collection::<schema::file::File<Self>>()?;
89
40
        schema.define_collection::<schema::block::Block<Self>>()?;
90

            
91
40
        Ok(())
92
40
    }
93

            
94
    /// Builds a new file. If `name_or_path` starts with a `/`, the argument is
95
    /// treated as a full path to the file being built. Otherwise, the argument
96
    /// is treated as the file's name.
97
20
    fn build<NameOrPath: AsRef<str>>(name_or_path: NameOrPath) -> direct::FileBuilder<'static, Self>
98
20
    where
99
20
        Self::Metadata: Default,
100
20
    {
101
20
        direct::FileBuilder::new(name_or_path, <Self::Metadata as Default>::default())
102
20
    }
103

            
104
    /// Builds a new file. If `name_or_path` starts with a `/`, the argument is
105
    /// treated as a full path to the file being built. Otherwise, the argument
106
    /// is treated as the file's name. The file's metadata will be `metadata`
107
    /// upon creation. The file's metadata will be `metadata` upon creation.
108
4
    fn build_with_metadata<NameOrPath: AsRef<str>>(
109
4
        name_or_path: NameOrPath,
110
4
        metadata: Self::Metadata,
111
4
    ) -> direct::FileBuilder<'static, Self> {
112
4
        direct::FileBuilder::new(name_or_path, metadata)
113
4
    }
114

            
115
    /// Returns the file with the unique `id` given, if found. This function
116
    /// only loads metadata about the file, it does not load the contents of the
117
    /// file.
118
2
    fn get<Database: Connection + Clone>(
119
2
        id: u32,
120
2
        database: &Database,
121
2
    ) -> Result<Option<direct::File<direct::Blocking<Database>, Self>>, bonsaidb_core::Error> {
122
2
        direct::File::<_, Self>::get(id, database)
123
2
    }
124

            
125
    /// Returns the file located at `path`, if found. This function
126
    /// only loads metadata about the file, it does not load the contents of the
127
    /// file.
128
10
    fn load<Database: Connection + Clone>(
129
10
        path: &str,
130
10
        database: &Database,
131
10
    ) -> Result<Option<direct::File<direct::Blocking<Database>, Self>>, Error> {
132
10
        direct::File::<_, Self>::load(path, database)
133
10
    }
134

            
135
    /// Returns the file locate at `path`, or creates an empty file if not
136
    /// currently present.
137
    ///
138
    /// If `expect_present` is true, this function will first check for an
139
    /// existing file before attempting to create the file. This parameter is
140
    /// purely an optimization, and the function will work regardless of the
141
    /// value. Pass true if you expect the file to be present a majority of the
142
    /// time this function is invoked. For example, using this function to
143
    /// retrieve a file created once and append to the same path in the future,
144
    /// passing true will make this function slightly more optimized for the
145
    /// most common flow.
146
    ///
147
    /// Regardless whether `expect_present` is true or false, this function will
148
    /// proceed by attempting to create a file at `path`, relying on BonsaiDb's
149
    /// ACID-compliance to notify of a conflict if another request succeeds
150
    /// before this one. If a conflict occurs, this function will then attempt
151
    /// to load the document. If the document has been deleted, the
152
    /// [`Error::Deleted`] will be returned.
153
3
    fn load_or_create<Database: Connection + Clone>(
154
3
        path: &str,
155
3
        expect_present: bool,
156
3
        database: &Database,
157
3
    ) -> Result<direct::File<direct::Blocking<Database>, Self>, Error>
158
3
    where
159
3
        Self::Metadata: Default,
160
3
    {
161
3
        Self::load_or_create_with_metadata(
162
3
            path,
163
3
            <Self::Metadata as Default>::default(),
164
3
            expect_present,
165
3
            database,
166
3
        )
167
3
    }
168

            
169
    /// Returns the file locate at `path`, or creates an empty file if not
170
    /// currently present.
171
    ///
172
    /// If `expect_present` is true, this function will first check for an
173
    /// existing file before attempting to create the file. This parameter is
174
    /// purely an optimization, and the function will work regardless of the
175
    /// value. Pass true if you expect the file to be present a majority of the
176
    /// time this function is invoked. For example, using this function to
177
    /// retrieve a file created once and append to the same path in the future,
178
    /// passing true will make this function slightly more optimized for the
179
    /// most common flow.
180
    ///
181
    /// Regardless whether `expect_present` is true or false, this function will
182
    /// proceed by attempting to create a file at `path`, relying on BonsaiDb's
183
    /// ACID-compliance to notify of a conflict if another request succeeds
184
    /// before this one. If a conflict occurs, this function will then attempt
185
    /// to load the document. If the document has been deleted, the
186
    /// [`Error::Deleted`] will be returned.
187
3
    fn load_or_create_with_metadata<Database: Connection + Clone>(
188
3
        path: &str,
189
3
        metadata: Self::Metadata,
190
3
        expect_present: bool,
191
3
        database: &Database,
192
3
    ) -> Result<direct::File<direct::Blocking<Database>, Self>, Error> {
193
3
        // First, try loading the file if we expect the file will be present
194
3
        // (ie, a singleton file that is always preseent after the first
195
3
        // launch).
196
3
        if expect_present {
197
2
            if let Some(file) = direct::File::<_, Self>::load(path, database)? {
198
1
                return Ok(file);
199
1
            }
200
1
        }
201

            
202
        // File not found, or we are going to assume the file isn't present.
203
2
        match Self::build_with_metadata(path, metadata).create(database) {
204
1
            Ok(file) => Ok(file),
205
            Err(Error::AlreadyExists) => {
206
                // Rather than continue to loop, we will just propogate the
207
                // previous error in the situation where the file was deleted
208
                // between our failed attempt to create and the attempt to
209
                // retrieve the conflicted document.
210
1
                direct::File::<_, Self>::load(path, database)?.ok_or(Error::Deleted)
211
            }
212
            Err(other) => Err(other),
213
        }
214
3
    }
215

            
216
    /// Deletes the file at `path`. Returns true if a file was deleted. Does not
217
    /// error if the file is not found.
218
    fn delete<Database: Connection + Clone>(
219
        path: &str,
220
        database: &Database,
221
    ) -> Result<bool, Error> {
222
        if let Some(file) = direct::File::<_, Self>::load(path, database)? {
223
            file.delete()?;
224
            Ok(true)
225
        } else {
226
            Ok(false)
227
        }
228
    }
229

            
230
    /// Returns all files that have a containing path of exactly `path`. It will
231
    /// only return files that have been created, and will not return "virtual"
232
    /// directories that are part of a file's path but have never been created.
233
    ///
234
    /// This function only loads metadata about the files, it does not load the
235
    /// contents of the files.
236
4
    fn list<Database: Connection + Clone>(
237
4
        path: &str,
238
4
        database: &Database,
239
4
    ) -> Result<Vec<direct::File<direct::Blocking<Database>, Self>>, bonsaidb_core::Error> {
240
4
        direct::File::<_, Self>::list(path, database)
241
4
    }
242

            
243
    /// Returns all files that have a path starting with `path`.
244
    ///
245
    /// This function only loads metadata about the files, it does not load the
246
    /// contents of the files.
247
4
    fn list_recursive<Database: Connection + Clone>(
248
4
        path: &str,
249
4
        database: &Database,
250
4
    ) -> Result<Vec<direct::File<direct::Blocking<Database>, Self>>, bonsaidb_core::Error> {
251
4
        direct::File::<_, Self>::list_recursive(path, database)
252
4
    }
253

            
254
    /// Returns statistics for all files contained within this collection. This
255
    /// is equivalent to calling [`Self::stats_for_path`] with `"/"` for the
256
    /// path.
257
1
    fn stats<Database: Connection + Clone>(
258
1
        database: &Database,
259
1
    ) -> Result<Statistics, bonsaidb_core::Error> {
260
1
        Self::stats_for_path("/", database)
261
1
    }
262

            
263
    /// Returns statistics for all files whose path starts with `path`.
264
5
    fn stats_for_path<Database: Connection + Clone>(
265
5
        path: &str,
266
5
        database: &Database,
267
5
    ) -> Result<Statistics, bonsaidb_core::Error> {
268
5
        direct::File::<_, Self>::stats_for_path(path, database)
269
5
    }
270

            
271
    /// Returns the file with the unique `id` given, if found. This function
272
    /// only loads metadata about the file, it does not load the contents of the
273
    /// file.
274
    #[cfg(feature = "async")]
275
2
    async fn get_async<Database: AsyncConnection + Clone>(
276
2
        id: u32,
277
2
        database: &Database,
278
2
    ) -> Result<Option<direct::File<direct::Async<Database>, Self>>, bonsaidb_core::Error> {
279
2
        direct::File::<_, Self>::get_async(id, database).await
280
6
    }
281

            
282
    /// Returns the file located at `path`, if found. This function
283
    /// only loads metadata about the file, it does not load the contents of the
284
    /// file.
285
    #[cfg(feature = "async")]
286
7
    async fn load_async<Database: AsyncConnection + Clone>(
287
7
        path: &str,
288
7
        database: &Database,
289
7
    ) -> Result<Option<direct::File<direct::Async<Database>, Self>>, Error> {
290
7
        direct::File::<_, Self>::load_async(path, database).await
291
21
    }
292

            
293
    /// Returns the file locate at `path`, or creates an empty file if not
294
    /// currently present.
295
    ///
296
    /// If `expect_present` is true, this function will first check for an
297
    /// existing file before attempting to create the file. This parameter is
298
    /// purely an optimization, and the function will work regardless of the
299
    /// value. Pass true if you expect the file to be present a majority of the
300
    /// time this function is invoked. For example, using this function to
301
    /// retrieve a file created once and append to the same path in the future,
302
    /// passing true will make this function slightly more optimized for the
303
    /// most common flow.
304
    ///
305
    /// Regardless whether `expect_present` is true or false, this function will
306
    /// proceed by attempting to create a file at `path`, relying on BonsaiDb's
307
    /// ACID-compliance to notify of a conflict if another request succeeds
308
    /// before this one. If a conflict occurs, this function will then attempt
309
    /// to load the document. If the document has been deleted, the
310
    /// [`Error::Deleted`] will be returned.
311
    #[cfg(feature = "async")]
312
3
    async fn load_or_create_async<Database: AsyncConnection + Clone>(
313
3
        path: &str,
314
3
        expect_present: bool,
315
3
        database: &Database,
316
3
    ) -> Result<direct::File<direct::Async<Database>, Self>, Error>
317
3
    where
318
3
        Self::Metadata: Default,
319
3
    {
320
3
        Self::load_or_create_with_metadata_async(
321
3
            path,
322
3
            <Self::Metadata as Default>::default(),
323
3
            expect_present,
324
3
            database,
325
3
        )
326
5
        .await
327
9
    }
328

            
329
    /// Returns the file locate at `path`, or creates an empty file if not
330
    /// currently present.
331
    ///
332
    /// If `expect_present` is true, this function will first check for an
333
    /// existing file before attempting to create the file. This parameter is
334
    /// purely an optimization, and the function will work regardless of the
335
    /// value. Pass true if you expect the file to be present a majority of the
336
    /// time this function is invoked. For example, using this function to
337
    /// retrieve a file created once and append to the same path in the future,
338
    /// passing true will make this function slightly more optimized for the
339
    /// most common flow.
340
    ///
341
    /// Regardless whether `expect_present` is true or false, this function will
342
    /// proceed by attempting to create a file at `path`, relying on BonsaiDb's
343
    /// ACID-compliance to notify of a conflict if another request succeeds
344
    /// before this one. If a conflict occurs, this function will then attempt
345
    /// to load the document. If the document has been deleted, the
346
    /// [`Error::Deleted`] will be returned.
347
    #[cfg(feature = "async")]
348
3
    async fn load_or_create_with_metadata_async<Database: AsyncConnection + Clone>(
349
3
        path: &str,
350
3
        metadata: Self::Metadata,
351
3
        expect_present: bool,
352
3
        database: &Database,
353
3
    ) -> Result<direct::File<direct::Async<Database>, Self>, Error> {
354
        // First, try loading the file if we expect the file will be present
355
        // (ie, a singleton file that is always preseent after the first
356
        // launch).
357
3
        if expect_present {
358
2
            if let Some(file) = direct::File::<_, Self>::load_async(path, database).await? {
359
1
                return Ok(file);
360
1
            }
361
1
        }
362

            
363
        // File not found, or we are going to assume the file isn't present.
364
2
        match Self::build_with_metadata(path, metadata)
365
2
            .create_async(database)
366
2
            .await
367
        {
368
1
            Ok(file) => Ok(file),
369
            Err(Error::AlreadyExists) => {
370
                // Rather than continue to loop, we will just propogate the
371
                // previous error in the situation where the file was deleted
372
                // between our failed attempt to create and the attempt to
373
                // retrieve the conflicted document.
374
1
                direct::File::<_, Self>::load_async(path, database)
375
1
                    .await?
376
1
                    .ok_or(Error::Deleted)
377
            }
378
            Err(other) => Err(other),
379
        }
380
9
    }
381

            
382
    /// Deletes the file at `path`. Returns true if a file was deleted. Does not
383
    /// error if the file is not found.
384
    #[cfg(feature = "async")]
385
    async fn delete_async<Database: AsyncConnection + Clone>(
386
        path: &str,
387
        database: &Database,
388
    ) -> Result<bool, Error> {
389
        if let Some(file) = direct::File::<_, Self>::load_async(path, database).await? {
390
            file.delete().await?;
391
            Ok(true)
392
        } else {
393
            Ok(false)
394
        }
395
    }
396

            
397
    /// Returns all files that have a containing path of exactly `path`. It will
398
    /// only return files that have been created, and will not return "virtual"
399
    /// directories that are part of a file's path but have never been created.
400
    ///
401
    /// This function only loads metadata about the files, it does not load the
402
    /// contents of the files.
403
    #[cfg(feature = "async")]
404
4
    async fn list_async<Database: AsyncConnection + Clone>(
405
4
        path: &str,
406
4
        database: &Database,
407
4
    ) -> Result<Vec<direct::File<direct::Async<Database>, Self>>, bonsaidb_core::Error> {
408
4
        direct::File::<_, Self>::list_async(path, database).await
409
12
    }
410

            
411
    /// Returns all files that have a path starting with `path`.
412
    ///
413
    /// This function only loads metadata about the files, it does not load the
414
    /// contents of the files.
415
    #[cfg(feature = "async")]
416
4
    async fn list_recursive_async<Database: AsyncConnection + Clone>(
417
4
        path: &str,
418
4
        database: &Database,
419
4
    ) -> Result<Vec<direct::File<direct::Async<Database>, Self>>, bonsaidb_core::Error> {
420
4
        direct::File::<_, Self>::list_recursive_async(path, database).await
421
12
    }
422

            
423
    /// Returns statistics for all files contained within this collection. This
424
    /// is equivalent to calling [`Self::stats_for_path_async`] with `"/"` for the
425
    /// path.
426
    #[cfg(feature = "async")]
427
1
    async fn stats_async<Database: AsyncConnection + Clone>(
428
1
        database: &Database,
429
1
    ) -> Result<Statistics, bonsaidb_core::Error> {
430
2
        Self::stats_for_path_async("/", database).await
431
3
    }
432

            
433
    /// Returns statistics for all files whose path starts with `path`.
434
    #[cfg(feature = "async")]
435
5
    async fn stats_for_path_async<Database: AsyncConnection + Clone>(
436
5
        path: &str,
437
5
        database: &Database,
438
5
    ) -> Result<Statistics, bonsaidb_core::Error> {
439
10
        direct::File::<_, Self>::stats_for_path_async(path, database).await
440
15
    }
441
}
442

            
443
/// A default configuration for storing files within BonsaiDb.
#[derive(Debug)]
pub struct BonsaiFiles;
446

            
447
impl FileConfig for BonsaiFiles {
448
    type Metadata = Option<()>;
449

            
450
    const BLOCK_SIZE: usize = 65_536;
451

            
452
1534
    fn files_name() -> CollectionName {
453
1534
        CollectionName::new("bonsaidb", "files")
454
1534
    }
455

            
456
2481
    fn blocks_name() -> CollectionName {
457
2481
        CollectionName::new("bonsaidb", "blocks")
458
2481
    }
459
}
460

            
461
/// A schema implementation that allows using any [`FileConfig`] as a [`Schema`]
462
/// without manually implementing [`Schema`].
463
#[derive_where(Default, Debug)]
464
pub struct FilesSchema<Config: FileConfig = BonsaiFiles>(PhantomData<Config>);
465

            
466
impl<Config: FileConfig> Schema for FilesSchema<Config> {
467
90
    fn schema_name() -> SchemaName {
468
90
        SchemaName::from(Config::files_name())
469
90
    }
470

            
471
36
    fn define_collections(schema: &mut Schematic) -> Result<(), bonsaidb_core::Error> {
472
36
        Config::register_collections(schema)
473
36
    }
474
}
475

            
476
/// Errors that can be returned when interacting with files.
477
#[derive(thiserror::Error, Debug)]
478
pub enum Error {
479
    /// An underlying database error was returned.
480
    #[error("database error: {0}")]
481
    Database(bonsaidb_core::Error),
482
    /// A name contained an invalid character. Currently, the only disallowed
483
    /// character is `/`.
484
    #[error("names must not contain '/'")]
485
    InvalidName,
486
    /// An absolute path was expected, but the path provided did not include a
487
    /// leading `/`.
488
    #[error("all paths must start with a leading '/'")]
489
    InvalidPath,
490
    /// An attempt at creating a file failed because a file already existed.
491
    #[error("a file already exists at the path provided")]
492
    AlreadyExists,
493
    /// The file was deleted during the operation.
494
    #[error("the file was deleted during the operation")]
495
    Deleted,
496
}
497

            
498
impl<T> From<InsertError<T>> for Error {
    /// Converts the `error` carried by a failed insert, discarding the rest of
    /// the [`InsertError`].
    fn from(err: InsertError<T>) -> Self {
        Self::from(err.error)
    }
}
503

            
504
impl From<bonsaidb_core::Error> for Error {
505
2
    fn from(err: bonsaidb_core::Error) -> Self {
506
2
        match err {
507
2
            bonsaidb_core::Error::UniqueKeyViolation { .. } => Self::AlreadyExists,
508
            other => Self::Database(other),
509
        }
510
2
    }
511
}
512

            
513
impl From<Error> for bonsaidb_core::Error {
514
    fn from(err: Error) -> Self {
515
        match err {
516
            Error::Database(err) => err,
517
            other => Self::other("bonsaidb-files", other),
518
        }
519
    }
520
}
521

            
522
/// Controls which location of a file to remove data from during a truncation.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum Truncate {
    /// Remove data from the start (head) of the file when truncating.
    RemovingStart,
    /// Remove data from the end (tail) of the file when truncating.
    RemovingEnd,
}
530

            
531
/// Statistics about a set of files contained in a collection.
532
#[derive(Debug, Clone, Copy, Eq, PartialEq)]
533
pub struct Statistics {
534
    /// The total number of bytes contained within the path queried. This only
535
    /// counts bytes that would be returned when reading the contents of files.
536
    /// No metadata information is included in this calculation.
537
    pub total_bytes: u64,
538
    /// The number of files contained within the path queried.
539
    pub file_count: usize,
540
    /// The last timestamp data was appended to a file within the path queried.
541
    /// This contains `None` if there is no data present.
542
    pub last_appended_at: Option<TimestampAsNanoseconds>,
543
}
544

            
545
// Unit tests for this crate; only compiled for test builds.
#[cfg(test)]
mod tests;