389 lines
12 KiB
C
389 lines
12 KiB
C
#include <limits.h>
|
|
#include <sys/types.h>
|
|
#include <sys/stat.h>
|
|
#include "basefs_allocator.h"
|
|
#include "block_range.h"
|
|
#include "hashmap.h"
|
|
#include "base_fs.h"
|
|
|
|
struct base_fs_allocator {
|
|
struct ext2fs_hashmap *entries;
|
|
struct basefs_entry *cur_entry;
|
|
/* The next expected logical block to allocate for cur_entry. */
|
|
blk64_t next_lblk;
|
|
/* Blocks which are definitely owned by a single inode in BaseFS. */
|
|
ext2fs_block_bitmap exclusive_block_map;
|
|
/* Blocks which are available to the first inode that requests it. */
|
|
ext2fs_block_bitmap dedup_block_map;
|
|
};
|
|
|
|
static errcode_t basefs_block_allocator(ext2_filsys, blk64_t, blk64_t *,
|
|
struct blk_alloc_ctx *ctx);
|
|
|
|
/*
|
|
* Free any reserved, but unconsumed block ranges in the allocator. This both
|
|
* frees the block_range_list data structure and unreserves exclusive blocks
|
|
* from the block map.
|
|
*/
|
|
static void fs_free_blocks_range(ext2_filsys fs,
|
|
struct base_fs_allocator *allocator,
|
|
struct block_range_list *list)
|
|
{
|
|
ext2fs_block_bitmap exclusive_map = allocator->exclusive_block_map;
|
|
|
|
blk64_t block;
|
|
while (list->head) {
|
|
block = consume_next_block(list);
|
|
if (ext2fs_test_block_bitmap2(exclusive_map, block)) {
|
|
ext2fs_unmark_block_bitmap2(fs->block_map, block);
|
|
ext2fs_unmark_block_bitmap2(exclusive_map, block);
|
|
}
|
|
}
|
|
}
|
|
|
|
/*
|
|
* Free any blocks in the bitmap that were reserved but never used. This is
|
|
* needed to free dedup_block_map and ensure the free block bitmap is
|
|
* internally consistent.
|
|
*/
|
|
static void fs_free_blocks_bitmap(ext2_filsys fs, ext2fs_block_bitmap bitmap)
|
|
{
|
|
blk64_t block = 0;
|
|
blk64_t start = fs->super->s_first_data_block;
|
|
blk64_t end = ext2fs_blocks_count(fs->super) - 1;
|
|
errcode_t retval;
|
|
|
|
for (;;) {
|
|
retval = ext2fs_find_first_set_block_bitmap2(bitmap, start, end,
|
|
&block);
|
|
if (retval)
|
|
break;
|
|
ext2fs_unmark_block_bitmap2(fs->block_map, block);
|
|
start = block + 1;
|
|
}
|
|
}
|
|
|
|
static void basefs_allocator_free(ext2_filsys fs,
|
|
struct base_fs_allocator *allocator)
|
|
{
|
|
struct basefs_entry *e;
|
|
struct ext2fs_hashmap_entry *it = NULL;
|
|
struct ext2fs_hashmap *entries = allocator->entries;
|
|
|
|
if (entries) {
|
|
while ((e = ext2fs_hashmap_iter_in_order(entries, &it))) {
|
|
fs_free_blocks_range(fs, allocator, &e->blocks);
|
|
delete_block_ranges(&e->blocks);
|
|
}
|
|
ext2fs_hashmap_free(entries);
|
|
}
|
|
fs_free_blocks_bitmap(fs, allocator->dedup_block_map);
|
|
ext2fs_free_block_bitmap(allocator->exclusive_block_map);
|
|
ext2fs_free_block_bitmap(allocator->dedup_block_map);
|
|
free(allocator);
|
|
}
|
|
|
|
/*
|
|
* Build a bitmap of which blocks are definitely owned by exactly one file in
|
|
* Base FS. Blocks which are not valid or are de-duplicated are skipped. This
|
|
* is called during allocator initialization, to ensure that libext2fs does
|
|
* not allocate which we want to re-use.
|
|
*
|
|
* If a block was allocated in the initial filesystem, it can never be re-used,
|
|
* so it will appear in neither the exclusive or dedup set. If a block is used
|
|
* by multiple files, it will be removed from the owned set and instead added
|
|
* to the dedup set.
|
|
*
|
|
* The dedup set is not removed from fs->block_map. This allows us to re-use
|
|
* dedup blocks separately and not have them be allocated outside of file data.
|
|
*
|
|
* This function returns non-zero if the block was owned, and 0 otherwise.
|
|
*/
|
|
static int fs_reserve_block(ext2_filsys fs,
|
|
struct base_fs_allocator *allocator,
|
|
blk64_t block)
|
|
{
|
|
ext2fs_block_bitmap exclusive_map = allocator->exclusive_block_map;
|
|
ext2fs_block_bitmap dedup_map = allocator->dedup_block_map;
|
|
|
|
if (block >= ext2fs_blocks_count(fs->super))
|
|
return 0;
|
|
|
|
if (ext2fs_test_block_bitmap2(fs->block_map, block)) {
|
|
if (!ext2fs_test_block_bitmap2(exclusive_map, block))
|
|
return 0;
|
|
ext2fs_unmark_block_bitmap2(exclusive_map, block);
|
|
ext2fs_mark_block_bitmap2(dedup_map, block);
|
|
return 0;
|
|
} else {
|
|
ext2fs_mark_block_bitmap2(fs->block_map, block);
|
|
ext2fs_mark_block_bitmap2(exclusive_map, block);
|
|
return 1;
|
|
}
|
|
}
|
|
|
|
/*
|
|
* Walk the requested block list and reserve blocks, either into the owned
|
|
* pool or the dedup pool as appropriate. We stop once the file has enough
|
|
* owned blocks to satisfy |file_size|. This allows any extra blocks to be
|
|
* re-used, since otherwise a large block movement between files could
|
|
* trigger block allocation errors.
|
|
*/
|
|
static void fs_reserve_blocks_range(ext2_filsys fs,
|
|
struct base_fs_allocator *allocator,
|
|
struct block_range_list *list,
|
|
off_t file_size)
|
|
{
|
|
blk64_t block;
|
|
off_t blocks_needed;
|
|
off_t blocks_acquired = 0;
|
|
struct block_range *blocks = list->head;
|
|
|
|
blocks_needed = file_size + (fs->blocksize - 1);
|
|
blocks_needed /= fs->blocksize;
|
|
|
|
while (blocks) {
|
|
for (block = blocks->start; block <= blocks->end; block++) {
|
|
if (fs_reserve_block(fs, allocator, block))
|
|
blocks_acquired++;
|
|
if (blocks_acquired >= blocks_needed)
|
|
return;
|
|
}
|
|
blocks = blocks->next;
|
|
}
|
|
}
|
|
|
|
/*
|
|
* For each file in the base FS map, ensure that its blocks are reserved in
|
|
* the actual block map. This prevents libext2fs from allocating them for
|
|
* general purpose use, and ensures that if the file needs data blocks, they
|
|
* can be re-acquired exclusively for that file.
|
|
*
|
|
* If a file in the base map is missing, or not a regular file in the new
|
|
* filesystem, then it's skipped to ensure that its blocks are reusable.
|
|
*/
|
|
static errcode_t fs_reserve_blocks(ext2_filsys fs,
|
|
struct base_fs_allocator *allocator,
|
|
const char *src_dir)
|
|
{
|
|
int nbytes;
|
|
char full_path[PATH_MAX];
|
|
const char *sep = "/";
|
|
struct stat st;
|
|
struct basefs_entry *e;
|
|
struct ext2fs_hashmap_entry *it = NULL;
|
|
struct ext2fs_hashmap *entries = allocator->entries;
|
|
|
|
if (strlen(src_dir) && src_dir[strlen(src_dir) - 1] == '/')
|
|
sep = "";
|
|
|
|
while ((e = ext2fs_hashmap_iter_in_order(entries, &it))) {
|
|
nbytes = snprintf(full_path, sizeof(full_path), "%s%s%s",
|
|
src_dir, sep, e->path);
|
|
if (nbytes >= sizeof(full_path))
|
|
return ENAMETOOLONG;
|
|
if (lstat(full_path, &st) || !S_ISREG(st.st_mode))
|
|
continue;
|
|
fs_reserve_blocks_range(fs, allocator, &e->blocks, st.st_size);
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
errcode_t base_fs_alloc_load(ext2_filsys fs, const char *file,
|
|
const char *mountpoint, const char *src_dir)
|
|
{
|
|
errcode_t retval = 0;
|
|
struct base_fs_allocator *allocator;
|
|
|
|
allocator = calloc(1, sizeof(*allocator));
|
|
if (!allocator) {
|
|
retval = ENOMEM;
|
|
goto out;
|
|
}
|
|
|
|
retval = ext2fs_read_bitmaps(fs);
|
|
if (retval)
|
|
goto err_load;
|
|
|
|
allocator->cur_entry = NULL;
|
|
allocator->entries = basefs_parse(file, mountpoint);
|
|
if (!allocator->entries) {
|
|
retval = EIO;
|
|
goto err_load;
|
|
}
|
|
retval = ext2fs_allocate_block_bitmap(fs, "exclusive map",
|
|
&allocator->exclusive_block_map);
|
|
if (retval)
|
|
goto err_load;
|
|
retval = ext2fs_allocate_block_bitmap(fs, "dedup map",
|
|
&allocator->dedup_block_map);
|
|
if (retval)
|
|
goto err_load;
|
|
|
|
retval = fs_reserve_blocks(fs, allocator, src_dir);
|
|
if (retval)
|
|
goto err_load;
|
|
|
|
/* Override the default allocator */
|
|
fs->get_alloc_block2 = basefs_block_allocator;
|
|
fs->priv_data = allocator;
|
|
|
|
goto out;
|
|
|
|
err_load:
|
|
basefs_allocator_free(fs, allocator);
|
|
out:
|
|
return retval;
|
|
}
|
|
|
|
/* Try and acquire the next usable block from the Base FS map. */
|
|
static errcode_t get_next_block(ext2_filsys fs, struct base_fs_allocator *allocator,
|
|
struct block_range_list* list, blk64_t *ret)
|
|
{
|
|
blk64_t block;
|
|
ext2fs_block_bitmap exclusive_map = allocator->exclusive_block_map;
|
|
ext2fs_block_bitmap dedup_map = allocator->dedup_block_map;
|
|
|
|
if (!list->head)
|
|
return EXT2_ET_BLOCK_ALLOC_FAIL;
|
|
|
|
block = consume_next_block(list);
|
|
if (block >= ext2fs_blocks_count(fs->super))
|
|
return EXT2_ET_BLOCK_ALLOC_FAIL;
|
|
if (ext2fs_test_block_bitmap2(exclusive_map, block)) {
|
|
ext2fs_unmark_block_bitmap2(exclusive_map, block);
|
|
*ret = block;
|
|
return 0;
|
|
}
|
|
if (ext2fs_test_block_bitmap2(dedup_map, block)) {
|
|
ext2fs_unmark_block_bitmap2(dedup_map, block);
|
|
*ret = block;
|
|
return 0;
|
|
}
|
|
return EXT2_ET_BLOCK_ALLOC_FAIL;
|
|
}
|
|
|
|
/*
|
|
* BaseFS lists blocks in logical block order. However, the allocator hook is
|
|
* only called if a block needs to be allocated. In the case of a deduplicated
|
|
* block, or a hole, the hook is not invoked. This means the next block
|
|
* allocation request will be out of sequence. For example, consider if BaseFS
|
|
* specifies the following (0 being a hole):
|
|
* 1 2 3 0 4 5
|
|
*
|
|
* If the new file has a hole at logical block 0, we could accidentally
|
|
* shift the entire expected block list as follows:
|
|
* 0 1 2 0 3 4
|
|
*
|
|
* To account for this, we track the next expected logical block in the
|
|
* allocator. If the current request is for a later logical block, we skip and
|
|
* free the intermediate physical blocks that would have been allocated. This
|
|
* ensures the original block assignment is respected.
|
|
*/
|
|
static void skip_blocks(ext2_filsys fs, struct base_fs_allocator *allocator,
|
|
struct blk_alloc_ctx *ctx)
|
|
{
|
|
blk64_t block;
|
|
struct block_range_list *list = &allocator->cur_entry->blocks;
|
|
ext2fs_block_bitmap exclusive_map = allocator->exclusive_block_map;
|
|
|
|
while (list->head && allocator->next_lblk < ctx->lblk) {
|
|
block = consume_next_block(list);
|
|
if (block >= ext2fs_blocks_count(fs->super))
|
|
continue;
|
|
if (ext2fs_test_block_bitmap2(exclusive_map, block)) {
|
|
ext2fs_unmark_block_bitmap2(exclusive_map, block);
|
|
ext2fs_unmark_block_bitmap2(fs->block_map, block);
|
|
}
|
|
allocator->next_lblk++;
|
|
}
|
|
}
|
|
|
|
static errcode_t basefs_block_allocator(ext2_filsys fs, blk64_t goal,
|
|
blk64_t *ret, struct blk_alloc_ctx *ctx)
|
|
{
|
|
errcode_t retval;
|
|
struct base_fs_allocator *allocator = fs->priv_data;
|
|
struct basefs_entry *e = allocator->cur_entry;
|
|
ext2fs_block_bitmap dedup_map = allocator->dedup_block_map;
|
|
|
|
if (e && ctx && (ctx->flags & BLOCK_ALLOC_DATA)) {
|
|
if (allocator->next_lblk < ctx->lblk)
|
|
skip_blocks(fs, allocator, ctx);
|
|
allocator->next_lblk = ctx->lblk + 1;
|
|
|
|
if (!get_next_block(fs, allocator, &e->blocks, ret))
|
|
return 0;
|
|
}
|
|
|
|
retval = ext2fs_new_block2(fs, goal, fs->block_map, ret);
|
|
if (!retval) {
|
|
ext2fs_mark_block_bitmap2(fs->block_map, *ret);
|
|
return 0;
|
|
}
|
|
if (retval != EXT2_ET_BLOCK_ALLOC_FAIL)
|
|
return retval;
|
|
|
|
/* Try to steal a block from the dedup pool. */
|
|
retval = ext2fs_find_first_set_block_bitmap2(dedup_map,
|
|
fs->super->s_first_data_block,
|
|
ext2fs_blocks_count(fs->super) - 1, ret);
|
|
if (!retval) {
|
|
ext2fs_unmark_block_bitmap2(dedup_map, *ret);
|
|
return 0;
|
|
}
|
|
|
|
/*
|
|
* As a last resort, take any block from our file's list. This
|
|
* risks bloating the diff, but means we are more likely to
|
|
* successfully build an image.
|
|
*/
|
|
while (e->blocks.head) {
|
|
if (!get_next_block(fs, allocator, &e->blocks, ret))
|
|
return 0;
|
|
}
|
|
return EXT2_ET_BLOCK_ALLOC_FAIL;
|
|
}
|
|
|
|
void base_fs_alloc_cleanup(ext2_filsys fs)
|
|
{
|
|
basefs_allocator_free(fs, fs->priv_data);
|
|
fs->priv_data = NULL;
|
|
fs->get_alloc_block2 = NULL;
|
|
}
|
|
|
|
errcode_t base_fs_alloc_set_target(ext2_filsys fs, const char *target_path,
|
|
const char *name EXT2FS_ATTR((unused)),
|
|
ext2_ino_t parent_ino EXT2FS_ATTR((unused)),
|
|
ext2_ino_t root EXT2FS_ATTR((unused)), mode_t mode)
|
|
{
|
|
struct base_fs_allocator *allocator = fs->priv_data;
|
|
|
|
if (mode != S_IFREG)
|
|
return 0;
|
|
|
|
if (allocator) {
|
|
allocator->cur_entry = ext2fs_hashmap_lookup(allocator->entries,
|
|
target_path,
|
|
strlen(target_path));
|
|
allocator->next_lblk = 0;
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
errcode_t base_fs_alloc_unset_target(ext2_filsys fs,
|
|
const char *target_path EXT2FS_ATTR((unused)),
|
|
const char *name EXT2FS_ATTR((unused)),
|
|
ext2_ino_t parent_ino EXT2FS_ATTR((unused)),
|
|
ext2_ino_t root EXT2FS_ATTR((unused)), mode_t mode)
|
|
{
|
|
struct base_fs_allocator *allocator = fs->priv_data;
|
|
|
|
if (!allocator || !allocator->cur_entry || mode != S_IFREG)
|
|
return 0;
|
|
|
|
fs_free_blocks_range(fs, allocator, &allocator->cur_entry->blocks);
|
|
delete_block_ranges(&allocator->cur_entry->blocks);
|
|
return 0;
|
|
}
|