348 lines
7.7 KiB
C
348 lines
7.7 KiB
C
/*
 * device DAX engine
 *
 * IO engine that reads/writes from files by doing memcpy to/from
 * a memory mapped region of a DAX enabled device.
 *
 * Copyright (C) 2016 Intel Corp
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License,
 * version 2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 */
|
|
|
|
/*
 * device dax engine
 * IO engine that accesses a DAX device directly to read and write data
 *
 * To use:
 *   ioengine=dev-dax
 *
 *   Other relevant settings:
 *     iodepth=1
 *     direct=0	   REQUIRED
 *     filename=/dev/daxN.N
 *     bs=2m
 *
 *   direct should be left to 0. Using dev-dax implies that memory access
 *   is direct. However, dev-dax does not support the O_DIRECT flag by design
 *   since it is not necessary.
 *
 *   bs should, at a minimum, adhere to the device dax alignment.
 *
 * libpmem.so
 *   By default, the dev-dax engine will let the system find the libpmem.so
 *   that it uses. You can use an alternative libpmem by setting the
 *   FIO_PMEM_LIB environment variable to the full path to the desired
 *   libpmem.so.
 */
|
|
|
|
#include <stdio.h>
|
|
#include <limits.h>
|
|
#include <stdlib.h>
|
|
#include <unistd.h>
|
|
#include <errno.h>
|
|
#include <sys/mman.h>
|
|
#include <sys/stat.h>
|
|
#include <sys/sysmacros.h>
|
|
#include <libgen.h>
|
|
#include <libpmem.h>
|
|
|
|
#include "../fio.h"
|
|
#include "../verify.h"
|
|
|
|
/*
|
|
* Limits us to 1GiB of mapped files in total to model after
|
|
* mmap engine behavior
|
|
*/
|
|
/* 1024^3 bytes; the UL operand keeps the whole product in unsigned long */
#define MMAP_TOTAL_SZ	(1024UL * 1024 * 1024)
|
|
|
|
/*
 * Per-file engine state, attached to a fio_file through FILE_SET_ENG_DATA().
 * Describes the mapping currently held for that file, if any.
 */
struct fio_devdax_data {
	void *devdax_ptr;	/* address returned by mmap(), NULL when unmapped */
	size_t devdax_sz;	/* length in bytes passed to mmap()/munmap() */
	off_t devdax_off;	/* offset passed to mmap() for this mapping */
};
|
|
|
|
static int fio_devdax_file(struct thread_data *td, struct fio_file *f,
|
|
size_t length, off_t off)
|
|
{
|
|
struct fio_devdax_data *fdd = FILE_ENG_DATA(f);
|
|
int flags = 0;
|
|
|
|
if (td_rw(td))
|
|
flags = PROT_READ | PROT_WRITE;
|
|
else if (td_write(td)) {
|
|
flags = PROT_WRITE;
|
|
|
|
if (td->o.verify != VERIFY_NONE)
|
|
flags |= PROT_READ;
|
|
} else
|
|
flags = PROT_READ;
|
|
|
|
fdd->devdax_ptr = mmap(NULL, length, flags, MAP_SHARED, f->fd, off);
|
|
if (fdd->devdax_ptr == MAP_FAILED) {
|
|
fdd->devdax_ptr = NULL;
|
|
td_verror(td, errno, "mmap");
|
|
}
|
|
|
|
if (td->error && fdd->devdax_ptr)
|
|
munmap(fdd->devdax_ptr, length);
|
|
|
|
return td->error;
|
|
}
|
|
|
|
/*
|
|
* Just mmap an appropriate portion, we cannot mmap the full extent
|
|
*/
|
|
static int fio_devdax_prep_limited(struct thread_data *td, struct io_u *io_u)
|
|
{
|
|
struct fio_file *f = io_u->file;
|
|
struct fio_devdax_data *fdd = FILE_ENG_DATA(f);
|
|
|
|
if (io_u->buflen > f->real_file_size) {
|
|
log_err("dev-dax: bs too big for dev-dax engine\n");
|
|
return EIO;
|
|
}
|
|
|
|
fdd->devdax_sz = min(MMAP_TOTAL_SZ, f->real_file_size);
|
|
if (fdd->devdax_sz > f->io_size)
|
|
fdd->devdax_sz = f->io_size;
|
|
|
|
fdd->devdax_off = io_u->offset;
|
|
|
|
return fio_devdax_file(td, f, fdd->devdax_sz, fdd->devdax_off);
|
|
}
|
|
|
|
/*
|
|
* Attempt to mmap the entire file
|
|
*/
|
|
static int fio_devdax_prep_full(struct thread_data *td, struct io_u *io_u)
|
|
{
|
|
struct fio_file *f = io_u->file;
|
|
struct fio_devdax_data *fdd = FILE_ENG_DATA(f);
|
|
int ret;
|
|
|
|
if (fio_file_partial_mmap(f))
|
|
return EINVAL;
|
|
|
|
if (io_u->offset != (size_t) io_u->offset ||
|
|
f->io_size != (size_t) f->io_size) {
|
|
fio_file_set_partial_mmap(f);
|
|
return EINVAL;
|
|
}
|
|
|
|
fdd->devdax_sz = f->io_size;
|
|
fdd->devdax_off = 0;
|
|
|
|
ret = fio_devdax_file(td, f, fdd->devdax_sz, fdd->devdax_off);
|
|
if (ret)
|
|
fio_file_set_partial_mmap(f);
|
|
|
|
return ret;
|
|
}
|
|
|
|
static int fio_devdax_prep(struct thread_data *td, struct io_u *io_u)
|
|
{
|
|
struct fio_file *f = io_u->file;
|
|
struct fio_devdax_data *fdd = FILE_ENG_DATA(f);
|
|
int ret;
|
|
|
|
/*
|
|
* It fits within existing mapping, use it
|
|
*/
|
|
if (io_u->offset >= fdd->devdax_off &&
|
|
io_u->offset + io_u->buflen < fdd->devdax_off + fdd->devdax_sz)
|
|
goto done;
|
|
|
|
/*
|
|
* unmap any existing mapping
|
|
*/
|
|
if (fdd->devdax_ptr) {
|
|
if (munmap(fdd->devdax_ptr, fdd->devdax_sz) < 0)
|
|
return errno;
|
|
fdd->devdax_ptr = NULL;
|
|
}
|
|
|
|
if (fio_devdax_prep_full(td, io_u)) {
|
|
td_clear_error(td);
|
|
ret = fio_devdax_prep_limited(td, io_u);
|
|
if (ret)
|
|
return ret;
|
|
}
|
|
|
|
done:
|
|
io_u->mmap_data = fdd->devdax_ptr + io_u->offset - fdd->devdax_off -
|
|
f->file_offset;
|
|
return 0;
|
|
}
|
|
|
|
static int fio_devdax_queue(struct thread_data *td, struct io_u *io_u)
|
|
{
|
|
fio_ro_check(td, io_u);
|
|
io_u->error = 0;
|
|
|
|
switch (io_u->ddir) {
|
|
case DDIR_READ:
|
|
memcpy(io_u->xfer_buf, io_u->mmap_data, io_u->xfer_buflen);
|
|
break;
|
|
case DDIR_WRITE:
|
|
pmem_memcpy_persist(io_u->mmap_data, io_u->xfer_buf,
|
|
io_u->xfer_buflen);
|
|
break;
|
|
case DDIR_SYNC:
|
|
case DDIR_DATASYNC:
|
|
case DDIR_SYNC_FILE_RANGE:
|
|
break;
|
|
default:
|
|
io_u->error = EINVAL;
|
|
break;
|
|
}
|
|
|
|
return FIO_Q_COMPLETED;
|
|
}
|
|
|
|
static int fio_devdax_init(struct thread_data *td)
|
|
{
|
|
struct thread_options *o = &td->o;
|
|
|
|
if ((o->rw_min_bs & page_mask) &&
|
|
(o->fsync_blocks || o->fdatasync_blocks)) {
|
|
log_err("dev-dax: mmap options dictate a minimum block size of %llu bytes\n",
|
|
(unsigned long long) page_size);
|
|
return 1;
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
static int fio_devdax_open_file(struct thread_data *td, struct fio_file *f)
|
|
{
|
|
struct fio_devdax_data *fdd;
|
|
int ret;
|
|
|
|
ret = generic_open_file(td, f);
|
|
if (ret)
|
|
return ret;
|
|
|
|
fdd = calloc(1, sizeof(*fdd));
|
|
if (!fdd) {
|
|
int fio_unused __ret;
|
|
__ret = generic_close_file(td, f);
|
|
return 1;
|
|
}
|
|
|
|
FILE_SET_ENG_DATA(f, fdd);
|
|
|
|
return 0;
|
|
}
|
|
|
|
static int fio_devdax_close_file(struct thread_data *td, struct fio_file *f)
|
|
{
|
|
struct fio_devdax_data *fdd = FILE_ENG_DATA(f);
|
|
|
|
FILE_SET_ENG_DATA(f, NULL);
|
|
free(fdd);
|
|
fio_file_clear_partial_mmap(f);
|
|
|
|
return generic_close_file(td, f);
|
|
}
|
|
|
|
static int
|
|
fio_devdax_get_file_size(struct thread_data *td, struct fio_file *f)
|
|
{
|
|
char spath[PATH_MAX];
|
|
char npath[PATH_MAX];
|
|
char *rpath;
|
|
FILE *sfile;
|
|
uint64_t size;
|
|
struct stat st;
|
|
int rc;
|
|
|
|
if (fio_file_size_known(f))
|
|
return 0;
|
|
|
|
if (f->filetype != FIO_TYPE_CHAR)
|
|
return -EINVAL;
|
|
|
|
rc = stat(f->file_name, &st);
|
|
if (rc < 0) {
|
|
log_err("%s: failed to stat file %s (%s)\n",
|
|
td->o.name, f->file_name, strerror(errno));
|
|
return -errno;
|
|
}
|
|
|
|
snprintf(spath, PATH_MAX, "/sys/dev/char/%d:%d/subsystem",
|
|
major(st.st_rdev), minor(st.st_rdev));
|
|
|
|
rpath = realpath(spath, npath);
|
|
if (!rpath) {
|
|
log_err("%s: realpath on %s failed (%s)\n",
|
|
td->o.name, spath, strerror(errno));
|
|
return -errno;
|
|
}
|
|
|
|
/* check if DAX device */
|
|
if (strcmp("/sys/class/dax", rpath)) {
|
|
log_err("%s: %s not a DAX device!\n",
|
|
td->o.name, f->file_name);
|
|
}
|
|
|
|
snprintf(spath, PATH_MAX, "/sys/dev/char/%d:%d/size",
|
|
major(st.st_rdev), minor(st.st_rdev));
|
|
|
|
sfile = fopen(spath, "r");
|
|
if (!sfile) {
|
|
log_err("%s: fopen on %s failed (%s)\n",
|
|
td->o.name, spath, strerror(errno));
|
|
return 1;
|
|
}
|
|
|
|
rc = fscanf(sfile, "%lu", &size);
|
|
if (rc < 0) {
|
|
log_err("%s: fscanf on %s failed (%s)\n",
|
|
td->o.name, spath, strerror(errno));
|
|
return 1;
|
|
}
|
|
|
|
f->real_file_size = size;
|
|
|
|
fclose(sfile);
|
|
|
|
if (f->file_offset > f->real_file_size) {
|
|
log_err("%s: offset extends end (%llu > %llu)\n", td->o.name,
|
|
(unsigned long long) f->file_offset,
|
|
(unsigned long long) f->real_file_size);
|
|
return 1;
|
|
}
|
|
|
|
fio_file_set_size_known(f);
|
|
return 0;
|
|
}
|
|
|
|
static struct ioengine_ops ioengine = {
|
|
.name = "dev-dax",
|
|
.version = FIO_IOOPS_VERSION,
|
|
.init = fio_devdax_init,
|
|
.prep = fio_devdax_prep,
|
|
.queue = fio_devdax_queue,
|
|
.open_file = fio_devdax_open_file,
|
|
.close_file = fio_devdax_close_file,
|
|
.get_file_size = fio_devdax_get_file_size,
|
|
.flags = FIO_SYNCIO | FIO_DISKLESSIO | FIO_NOEXTEND | FIO_NODISKUTIL,
|
|
};
|
|
|
|
/*
 * fio_init hook: hand the dev-dax engine descriptor to register_ioengine().
 */
static void fio_init
fio_devdax_register(void)
{
	register_ioengine(&ioengine);
}
|
|
|
|
/*
 * fio_exit hook: hand the dev-dax engine descriptor to
 * unregister_ioengine().
 */
static void fio_exit
fio_devdax_unregister(void)
{
	unregister_ioengine(&ioengine);
}
|