^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
BE ENGINEERING INSIGHTS: Doing File I/O From A Device Driver
By Dmitriy Budko -- <dmitriy@be.com>
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

Many BeOS developers ask if it is possible to do file or
/dev/* I/O from a kernel driver. This is a very reasonable
question. On other OSes it's complicated: one has to use
special, unfamiliar functions like the Windows 95/98
IFSMgr_Ring0_FileIO set (OpenCreateFile(), ReadFile(),
WriteAbsoluteDisk(), etc.) or the Windows NT ZwCreateFile(),
ZwReadFile(), etc. A small example from the Microsoft
Windows NT DDK:

ntStatus = ZwCreateFile( &NtFileHandle,
  SYNCHRONIZE | FILE_READ_DATA,
  &ObjectAttributes,
  &IoStatus,
  NULL,   // alloc size = none
  FILE_ATTRIBUTE_NORMAL,
  FILE_SHARE_READ,
  FILE_OPEN,
  FILE_SYNCHRONOUS_IO_NONALERT,
  NULL, // eabuffer
  0 );  // ealength

Can you understand without extensive comments what is going
on here?

Or from the NT DDK documentation:

NTSTATUS ZwReadFile(
  IN HANDLE FileHandle,
  IN HANDLE Event OPTIONAL,
  IN PIO_APC_ROUTINE ApcRoutine OPTIONAL,
  IN PVOID ApcContext OPTIONAL,
  OUT PIO_STATUS_BLOCK IoStatusBlock,
  OUT PVOID Buffer,
  IN ULONG Length,
  IN PLARGE_INTEGER ByteOffset OPTIONAL,
  IN PULONG Key OPTIONAL
  );


Under BeOS it's much easier: a driver can call the standard
POSIX low-level I/O functions: open(), close(), read(),
write(), etc.

Here is a simple driver that uses these functions and
provides very simple encryption capabilities. It publishes a
"secure" device in /dev/misc/cryptodevice. Programs can
read/write to it as if it were a normal file, but the data is
scrambled and stored in the normal
/boot/home/cryptod_storage file. The source code, makefile,
installation script, PPC and x86 BeIDE projects are at

<ftp://ftp.be.com/pub/samples/drivers/cryptodevice.zip>

#include <OS.h>
#include <KernelExport.h>
#include <Drivers.h>
#include <fcntl.h>
#include <string.h>
#include <unistd.h>

/* Descriptor of the backing file; assigned by init_driver() and closed by
   uninit_driver(). Starts at -1 so it never aliases a valid descriptor. */
static int fh = -1;
/* Path of the regular file that holds the scrambled data. */
static const char* const  file_name  = "/boot/home/cryptod_storage";
/* Repeating XOR key applied by encrypt()/decrypt(). */
static const char* const  key_string = "VERY lousy encryption";

/*
 * Scramble len bytes of buf in place by XOR-ing each byte with a byte of
 * key_string. The key byte is selected by the byte's absolute file offset
 * (pos plus its index in buf), converted to unsigned and reduced modulo
 * the key length, so the result depends only on where the byte lives in
 * the file and not on how the transfer was split up.
 */
static void
encrypt(uchar* buf, size_t len, off_t pos)
{
  const size_t  key_len = strlen(key_string);
  size_t  n = 0;

  while(n < len)
  {
    const unsigned  offset = (unsigned)(pos + n);

    buf[n] = buf[n] ^ key_string[offset % key_len];
    n++;
  }
}

/*
 * Unscramble len bytes of buf that were stored at file offset pos.
 * The cipher is a plain XOR, so applying encrypt() a second time with
 * the same offset restores the original bytes.
 */
static void
decrypt(uchar* buf, size_t len, off_t pos)
{
  /* XOR is its own inverse: decrypting is just encrypting again. */
  encrypt(buf, len, pos);
}

/*
 * open hook: only logs the call. No per-open state is created, so
 * *cookie is left untouched. Always returns B_OK.
 */
static status_t
cryptod_open (const char *name, uint32 flags, void **cookie)
{
  /* None of the arguments are needed by this driver. */
  (void)name;
  (void)flags;
  (void)cookie;

  dprintf("cryptod: open()\n");
  return B_OK;
}

/*
 * close hook: only logs the call. The backing file stays open until
 * uninit_driver(). Always returns B_OK.
 */
static status_t
cryptod_close (void *cookie)
{
  (void)cookie;   /* unused */

  dprintf("cryptod: close()\n");
  return B_OK;
}

/*
 * free hook: only logs the call; there is no per-open state to
 * release. Always returns B_OK.
 */
static status_t
cryptod_free (void *cookie)
{
  (void)cookie;   /* unused */

  dprintf("cryptod: free()\n");
  return B_OK;
}

/*
 * read hook: read up to *len bytes at file offset pos from the backing
 * file into buf and unscramble them in place.
 *
 * cookie  unused.
 * pos     absolute offset to read from.
 * buf     destination buffer, at least *len bytes.
 * len     in: bytes requested; out: bytes actually read (0 on failure).
 *
 * Returns B_OK, B_DEV_SEEK_ERROR if the seek fails, or B_DEV_READ_ERROR
 * if the read fails.
 *
 * NOTE(review): the seek and the read are two separate calls on the
 * shared descriptor fh; confirm whether hooks can run concurrently.
 */
static status_t
cryptod_read (void *cookie, off_t pos, void *buf, size_t *len)
{
  ssize_t  got;

  (void)cookie;

  /* size_t is not an int: cast so the argument matches the format. */
  dprintf("cryptod: read(%Ld, %lu)\n", pos, (unsigned long)*len);

  if(-1 == lseek(fh, pos, SEEK_SET))
  {
    *len = 0;
    return B_DEV_SEEK_ERROR;
  }

  /* Keep read()'s result signed so the error test does not rely on
     -1 surviving a round trip through the unsigned *len. */
  got = read(fh, buf, *len);
  if(got < 0)
  {
    *len = 0;
    return B_DEV_READ_ERROR;
  }

  *len = (size_t)got;
  /* Only the bytes that were really read get unscrambled. */
  decrypt((uchar*)buf, *len, pos);
  return B_OK;
}

static status_t
cryptod_write (void *cookie, off_t pos, const void *buf,
  size_t *len)
{
  dprintf("cryptod: write(%Ld, %Ld)\n", pos, *len);

  if(-1 == lseek(fh, pos, SEEK_SET))
  {
    *len = 0;
    return B_DEV_SEEK_ERROR;
  }

  encrypt((uchar*)buf, *len, pos);

  if (-1 == (*len = write(fh, buf, *len)))
  {
    *len = 0;
    return B_DEV_WRITE_ERROR;
  }
  return B_OK;
}

/*
 * readv hook: read from the backing file at offset pos into the count
 * buffers described by vec, then unscramble the bytes that were read.
 *
 * cookie  unused.
 * pos     absolute offset to read from.
 * vec     array of count buffers, filled in order.
 * count   number of entries in vec.
 * len     out: total bytes actually read (0 on failure).
 *
 * Returns B_OK, B_DEV_SEEK_ERROR if the seek fails, or B_DEV_READ_ERROR
 * if the read fails.
 */
static status_t
cryptod_readv (void *cookie, off_t pos, const iovec *vec,
  size_t count, size_t *len)
{
  size_t  i;
  size_t  left;
  off_t  cur_pos;
  ssize_t  got;

  (void)cookie;

  /* size_t is not an int: cast so the arguments match the format. */
  dprintf("cryptod: readv(%Ld, %lu, %lu)\n", pos,
    (unsigned long)count, (unsigned long)*len);

  if(-1 == lseek(fh, pos, SEEK_SET))
  {
    *len = 0;
    return B_DEV_SEEK_ERROR;
  }

  /* Keep readv()'s result signed so the error test is unambiguous. */
  got = readv(fh, vec, count);
  if(got < 0)
  {
    *len = 0;
    return B_DEV_READ_ERROR;
  }
  *len = (size_t)got;

  /* Unscramble only what was actually read. On a short read the tail
     of the vectors still holds the caller's old contents and must not
     be touched. */
  left = (size_t)got;
  for(cur_pos=pos,i=0; i<count && left>0; i++)
  {
    size_t  n = vec[i].iov_len;

    if(n > left)
      n = left;
    decrypt((uchar*)vec[i].iov_base, n, cur_pos);
    cur_pos += n;
    left -= n;
  }
  return B_OK;
}

static status_t
cryptod_writev (void *cookie, off_t pos, const iovec *vec,
  size_t count, size_t *len)
{
  size_t  i;
  off_t  cur_pos;

  dprintf("cryptod: writev(%Ld, %d, %d)\n", pos, count, *len);

  if(-1 == lseek(fh, pos, SEEK_SET))
  {
    *len = 0;
    return B_DEV_SEEK_ERROR;
  }

  if (-1 == (*len = writev(fh, vec, count)))
  {
    *len = 0;
    return B_DEV_WRITE_ERROR;
  }

  for(cur_pos=pos,i=0; i<count; i++)
  {
    encrypt((uchar*)vec[i].iov_base, vec[i].iov_len, cur_pos);
    cur_pos += vec[i].iov_len;
  }
  return B_OK;
}

/*
 * control (ioctl) hook: this driver implements no control requests,
 * so every call is rejected with B_DEV_INVALID_IOCTL.
 */
static status_t
cryptod_control(void *cookie, uint32 msg, void *buf,
  size_t len)
{
  /* Nothing is inspected; every request gets the same answer. */
  (void)cookie;
  (void)msg;
  (void)buf;
  (void)len;

  return B_DEV_INVALID_IOCTL;
}

/* Hook table for the published device; returned by find_device(). */
static device_hooks cryptod_device = {
  cryptod_open,
  cryptod_close,
  cryptod_free,
  cryptod_control,
  cryptod_read,
  cryptod_write,
  NULL,             /* select: not implemented */
  NULL,             /* deselect: not implemented */
  cryptod_readv,
  cryptod_writev
};

/* NULL-terminated list of device names (relative to /dev) returned by
   publish_devices(). The entries are string literals, so they are held
   through pointers to const. */
static const char *cryptod_name[] = {
  "misc/cryptodevice",
  NULL
};

/*
 * Driver entry point: log the build date/time and open (creating it if
 * necessary) the backing file named by file_name. The descriptor is
 * kept in fh for use by the read/write hooks.
 *
 * Returns B_OK on success, B_ERROR if the file cannot be opened.
 */
status_t
init_driver(void)
{
  dprintf("cryptod: init_driver(), %s, %s\n",
    __DATE__, __TIME__);

  /* With O_CREAT, open() reads a third (mode) argument; omitting it
     gives the new file whatever permission bits happen to be there.
     0600 = read/write for the owner only. */
  fh = open(file_name, O_RDWR | O_CREAT, 0600);
  if(fh < 0)
    return B_ERROR;
  return B_OK;
}

/*
 * Driver exit point: log the call and close the backing file that
 * init_driver() opened.
 */
void
uninit_driver()
{
  dprintf("cryptod: uninit_driver()\n");

  /* The result of close() is deliberately not examined. */
  (void)close(fh);
}

/*
 * Return the NULL-terminated list of device names this driver
 * publishes (the cryptod_name table).
 */
const char **
publish_devices()
{
  /* The array decays to a pointer to its first element, which is the
     same address as that of the array itself. */
  return (const char **)cryptod_name;
}

/*
 * Return the hook table for a device. Only one device is published,
 * so name is not examined and the same table is returned for every
 * request.
 */
device_hooks *
find_device(const char *name)
{
  device_hooks  *hooks = &cryptod_device;

  (void)name;   /* unused */
  return hooks;
}

The driver just passes all read/write requests to the file
system. Everything should be obvious to any C/DOS/POSIX
programmer except two functions: readv()/writev(). They are
common extensions to POSIX and are used to read/write a
contiguous portion of a file from/to many buffers in one
system (or file system) call. These functions may provide
better performance in many cases than multiple calls to
read()/write().

From the Linux man pages, with a few changes:

#include <sys/uio.h>

int readv(
  int fd, const struct iovec *vector, size_t count);

int writev(
  int fd, const struct iovec *vector, size_t count);

struct iovec {
  __ptr_t iov_base; /* Starting address. */
  size_t iov_len; /* Length in bytes. */
  };


Description

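readv reads data from file descriptor fd, and puts the
result in the buffers described by vector. The number of
buffers is specified by count. The buffers are filled in
the order specified. Operates just like read except that
data is put in vector instead of a contiguous buffer.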

writev writes data to file descriptor fd from the
buffers described by vector. The number of buffers is
specified by count. The buffers are used in the order
specified. Operates just like write except that data is
taken from vector instead of a contiguous buffer.


Return Value

On success readv returns the number of bytes read. On
success writev returns the number of bytes written. On
error, -1 is returned, and errno is set appropriately.

If you want to initialize and mount a file system on the
encrypted device then you will have to:

1) Use a raw device or partition like
   /dev/disk/ide/ata/1/master/0/0_1 as the backing storage
   for the data.
2) Change the published device name to /dev/disk/foo/bar.
3) Implement ioctl() handlers for the standard requests
   for a mass storage device. See old RAMDrive as an example:
   <ftp://ftp.be.com/pub/samples/drivers/obsolete/ramdrive.zip>.

Unfortunately, in current versions of the BeOS you can *not*
mount a file system over such a device if it uses a file on a
file system as the backing storage. A deadlock will occur.
This will be fixed in a future version of the BeOS.
