yorick banner

Home

Manual

Packages

Global Index

Keywords

Quick Reference


/*
 * yeti_yhdf.i --
 *
 *	Implement support for Yeti Hierarchical Data File.
 *
 *-----------------------------------------------------------------------------
 *
 *	Copyright (C) 2002 Eric THIEBAUT.
 *
 *	This file is part of Yeti.
 *
 *	Yeti is  free software;  you can redistribute  it and/or  modify it
 *	under the terms  of the GNU General Public  License as published by
 *	the Free Software  Foundation; either version 2 of  the License, or
 *	(at your option) any later version.
 *
 *	Yeti is distributed in the hope that it will be useful, but WITHOUT
 *	ANY WARRANTY; without even  the implied warranty of MERCHANTABILITY
 *	or FITNESS  FOR A PARTICULAR  PURPOSE.  See the GNU  General Public
 *	License for more details.
 *
 *	You should have  received a copy of the  GNU General Public License
 *	along with  Yeti (file "COPYING"  in the top source  directory); if
 *	not, write to the Free  Software Foundation, Inc., 59 Temple Place,
 *	Suite 330,  Boston, MA  02111-1307 USA.
 *
 *-----------------------------------------------------------------------------
 *
 * History:
 *	$Id$
 *	$Log$
 */

func yhd_save (filename, obj, keylist, .., comment=, encoding=, overwrite=)
/* DOCUMENT yhd_save, filename, obj;
       -or- yhd_save, filename, obj, keylist, ...;
     Save contents of hash object OBJ into the Yeti Hierarchical Data (YHD)
     file FILENAME.  If additional arguments are provided, they are the
     names of members to save.  The default is to save every member.

     Keyword COMMENT can be used to store a (short) string comment in the
     file header.  The comment is truncated if it is too long (more than
     about 130 bytes) to fit into the header.  COMMENT must not contain
     any DEL (octal 177) character.

     Keyword ENCODING can be used to specify a particular binary data
     format for the file; ENCODING can be the name of some known data
     format (see get_encoding) or an array of 32 integers (see
     set_primitives).  The default is to use the native data format.
     ENCODING is checked before the file is created, so an invalid
     encoding never creates or truncates FILENAME.

     If keyword OVERWRITE is true and file FILENAME already exists, the new
     file will (silently) overwrite the old one; otherwise, file FILENAME
     must not already exist (default behaviour).

   SEE ALSO
     yhd_restore, yhd_info, yhd_check, get_encoding, set_primitives, h_new.

   DESCRIPTION OF YHD FILE FORMAT
     A YHD file consists of a header (256 bytes) followed by any number of
     records (one record for each member of the saved hash_table object).

     The file header is a 256 character array filled with a text string
     padded with nulls:
         YetiHD-VERSION (DATE)\n
         ENCODING\n
         COMMENT\n
     where VERSION is the version number (an integer); DATE is the creation
     date of the file (see Yorick builtin timestamp); ENCODING is a
     human-readable array of 32 integers separated by commas and enclosed
     in square brackets (i.e.: [n1,n2,....,n32]); COMMENT is an optional
     comment string.

     All binary data of a YHD file is written following the ENCODING format
     of the file.

     The format of a record is as follows:
     | Number  Type Name     Description
     | -----------------------------------------------------------------------
     |      1 long  TYPE     data type of record
     |      1 long  IDSIZE   number of bytes in member identifier (may be 0)
     |      1 long  RANK     number of dimensions, 0 if scalar/non-array
     |   RANK long  DIMLIST  dimension list (absent if RANK is 0)
     | IDSIZE char  IDENT    identifier of record (see below)
     |   *special*  DATA     binary data of the record

     TYPE is: <0 - string array      4 - long array
     |         0 - void              5 - float array
     |         1 - char array        6 - double array
     |         2 - short array       7 - complex array
     |         3 - int array         8 - pointer array
     For string arrays, TYPE is strictly less than zero and is minus the
     number of characters needed to represent all elements of the array in
     packed form (more on this below).  Void objects are also used to
     represent NULL pointer data -- this means that a NULL pointer element
     takes 3*sizeof(long) bytes to be stored in the file, which may be an
     issue if you use large pointer arrays sparsely filled with data.

     IDENT is the full name of the member: it is an IDSIZE char array
     where null characters are used to separate submember names and with
     a final null.  If IDSIZE=0, no IDENT is written.

     Arrays of strings are written in packed form, every string being
     prefixed with '\1' (nil string) or '\2' (non-nil string) and suffixed
     with '\0', hence:
     |  '\1' '\0'       (2 bytes) for a nil string
     |  '\2' ... '\0'   (2+LEN bytes) for a string of length LEN
     this is needed to distinguish nil-string from "".

     The data part of an array of pointers consists of anonymous records
     (records with IDSIZE=0 and no IDENT) for each element of the array. */
{
  /* Declaration of variables that will be inherited by subroutines called
     by this routine (not really necessary, but just to make this clear). */
  local file, address, elsize;

  /* Set some 'constants'. */
  YHD_HEADER_SIZE = 256;
  YHD_VERSION = 1; // version number

  /* Get list of members to save. */
  if (! is_hash(obj)) error, "expecting hash_table object";
  while (more_args()) grow, keylist, next_arg();
  if (! is_void(keylist) && structof(keylist) != string)
    error, "invalid member list";

  /* Check COMMENT string. */
  if (is_void(comment)) comment = "";
  else if (strmatch(comment, "\177")) error, "invalid character in COMMENT";

  /* Refuse to clobber an existing file unless explicitly allowed (this
     probe only opens the file for reading). */
  if (! overwrite && open(filename,"r",1))
    error, "file \"" + filename + "\" already exists";

  /* Resolve and validate ENCODING *before* the output file is opened for
     writing, so that a bad encoding cannot leave behind a newly created
     (or truncated) file. */
  if (is_void(encoding)) encoding = "native";
  if (structof(encoding) == string) encoding = get_encoding(encoding);
  if (numberof(encoding) != 32)
    error, "ENCODING must be a known format name or an array of 32 integers";

  /* Create binary file with correct primitives and avoid log-file: the
     log file is removed only if it did not exist before the open. */
  logname = filename + "L";
  remove_log = (open(logname, "r", 1) ? 0n : 1n);
  file = open(filename, "wb");
  if (remove_log) remove, logname;
  install_encoding, file, encoding;
  save, file, complex; /* install the definition of a complex */

  /* Write header. */
  ident = swrite(format="YetiHD-%d (%s)\n[%d",
                 YHD_VERSION, timestamp(), encoding(1));
  for (i=2 ; i<=32 ; ++i) ident += swrite(format=",%d", encoding(i));
  /* 3 bytes are reserved for the "]\n" before and the "\n" after COMMENT. */
  maxlen = YHD_HEADER_SIZE - 3 - strlen(ident);
  if (strlen(comment) > maxlen) {
    __yhd_warn, "too long comment get truncated";
    comment = strpart(comment, 1:maxlen);
  }
  ident += swrite(format="]\n%s\n", comment);
  len = strlen(ident);
  hdr = array(char, YHD_HEADER_SIZE); /* null padded header */
  hdr(1:len) = (*pointer(ident))(1:len);
  address = 0;
  _write, file, address, hdr;
  address += YHD_HEADER_SIZE;

  /* Build table of size of primary data types in file encoding (indexed
     by record TYPE 1 to 7; a complex is stored as two doubles). */
  elsize = [encoding(1), encoding(4), encoding(7), encoding(10),
            encoding(13), encoding(16), 2*encoding(16)];

  /* Save members. */
  __yhd_save_hash, obj, [], keylist;
}

func __yhd_save_member (data, ident)
{
  /* Write one record for DATA at the current file position.  IDENT is the
     null-separated char identifier of the member, or void for an anonymous
     record (element of a pointer array).  Uses and advances the FILE /
     ADDRESS state owned by the caller. */
  /**/extern file, address, elsize;
  wordlen = elsize(4); /* sizeof(long) in file encoding */
  idlen = numberof(ident);
  kind = structof(data);

  /* Map the data type onto the record TYPE code. */
  if (kind == char)          code = 1;
  else if (kind == short)    code = 2;
  else if (kind == int)      code = 3;
  else if (kind == long)     code = 4;
  else if (kind == float)    code = 5;
  else if (kind == double)   code = 6;
  else if (kind == complex)  code = 7;
  else if (kind == pointer)  code = 8;
  else if (kind == string) {
    /* String arrays: TYPE is minus the number of characters written to
       the file (2 framing bytes per element plus the string lengths). */
    code = -(2*numberof(data) + sum(strlen(data)));
  } else if (is_hash(data)) {
    /* Sub-table: its members are saved as records of their own. */
    __yhd_save_hash, data, ident;
    return;
  } else {
    /* Void data, or something that cannot be stored: write a void record. */
    if (! is_void(data)) {
      if (idlen) {
        /* hash table member */
        __yhd_warn, "unsupported data type: ", typeof(data),
          " for member \"", __yhd_member_name(ident),
          "\" - replaced by void data";
      } else {
        /* element of a pointer array: save NULL pointer */
        __yhd_warn, "unsupported data type: ", typeof(data),
          " - replaced by NULL pointer element";
      }
    }
    _write, file, address, [0, idlen, 0]; /* type, idsize, rank */
    address += 3*wordlen;
    if (idlen) {
      _write, file, address, ident;
      address += idlen;
    }
    return;
  }

  /* Array record header: TYPE, IDSIZE, then RANK and the dimensions
     exactly as returned by dimsof. */
  nelem = numberof(data);
  hdr = grow([code, idlen], dimsof(data));
  _write, file, address, hdr;
  address += wordlen*numberof(hdr);
  if (idlen) {
    _write, file, address, ident;
    address += idlen;
  }

  /* Payload. */
  if (code == 8) {
    /* Pointer array: one anonymous record per element. */
    for (k=1 ; k<=nelem ; ++k) __yhd_save_member, *data(k);
  } else if (code > 0) {
    /* Numerical array: written in one go. */
    _write, file, address, data;
    address += elsize(code)*nelem;
  } else {
    /* String array in packed form. */
    nil = ['\1', '\0'];
    k = 0;
    while (++k <= nelem) {
      chars = *pointer(data(k)); /* includes final null, void if nil */
      len = numberof(chars);
      if (! len) {
        _write, file, address, nil;
        address += 2;
      } else {
        _write, file, address, '\2';
        address += 1;
        _write, file, address, chars;
        address += len;
      }
    }
  }
}

func __yhd_save_hash (hash, prefix, keylist)
{
  /* Save the members of HASH named in KEYLIST (every member, in sorted
     key order, when KEYLIST is void).  PREFIX is the identifier of HASH
     itself (void for the top-level table); each member identifier is
     PREFIX followed by the null-terminated member name. */
  if (is_void(keylist)) {
    keylist = h_keys(hash);
    if (is_array(keylist)) keylist = keylist(sort(keylist));
  }
  total = numberof(keylist);
  k = 0;
  while (++k <= total) {
    name = keylist(k);
    path = prefix;
    grow, path, *pointer(name);
    __yhd_save_member, h_get(hash, name), path;
  }
}

func yhd_check (file, &version, &date, &encoding, &comment)
/* DOCUMENT yhd_check(file);
       -or- yhd_check(file, version, date, encoding, comment);
     Return 1 (true) if FILE is a valid YHD file; otherwise return 0
     (false).  The decision is made by reading and parsing the 256-byte
     file header.  FILE can be a file name (scalar string) or a binary
     file stream opened for reading.  VERSION, DATE, ENCODING and COMMENT
     are pure outputs and may be omitted; their contents are undetermined
     when the result is false.

   SEE ALSO yhd_info, yhd_save, yhd_restore. */
{
  /* Fetch the raw header. */
  YHD_HEADER_SIZE = 256;
  if (structof(file) == string) file = open(file, "rb");
  raw = array(char, YHD_HEADER_SIZE);
  if (_read(file, 0, raw) != YHD_HEADER_SIZE) return 0n;
  text = string(&raw);

  /* Split the header into its parts; the comment is whatever follows the
     encoding line (the DEL character, octal 177, is hopefully not part
     of it). */
  token = string();
  date = string();
  comment = string();
  encoding = array(long, 32);
  version = 0;
  if (sread(text, format="YetiHD-%d (%[^)])\n[%[^]]]\n%[^\177]",
            version, date, token, comment) < 3) return 0n;

  /* The first 31 encoding values must each be followed by a comma. */
  value = 0;
  k = 1;
  while (k < 32 && sread(token, format="%d ,%[^]]", value, token) == 2) {
    encoding(k) = value;
    ++k;
  }
  if (k != 32) return 0n;

  /* The last value must be followed by nothing else. */
  if (sread(token, format="%d %[^]]", value, token) != 1) return 0n;
  encoding(32) = value;

  /* Drop the newline which terminates the comment. */
  if (strpart(comment, 0:0) == "\n") comment = strpart(comment, :-1);
  return 1n;
}

func yhd_info (file)
/* DOCUMENT yhd_info, file;
     Print the version, creation date and comment stored in the header of
     a YHD file.  FILE can be a file name (scalar string) or a binary
     file stream opened for reading.  An error is raised if FILE is not a
     valid YHD file.

   SEE ALSO yhd_check, yhd_restore, yhd_save. */
{
  local version, date, encoding, comment;
  named = (structof(file) == string);
  if (! yhd_check(file, version, date, encoding, comment)) {
    if (named) error, "\""+file+"\" is not a valid YHD file";
    error, "invalid YHD file";
  }
  if (named) label = file;
  else label = "YHD file";
  write, format="%s:\n  version = %d\n  date    = %s\n  comment = %s\n",
    label, version, date, comment;
}

func yhd_restore (filename, keylist, ..)
/* DOCUMENT yhd_restore(filename);
       -or- yhd_restore(filename, keylist, ...);
     Restore and return hash table object saved in YHD file FILENAME.  If
     additional arguments are provided, they are the names of (top-level)
     members to restore.  The default is to restore every member.

     An error is raised if FILENAME is not a valid YHD file, if its
     version is not supported, or if a record of the file is corrupted
     (e.g. a member identifier which is not null-terminated).

   SEE ALSO yhd_check, yhd_info, yhd_save. */
{
  /* Declaration of variables that will be inherited by subroutines called
     by this routine (not really necessary, but just to make this clear). */
  local file, address, elsize, type, dimlist, ident;

  /* List of members to restore. */
  while (more_args()) grow, keylist, next_arg();
  if (! is_void(keylist) && structof(keylist) != string)
    error, "invalid member list";

  /* Open file, read header and set primitives. */
  local version, date, encoding, comment;
  file = open(filename, "rb");
  if (! yhd_check(file, version, date, encoding, comment))
    error, "\""+filename+"\" is not a valid YHD file";
  if (version != 1)
    error, swrite(format="unsupported YHD file version: %d", version);
  install_encoding, file, encoding;
  save, file, complex; /* install the definition of a complex */
  address = 256; /* header has already been read */

  /* Build table of size of primary data types in file encoding. */
  elsize = [encoding(1), encoding(4), encoding(7), encoding(10),
            encoding(13), encoding(16), 2*encoding(16)];

  /* Read contents of file, one record at a time, until the header reader
     reports the normal end-of-file. */
  obj = h_new();
  while (__yhd_read_member_header(ident, type, dimlist)) {
    /* IDENT must end with a null: the name splitting below relies on it
       (index 0 is the last element). */
    if (ident(0))
      error, "corrupted member identifier in YHD file (missing final null)";

    /* Skip member if not in KEYLIST (KEY is the top-level name here). */
    key = string(&ident);
    if (! is_void(keylist) && noneof(keylist == key)) {
      __yhd_restore_data, type, dimlist, 1;
      continue;
    }

    /* Convert identifier to OWNER-KEY pair: walk down (creating
       intermediate tables as needed) through every name but the last. */
    owner = obj;
    i = where(! ident);
    n = numberof(i);
    k2 = i(1);
    for (j=2 ; j<=n ; ++j) {
      if (h_has(owner, key)) {
        owner = h_get(owner, key);
        if (! is_hash(owner)) {
          error, "inconsistent hierarchical member \""+
            __yhd_member_name(ident)+"\"";
        }
      } else {
        h_set, owner, key, (tmp = h_new());
        owner = tmp;
      }
      k1 = k2 + 1;
      k2 = i(j);
      key = string(&ident(k1:k2));
    }
    if (h_has(owner, key)) __yhd_warn, "duplicate member \"",
                             __yhd_member_name(ident), "\" in YHD file";
    h_set, owner, key, __yhd_restore_data(type, dimlist);
  }
  return obj;
}

func __yhd_read_member_header (&ident, &type, &dimlist, pt)
{
  /* Read the header of the record at the current file position and store
     its parts in TYPE, DIMLIST and (for named records) IDENT.  PT is true
     when the record is an element of a pointer array, which must be
     anonymous.  Returns 0 at end-of-file, 1 otherwise. */
  /**/extern file, address, elsize;

  /* Probe a single byte to find out whether anything is left to read. */
  probe = 'a';
  if (! _read(file, address, probe)) return 0; /* normal end-of-file */

  /* Fixed part of the record header: TYPE, IDSIZE and RANK. */
  wordlen = elsize(4); /* sizeof(long) in file encoding */
  bytelen = elsize(1); /* sizeof(char) in file encoding */
  triple = __yhd_read(wordlen, long, 3);
  type = triple(1);
  idlen = triple(2);
  ndims = triple(3);

  if (! type) {
    /* Void object: no dimension list at all. */
    dimlist = [];
    if (ndims) error, "bad RANK in record header of YHD file";
  } else {
    /* Array: build the dimension list with the rank as first element. */
    if (ndims < 0) error, "bad RANK in record header of YHD file";
    dimlist = array(long, ndims+1);
    dimlist(1) = ndims;
    if (ndims) dimlist(2:) = __yhd_read(wordlen, long, ndims);
  }

  if (! pt) {
    /* Member of a hash table: an identifier is mandatory. */
    if (idlen < 1) error, "unexpected anonymous member in YHD file";
    ident = __yhd_read(bytelen, char, idlen);
  } else if (idlen) {
    /* Element of pointer array: IDSIZE must be 0. */
    error, "unexpected named member in YHD file";
  }
  return 1;
}

func __yhd_restore_data (type, dimlist, skip)
{
  /* Read (or, if SKIP is true, step over) the data part of a record whose
     header gave TYPE and DIMLIST.  Returns the restored data; returns
     nothing for a void record or when skipping. */
  /**/extern file, address, elsize;

  /* Void record: no data part. */
  if (! type) return;

  if (type < 0) {
    /* String array: -TYPE is the number of characters stored in the file. */
    bytelen = elsize(1); /* sizeof(char) in file encoding */
    nchars = -type;
    if (skip) {
      address += bytelen*nchars;
      return;
    }
    packed = __yhd_read(bytelen, char, nchars);
    ends = where(! packed); /* one terminating null per element */
    result = array(string, dimlist);
    nstr = numberof(result);
    if (numberof(ends) == nstr) {
      lo = 1;
      for (k=1 ; k<=nstr ; ++k) {
        hi = ends(k);
        /* Only elements flagged with '\2' are non-nil strings. */
        if (packed(lo) == '\2') result(k) = string(&packed(lo+1:hi));
        lo = hi + 1;
      }
    } else {
      __yhd_warn, "bad string array in file (elements left empty)";
    }
    return result;
  }

  if (type == 8) {
    /* Pointer array: a sequence of anonymous records, one per element. */
    local etype, edims; /* type and dimension list for every element */
    cells = array(pointer, dimlist);
    ncells = numberof(cells);
    for (k=1 ; k<=ncells ; ++k) {
      if (! __yhd_read_member_header(/*not needed*/, etype, edims, 1)) {
        __yhd_warn, "short YHD file (unterminated pointer array)";
        break;
      }
      item = __yhd_restore_data(etype, edims, skip);
      if (etype && ! skip) cells(k) = &item;
    }
    if (skip) return;
    return cells;
  }

  if (type > 8) error, "invalid TYPE in record header of YHD file";

  /* Numerical array data (TYPE from 1 to 7). */
  width = elsize(type);
  if (skip) {
    total = width;
    for (k=numberof(dimlist) ; k>=2 ; --k) total *= dimlist(k);
    address += total;
    return;
  }
  if (type == 1)      kind = char;
  else if (type == 2) kind = short;
  else if (type == 3) kind = int;
  else if (type == 4) kind = long;
  else if (type == 5) kind = float;
  else if (type == 6) kind = double;
  else                kind = complex;
  return __yhd_read(width, kind, dimlist);
}

func __yhd_read (element_size, data_type, dimlist)
{
  /* Read an array of DATA_TYPE with dimensions DIMLIST at the current
     file position, advance the position by ELEMENT_SIZE bytes per element
     and return the array.  The returned byte count of _read is checked
     for char data only. */
  /**/extern file, address;
  buf = array(data_type, dimlist);
  nbytes = element_size*numberof(buf);
  if (data_type == char) {
    if (_read(file, address, buf) != nbytes) error, "short file";
  } else {
    _read, file, address, buf;
  }
  address += nbytes;
  return buf;
}

func __yhd_warn (s, ..)
{
  /* Print a warning made of the concatenation of all the arguments. */
  msg = s;
  while (more_args()) {
    msg += next_arg();
  }
  write, format="WARNING - %s\n", msg;
}

func __yhd_member_name (ident, separator)
{
  /* Convert the null-separated char identifier IDENT into a readable
     member name, the successive names being joined with SEPARATOR
     (a dot by default). */
  result = string(&ident); /* first name, up to the first null */
  ends = where(! ident);   /* indices of the terminating nulls */
  nparts = numberof(ends);
  if (nparts < 2) return result;
  if (is_void(separator)) separator = ".";
  for (k=1 ; k<nparts ; ++k) {
    result += separator + string(&ident(ends(k)+1:ends(k+1)));
  }
  return result;
}

func __yhd_insert_dim (dimlist, first_dim)
{
  /* Return a dimension list made of DIMLIST with FIRST_DIM inserted as
     the new leading dimension (the rank stored in the first element is
     incremented accordingly). */
  result = grow(0, dimlist);
  /* One slot was prepended, so the old rank slot now holds the new first
     dimension and the new rank equals the old number of elements. */
  result(1) = numberof(dimlist);
  result(2) = first_dim;
  return result;
}

/*---------------------------------------------------------------------------*/