Group
Extension

lsn-data-hub/lib/Data/Hub/FileSystem/Node.pm

package Data::Hub::FileSystem::Node;
use strict;
use Tie::Hash;
use Perl::Module;
use Error::Programatic;
use Error::Logical;
use Data::OrderedHash;
use Data::Hub::Util qw(:all);
use Data::Hub::Container;
use Data::Hub::FileSystem::AccessLog;
use Fcntl qw(S_ISREG S_ISDIR S_ISBLK S_ISCHR S_IFMT);
push our @ISA, qw(Tie::ExtraHash Data::Hub::Container);

our $VERSION = 0.1;

# Implementing classes (FileSystem handlers)
our @Handlers = ();

# ------------------------------------------------------------------------------
# Add_Handler - Add a filesystem node handler
# Add_Handler 'Package::Name', %criteria
#
# criteria:
#
#   -type=d         # Directory
#   -type=f         # Regular file
#   -type=T         # Text file
#   -type=B         # Binary file
#   -type=!         # Does not exist
#   -path=$regex    # Path (with filename) matches $regex
#   -crown=$regex   # File's crown matches $regex
#
# ------------------------------------------------------------------------------

sub Add_Handler {
  my ($opts, $pkg) = my_opts(\@_);
  my ($fn) = $pkg =~ /^([\d\w:]+)/;
  $fn =~ s/::/\//g;
  $fn .= '.pm';
  require $fn;
  unshift @Handlers, {pkg=>$pkg, %$opts};
}

# Don't try to require the module
sub Add_Handler2 {
  my ($opts, $pkg) = my_opts(\@_);
  unshift @Handlers, {pkg=>$pkg, %$opts};
}

sub Get_Pkg {
  my $path = shift;
  my $default = shift || __PACKAGE__;
  my $stat = shift || str_ref();
  tie my $crown, 'Data::Hub::FileSystem::Node::Crown', $path;
  $$stat ||= stat $path;
  my $handler = grep_first {_Match($_, $path, \$crown)} @Handlers;
  return defined $handler ? $handler->{pkg} : $default;
}

sub _Match {
  my ($h, $path, $crown) = @_;
  if (defined $$h{'type'}) {
    return 0 if ($$h{'type'} eq 'd' && ! -d _);
    return 0 if ($$h{'type'} eq 'T' && ! -T _);
    return 0 if ($$h{'type'} eq 'B' && ! (-B _ && ! -d _));
    return 0 if ($$h{'type'} eq 'f' && ! -f _);
    return 0 if ($$h{'type'} eq '!' && -e _);
  }
  if (defined $$h{'path'}) {
    return 0 if ($path !~ /$$h{'path'}/i);
  }
  if (defined $$h{'crown'}) {
    return 0 if ($$crown !~ /$$h{'crown'}/);
  }
  1;
}

# ------------------------------------------------------------------------------
# Handlers added first are matched last.
# ------------------------------------------------------------------------------

# Core handlers
Add_Handler(q/Data::Hub::FileSystem::File/,        '-type=f');
Add_Handler(q/Data::Hub::FileSystem::BinaryFile/,  '-type=B');
Add_Handler(q/Data::Hub::FileSystem::Directory/,   '-type=d');
Add_Handler(q/Data::Hub::FileSystem::TextFile/,    '-type=T');
Add_Handler(q/Data::Hub::FileSystem::TextFile/,    '-type=!', '-path=\.txt$');

# PerlModule
Add_Handler(q/Data::Hub::FileSystem::PerlModule/,  '-type=T', '-crown=^#\s*PerlModule\b');

# HashFile
Add_Handler(q/Data::Hub::FileSystem::HashFile/,    '-type=!', '-path=\.hf$');
Add_Handler(q/Data::Hub::FileSystem::HashFile/,    '-type=T', '-path=\.hf$');
Add_Handler(q/Data::Hub::FileSystem::HashFile/,    '-type=T', '-crown=^#\s*HashFile\b');

# JSONFile
Add_Handler(q/Data::Hub::FileSystem::JSONFile/,    '-type=!', '-path=\.json$');
Add_Handler(q/Data::Hub::FileSystem::JSONFile/,    '-type=T', '-path=\.json$');

# YAMLFile
Add_Handler(q/Data::Hub::FileSystem::YAMLFile/,    '-type=!', '-path=\.ya?ml$');
Add_Handler(q/Data::Hub::FileSystem::YAMLFile/,    '-type=T', '-path=\.ya?ml$');

# Images
Add_Handler(q/Data::Hub::FileSystem::ImageFile/,    '-type=B', '-path=\.(jpe?g|gif|png)');

# Fixes (binary files which aren't always picked up by -B)
Add_Handler(q/Data::Hub::FileSystem::BinaryFile/,  '-path=\.(pdf|run)$');

# ------------------------------------------------------------------------------
# new - Consructor
# new $path
# new $path, \%Data::Hub
# ------------------------------------------------------------------------------

sub new {
  my $pkg = ref($_[0]) ? ref(shift) : shift;
  my $path = shift;
  my $stat = undef;

  if ($pkg eq __PACKAGE__) {
    $pkg = Get_Pkg($path, undef, \$stat);
  } else {
    $stat = stat $path;
  }

  my $self = bless {}, $pkg;
  tie %$self, $pkg, $path, $stat, @_;

  return $self;
}

# ------------------------------------------------------------------------------
# get_path - Return the full physical path to this node.
# get_path
# Base method.
# ------------------------------------------------------------------------------

sub get_path {
  my $self = shift;
  throw Error::NotStatic unless isa($self, __PACKAGE__);
  tied(%$self)->__path;
}

# ------------------------------------------------------------------------------
# get_addr - Return the full physical path to this node.
# get_addr
# Base method.
# ------------------------------------------------------------------------------

sub get_addr {
  my $self = shift;
  throw Error::NotStatic unless isa($self, __PACKAGE__);
  tied(%$self)->__addr;
}

# ------------------------------------------------------------------------------
# get_name - Return the file name of this node.
# get_name
# Base method.
# ------------------------------------------------------------------------------

sub get_name {
  my $self = shift;
  throw Error::NotStatic unless isa($self, __PACKAGE__);
  addr_name(tied(%$self)->__addr);
}

# ------------------------------------------------------------------------------
# get_type - Return the Hub type signature for this node
# get_type
# Base method.
# ------------------------------------------------------------------------------

sub get_type {
  my $self = shift;
  throw Error::NotStatic unless isa($self, __PACKAGE__);
  tied(%$self)->__scan();
  return typeof(tied(%$self)->__path, $self);
}

# ------------------------------------------------------------------------------
# set_data - Set the data segment of this node
# ------------------------------------------------------------------------------

sub set_data {
  my $self = shift;
  throw Error::NotStatic unless isa($self, __PACKAGE__);
  my $value = shift;
  throw Error::Programatic 'provide a hash' unless isa($value, 'HASH');
  tied(%$self)->__scan();
  my $data = tied(%$self)->__data;
  %$data = %$value;
}

# ------------------------------------------------------------------------------
# get_data - Return the data segment of this node
# ------------------------------------------------------------------------------

sub get_data {
  my $self = shift;
  throw Error::NotStatic unless isa($self, __PACKAGE__);
  tied(%$self)->__scan();
  tied(%$self)->__data();
}

# ------------------------------------------------------------------------------
# set_content - Set the content segment of this node
# ------------------------------------------------------------------------------

sub set_content {
  my $self = shift;
  throw Error::NotStatic unless isa($self, __PACKAGE__);
  tied(%$self)->__scan();
  tied(%$self)->__content(@_);
}

# ------------------------------------------------------------------------------
# get_content - Return the content segment of this node
# ------------------------------------------------------------------------------

sub get_content {
  my $self = shift;
  throw Error::NotStatic unless isa($self, __PACKAGE__);
  tied(%$self)->__scan();
  tied(%$self)->__content();
}

# ------------------------------------------------------------------------------
# set_raw_content - Set the literal contents
# ------------------------------------------------------------------------------

sub set_raw_content {
  my $self = shift;
  throw Error::NotStatic unless isa($self, __PACKAGE__);
  tied(%$self)->__scan();
  tied(%$self)->__raw_content(@_);
}

# ------------------------------------------------------------------------------
# get_raw_content - Return the literal contents
# ------------------------------------------------------------------------------

sub get_raw_content {
  my $self = shift;
  throw Error::NotStatic unless isa($self, __PACKAGE__);
  tied(%$self)->__scan();
  tied(%$self)->__raw_content();
}

# ------------------------------------------------------------------------------
# get_stat - Get the stat record for this node
# get_stat
# ------------------------------------------------------------------------------

sub get_stat {
  my $self = shift;
  throw Error::NotStatic unless isa($self, __PACKAGE__);
  my $tied = tied(%$self);
  defined($tied->__checkpoint) && $tied->__checkpoint == ${$tied->__global_cp}
    ? $tied->__stat : stat $tied->__path;
}

# ------------------------------------------------------------------------------
# get_mtime - Modified time of persistent storage
# get_mtime
# ------------------------------------------------------------------------------

sub get_mtime {
  my $self = shift;
  throw Error::NotStatic unless isa($self, __PACKAGE__);
  tied(%$self)->__scan;
  tied(%$self)->__disk_mtime;
}

sub rename {
  my $self = shift;
  throw Error::NotStatic unless isa($self, __PACKAGE__);
  my $new_name = shift;
  throw Error::IllegalArg unless defined($new_name);
  my $tied = tied(%$self);
  my $path = $tied->__path;
  my $cwd = path_parent($path);
  my $new_path = addr_join($cwd, $new_name);
  throw Error::Logical 'Path mismatch' unless $cwd eq path_parent($new_path);
  rename $path, $new_path;
  $tied->__path($new_path);
  my $stat = stat $tied->__path or die "No stat after rename";
  $tied->__stat($stat);
  $tied->__mtime(int(time));
  $tied->__disk_mtime($stat ? $stat->mtime : undef);
  $self;
}

# ------------------------------------------------------------------------------
# save - Save changes made to this node
# ------------------------------------------------------------------------------

sub save {
  my $self = shift;
  throw Error::NotStatic unless isa($self, __PACKAGE__);
  my $tied = tied(%$self);
  $tied->__scan();
  return if (-e $tied->__path && !$tied->__mtime); # Have not read
#$tied->__errlog( " <write> " . $tied->__path . "\n");
  $tied->__write_to_disk();
  my $stat = stat $tied->__path or die "No stat after save";
  $tied->__stat($stat);
  $tied->__mtime(int(time));
  $tied->__disk_mtime($stat ? $stat->mtime : undef);
  $tied->__track_change();
  unless(isa($self, Get_Pkg($tied->__path, undef, \$stat))) {
    # We are no longer a valid package for this path
    $tied->__checkpoint(0);
  }
  $self;
}

# ------------------------------------------------------------------------------
# expire - Detect changes made in persistent storage on next access
# expire
# ------------------------------------------------------------------------------

sub expire {
  my $self = shift;
  throw Error::NotStatic unless isa($self, __PACKAGE__);
  my $tied = tied(%$self);
  $tied->__checkpoint(0);
}

# ------------------------------------------------------------------------------
# refresh - Detect changes made in persistent storage
# refresh
# refresh $key
# ------------------------------------------------------------------------------

sub refresh {
  my $self = shift;
  throw Error::NotStatic unless isa($self, __PACKAGE__);
  tied(%$self)->__scan(@_);
}

# ------------------------------------------------------------------------------
# reload - Force this node to reload itself
# reload
# ------------------------------------------------------------------------------

sub reload {
  my $self = shift;
  throw Error::NotStatic unless isa($self, __PACKAGE__);
  my $tied = tied(%$self);
  $tied->__mtime(0);
  $tied->__checkpoint(0);
  $tied->__scan;
}

sub free {
  my $self = shift;
  throw Error::NotStatic unless isa($self, __PACKAGE__);
  my $tied = tied(%$self);
  $tied->__checkpoint(0);
  $tied->__mtime(0);
  %{$tied->__data} = ();
  %{$tied->__private} = ();
  $tied->___content('');
  $tied->__raw_content('');
}

# ------------------------------------------------------------------------------
# is_valid - Are we still a valid handler for this path?
# ------------------------------------------------------------------------------

sub is_valid {
  my $self = shift;
  throw Error::NotStatic unless isa($self, __PACKAGE__);
  my $tied = tied(%$self);
  # only check once per request (must happen for any call to __scan)
  return 1 if defined $tied->__checkpoint
    && $tied->__checkpoint >= ${$tied->__global_cp};
  isa($self, Get_Pkg($tied->__path));
}

# ==============================================================================
# Tie interface
#
# Note ___content is the same as __content, however it should not be
# overridden in derived classes, allowing the __content method to literally
# set the scalar value.
# ==============================================================================

sub __data        { exists $_[1] ? $_[0][0]    = $_[1] : $_[0][0] }
sub __content     { exists $_[1] ? $_[0][1]    = $_[1] : $_[0][1] }
sub ___content    { exists $_[1] ? $_[0][1]    = $_[1] : $_[0][1] } # See above
sub __meta        { exists $_[1] ? $_[0][2]    = $_[1] : $_[0][2] }
sub __path        { exists $_[1] ? $_[0][2][0] = $_[1] : $_[0][2][0] }
sub __addr        { exists $_[1] ? $_[0][2][1] = $_[1] : $_[0][2][1] }
sub __hub         { exists $_[1] ? $_[0][2][2] = $_[1] : $_[0][2][2] }
sub __checkpoint  { exists $_[1] ? $_[0][2][3] = $_[1] : $_[0][2][3] }
sub __mtime       { exists $_[1] ? $_[0][2][4] = $_[1] : $_[0][2][4] }
sub __access_log  { exists $_[1] ? $_[0][2][5] = $_[1] : $_[0][2][5] }
sub __global_cp   { exists $_[1] ? $_[0][2][6] = $_[1] : $_[0][2][6] }
sub __disk_mtime  { exists $_[1] ? $_[0][2][7] = $_[1] : $_[0][2][7] }
sub __stat        { exists $_[1] ? $_[0][2][8] = $_[1] : $_[0][2][8] }
sub __change_log  { exists $_[1] ? $_[0][2][9] = $_[1] : $_[0][2][9] }
sub __private     { exists $_[1] ? $_[0][3]    = $_[1] : $_[0][3] }
sub __raw_content { exists $_[1] ? $_[0][4]    = $_[1] : $_[0][4] }

# ------------------------------------------------------------------------------
# TIEHASH - See C<perltie>
# TIEHASH $class, $path
# TIEHASH $class, $path, $stat
# TIEHASH $class, $path, $stat, $hub
# ------------------------------------------------------------------------------

sub TIEHASH {
  my $class = shift;
  my $path = shift or confess '$path';
  my $stat = shift;
  my $hub = shift;
  # TODO refactor for efficiency (path_to_addr)
  my $addr = $hub ? $hub->__path_to_addr($path) : undef;
  my $access_log = $hub ? $hub->__fs_access : Data::Hub::FileSystem::AccessLog->new();
  my $change_log = $hub ? $hub->__fs_change : Data::Hub::FileSystem::AccessLog->new();
  my $local_cp = 1; # must be greater-than zero for expire to work
  my $global_cp = $hub ? $hub->__req_count : \$local_cp;
  bless [
    Data::OrderedHash->new(), # __data
    undef, # __content
    [ # __meta
      $path, # __path
      $addr, # __addr
      $hub, # __hub
      undef, # __checkpoint
      undef, # __mtime
      $access_log, # __access_log
      $global_cp, # __global_cp
      undef, # __disk_mtime
      $stat, # __stat
      $change_log, # __change_log
    ],
    {}, # __private
    undef, # __raw_content
  ], $class;
}

# ------------------------------------------------------------------------------
# FIRSTKEY - See C<perltie>
# Scan for changes. This allows cached instances to refresh themselves upon 
# member access.
# ------------------------------------------------------------------------------

sub FIRSTKEY {
  my $tied = shift;
  $tied->__scan(undef, 1);
  $tied->SUPER::FIRSTKEY(@_)
}

# ------------------------------------------------------------------------------
# FETCH - See C<perltie>
# Scan for changes.
# ------------------------------------------------------------------------------

sub FETCH {
  $_[0]->__scan($_[1]);
  $_[0]->__data->{$_[1]};
}

# ------------------------------------------------------------------------------
# EXISTS - See C<perltie>
# Scan for changes.
# ------------------------------------------------------------------------------

sub EXISTS {
  $_[0]->__scan($_[1]);
  exists $_[0][0]->{$_[1]};
}

# ------------------------------------------------------------------------------
# STORE - See C<perltie>
# Scan for changes and update modified time.
# ------------------------------------------------------------------------------

sub STORE {
  $_[0]->__scan($_[1]);
  $_[0]->__mtime(int(time));
  $_[0]->__data->{$_[1]} = $_[2];
}

# ------------------------------------------------------------------------------
# DELETE - See C<perltie>
# Scan for changes and update modified time.
# ------------------------------------------------------------------------------

sub DELETE {
  $_[0]->__scan($_[1]);
  $_[0]->__mtime(int(time));
  delete $_[0]->__data->{$_[1]};
}

# ------------------------------------------------------------------------------
# SCALAR - See C<perltie>
# Scan for changes and return node contents.
# ------------------------------------------------------------------------------

sub SCALAR {
  $_[0]->__scan;
  $_[0]->__content;
}

# ------------------------------------------------------------------------------
# __read_from_disk - Read instance data from persistent storage
# __read_from_disk
# Base method.
# ------------------------------------------------------------------------------

sub __read_from_disk {
}

# ------------------------------------------------------------------------------
# __write_to_disk - Write instance data to persistent storage
# __write_to_disk
# Abstract base method.
# ------------------------------------------------------------------------------

sub __write_to_disk {
  throw Error::Programatic "Not a persitent object";
}

# ------------------------------------------------------------------------------
# __scan - Read from disk if necessary
# __scan
# __scan $key             # used only by directories
# __scan undef, 1         # used by directories to index into fs_access_log
#
# When C<$key> is provided, this resource is then instantiated if necessary.
# This allows L<Data::Hub::FileSystem::Directory> to maintain a key/value pair of
# filename/undef when reading the directory and delay instantiating (and hence
# recursively reading) the value until it is called upon.
# ------------------------------------------------------------------------------

sub __scan {
  my $reload = 1;
  my $stat = undef;
#$_[0]->__errlog( " <scan> " . $_[0]->__path . "\n");
  if (defined($_[0]->__checkpoint)) {
    # we have read the node before
    if ($_[0]->__checkpoint < ${$_[0]->__global_cp}) {
      # no global checkpoint set, or
      # we have not checked the node during this request
      if ($_[0]->__path && $_[0]->__mtime) {
        # the node existed the last time we read it
        # -1 to capture files modified in the same second
        $stat = stat $_[0]->__path;
#       my $last_checked = $_[0]->__mtime - 1;
#       my $last_modified = $stat->mtime;
#$_[0]->__errlog( " <rechk> $last_checked < $last_modified " . $_[0]->__path . "\n");
        $reload = $stat ? ($_[0]->__mtime - 1) < $stat->mtime : 0;
      } else {
        # Forced reload scenario
        $stat = stat $_[0]->__path;
      }
    } else {
      # we have checked the node during this request
      $stat = $_[0]->__stat;
      $reload = 0;
    }
  } else {
    # first read
#   $stat = stat $_[0]->__path;
#   $stat = $_[0]->__stat || stat $_[0]->__path;
    $stat = $_[0]->__stat;
    $reload = defined $stat;
  }
  if ($reload) {
#$_[0]->__errlog( "  <read> " . $_[0]->__path . "\n");
    # $stat must be defined at this point
    $_[0]->__stat($stat);
    $_[0]->__mtime(int(time));
    $_[0]->__disk_mtime($stat ? $stat->mtime : undef);
    $_[0]->__read_from_disk();
  }
  if (defined $stat) {
    $_[0]->__track() unless S_ISDIR($stat->mode) && !$_[2];
  }
  $_[0]->__checkpoint(${$_[0]->__global_cp});
  $reload;
}

sub __track {
#$_[0]->__errlog( "track:", $_[0]->__path, "\n");
  $_[0]->__access_log->set_value($_[0]->__path, $_[0]->__disk_mtime);
}

sub __track_change {
# $_[0]->__errlog( "track change:", $_[0]->__path, "\n");
  $_[0]->__change_log->set_value($_[0]->__path, $_[0]->__disk_mtime);
}

sub __errlog {
  my $tied = shift;
  my $hub = $tied->__hub;
  my $log = $hub ? $hub->__fetch('/sys/log') : undef;
  if ($log) {
    $log->info(@_);
  } else {
    warn @_;
  }
}

1;

package Data::Hub::FileSystem::Node::Crown;
use strict;
use Carp qw(croak confess);
use IO::File;
use Fcntl qw(:flock);
# ------------------------------------------------------------------------------
# The first 128 bytes of a file
# 
#   tie \$crown, Data::Hub::FileSystem::Node::Crown, $path;
#
#   - Reading from the file is delayed until needed.
#   - Subsequent calls re-use the read buffer.
#   - Will croak on directories.
#
# Underlying structure is an array
#
#   [0] - Path to file
#   [1] - Buffer size (default=128)
#   [2] - (reserved)
#   [3] - Buffer
# ------------------------------------------------------------------------------
sub TIESCALAR {
  my $pkg = shift;
  my $path = shift or confess 'No file path provided';
  my $len = shift || 128;
  bless [
    $path,
    $len,
    undef,
    undef,
  ], $pkg;
}
sub FETCH {
  return $_[0][3] if defined $_[0][3];
  my $buf = undef;
  {
#warn sprintf("READ [%d]: %s\n", time, $_[0][0]);
    if (my $h = IO::File->new('<' . $_[0][0])) {
      # Removing the locking call as waiting for a lock is hardly ideal when
      # simply trying to determine a file's type.  This method is called _very_
      # often. (2001-02-23)
      #flock $h, LOCK_SH or die $!;
      sysread $h, $buf, $_[0][1], 0;
    }
  }
  $_[0][3] = defined $buf ? $buf : '';
}
sub DESTROY {}
sub STORE {}
1;

__END__


Powered by Groonga
Maintained by Kenichi Ishigaki <ishigaki@cpan.org>. If you find anything, submit it on GitHub.