#! /usr/bin/perl # # -f file the text after which to model the statistics # -i to init the file.stats table # -e -k key -m message -s startword to encode message in a # text starting with startword # -d -k key to decode a text given on stdin # # use strict; use warnings; use Storable qw(nstore retrieve); use Getopt::Std; use Digest::SHA1 qw(sha1); # use MLDBM qw(DB_File); my %stats; my $file; my ( $word, $i, $window, $wsize, $mess, $text, $key ); # We're using strict and must declare the $opts our ( $opt_s, $opt_f, $opt_m, $opt_h, $opt_w, $opt_i, $opt_d, $opt_e, $opt_l, $opt_k, $opt_D ); $window = ""; # forward declarations sub getweightrand; sub mkcoin; sub has_eq; sub usage; getopts('deif:k:m:s:w:l:Dh'); if ($opt_h) { usage; exit; # notreached } # windowsize $wsize = $opt_w; $wsize = 7 unless $wsize; # output length for $opt_D $outl = 1000; $file = $opt_f; usage unless $file; # $tie (%stats, 'MLDBM', "$file.stats") or die "could not tie to $file.stats"; # Init the tables if ($opt_i) { open I, "<$file" or die "Could not open $file: $@"; while () { chomp; $_ .= " "; my @chars = split //, $_; # print "Chars: " . (join "|", @chars) ."\n"; while ( length($window) < $wsize ) { $window .= shift @chars; } if ( length($window) == $wsize ) { while ( scalar @chars ) { my $next = shift @chars; $stats{$window}{$next}++; $stats{$window}{'num'}++; $window .= $next; # print "$window\n"; $window = substr $window, -($wsize), $wsize; } } else { print "window wrong size?\n"; } } close I; print STDERR "Finished parsing $file\n"; print STDERR "Calling nstore\n"; nstore( \%stats, "$file.stats" ) or die "Could not store statistics"; # untie %stats; exit; # notreached } # encoding/decoding requires a key to seed the PRNG if ( $opt_e or $opt_d ) { $key = $opt_k; usage unless $key; } if ($opt_D ) { $key = $opt_k; usage unless $key; } # Encoding if ($opt_e or $opt_D) { my $count = 0; # The word to start with $word = $opt_s; usage unless $word; if (length $word < $wlen) { die "Startword should be $wlen chars at least"; } if ($opt_e) { $mess = $opt_m; usage unless $mess; } my $sref = retrieve("$file.stats") or die "Could not open statsfile $file.stats"; %stats = %$sref; # $word should not be longer than $wsize if ( length($word) > $wsize ) { $word = substr $word, -$wsize, $wsize; } $text = $word; if ( not exists( $stats{$word} ) ) { die "$word is not in $file"; } $window = $word; my $alt; my $coin = mkcoin($key); if ($opt_e){ # convert the message to bits my @messbit = split //, ( unpack "b*", ( pack "a*", $mess ) ); } while ( exists( $stats{$window} ) and ( scalar @messbit or ($opt_D and $count < $outl ) ) ) { my $rand = &$coin; my $next = getweightrand( $window, $rand ); if ($opt_e) { # If there's another character equally likely # embed a bit here if ( defined( $alt = has_eq( $window, $next ) ) ) { print STDERR "Alternatives: $next and $alt\n"; my $b = shift @messbit; if ( defined $b and $b == 1 ) { $next = $alt; } } } $window .= $next; # shift the window $window = substr $window, 1, $wsize; $count++; $text .= $next; } print $text; exit; #notreached } if ($opt_d) { $key = $opt_k; usage unless $key; my $sref = retrieve("$file.stats") or die "Could not open statsfile $file.stats"; %stats = %$sref; local $/; undef $/; my $text = ; $word = substr $text, 0, $wsize; $text = substr $text, $wsize; if ( not exists( $stats{$word} ) ) { die "$word is not in $file"; } $window = $word; my $alt; my ( $n, $r, $check ); my $coin = mkcoin($key); my @message; while ( $n = substr( $text, 0, 1 ) and ( $text = substr $text, 1, length($text) ) ) { $r = &$coin; my $next = getweightrand( $window, $r ); # possible encoded bit? if ( defined( $check = has_eq( $window, $next ) ) ) { # 0bit ? if ( $next eq $n ) { push @message, 0; } elsif ( $check eq $n ) { push @message, 1; } else { print "Unclear ecoding\n"; } } # shift window $window .= $n; $window = substr $window, 1, $wsize; } my $clearbits = join "", @message; my $cleartext = unpack "a*", ( pack "b*", $clearbits ); print "$cleartext\n"; exit; # notreached } # notreached # returns a CODE ref to a seeded PRNG with internal counter sub mkcoin { my $ks = shift; my $c = 0; return sub { my $r = sha1( $ks . $c ); my $z = unpack "L", $r; $z = $z / 4294967295; $c++; return $z; }; } # looks in global %stats for the possible characters # following a given window. Chooses a character true # to the distribution and a random value \in {0,1}. sub getweightrand { my ( $w, $r ) = @_; # time/memory tradeoff. May not be usefull for # bigger $wsize or certain texts. if ( not exists( $stats{$w}{table} ) ) { my %t; my $sum; foreach my $i ( sort keys %{ $stats{$w} } ) { next if $i =~ m/^num$/; $sum += ( $stats{$w}{$i} / $stats{$w}{num} ); $t{$sum} = $i; } $stats{$w}{table} = \%t; } my $min = 1; foreach my $j ( keys %{ $stats{$w}{table} } ) { next if ( $j < $r ); $min = $j if $j < $min; } return $stats{$w}{table}->{$min}; } # Checks if there is another character with equal # likelihood in $stats{$w} for a given character $n # and window $w. Returns that character or undef # if no such exists sub has_eq { my ( $w, $n ) = @_; foreach my $k ( sort keys %{ $stats{$w} } ) { next if $k =~ m/^(num|table|\Q$n\E)$/; if ( $stats{$w}{$k} == $stats{$w}{$n} ) { return $k; } } return undef; } sub usage { print < : init the statistics for -e -m -f -k -s : create output after with embedded using starting with . -d -f -k : read from stdin and extract embedded message using after statistics modelled after . -D -l -k -f -s : create output after of bytes starting with with randomness generated from . [-w ] : set window size to , default is 7. OFF }