#!/usr/bin/perl

# Copyright (C) 2015 Thorsten Kukuk
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# in Version 2 as published by the Free Software Foundation.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
# MA  02110-1301, USA.

=head1 NAME

buchstabenhaeufigkeit - count how often each letter occurs in a word

=head1 SYNOPSIS

buchstabenhaeufigkeit [options] [word ...]

=head1 DESCRIPTION

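Counts how often each of the letters a-z occurs in every given word;
upper and lower case are treated as the same letter.
All other characters are ignored (and reported as ignored).
If no word is given, words are read from standard input, one per line.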

=head1 OPTIONS

    --version           Print version number and exit
    --usage             Print usage
    --man               Print the full manual page
    -h|-?|--help        Help

=cut

use strict;
use warnings;
use utf8;
use Encode qw(decode encode);
use Pod::Usage;

#
# process command line arguments
#
use Getopt::Long;
my $help = 0;
my $man = 0;
my $version = 0;
my $usage = 0;

# Unknown options make GetOptions return false; show the usage text and
# exit with status 2 in that case.
GetOptions('version' => \$version,
           'man' => \$man,
           'usage' => \$usage,
           'help|h|?' => \$help) or pod2usage(2);
pod2usage(0) if $help;
pod2usage(-exitstatus => 0, -verbose => 2) if $man;
pod2usage(-exitstatus => 0, -verbose => 0) if $usage;

if ($version) {
  print "buchstabenhaeufigkeit (geo-tools) 1.23\n";
  exit;
}

# Lines read from STDIN are decoded from UTF-8, and everything printed is
# encoded as UTF-8.
binmode(\*STDIN, ":utf8");
binmode(\*STDOUT, ":utf8");

# Decide on the input source first: no arguments at all, or a single "-",
# means "read the words from standard input". This test has to come before
# the argument loop, otherwise a lone "-" would be counted as a word and
# the stdin case could never be selected explicitly.
my $read_stdin = (@ARGV == 0) || (@ARGV == 1 && $ARGV[0] eq "-");

if ($read_stdin) {
  # One word per input line; the STDIN layer already decoded the text.
  while (my $line = <STDIN>) {
    chomp $line;
    print "Häufigkeit($line):\n";
    haeufigkeit($line);
    print "\n";
  }
} else {
  foreach my $arg (@ARGV) {
    # Command line arguments arrive as raw bytes and must be decoded.
    my $word = decode("utf-8", $arg);
    print "Häufigkeit($word):\n";
    haeufigkeit($word);
    print "\n";
  }
}

exit;

# haeufigkeit($word)
#
# Prints the frequency of every ASCII letter (a-z, upper and lower case
# folded together) in $word to the currently selected output handle, one
# "letter: count" line per letter, sorted alphabetically.  Every other
# character is reported with an "Ignored: ..." line at the point where it
# is encountered, i.e. before the frequency lines.
#
# Returns nothing.
sub haeufigkeit {
  my ($word) = @_;
  my %Buchstaben;

  foreach my $char (split(//, $word)) {
    my $c = lc($char);

    # Accept exactly one ASCII letter.  A string range comparison
    # ($c ge 'a' && $c le 'z') is not enough: lc() may return more than
    # one character (lc("\x{130}") is "i\x{307}"), and such a string still
    # sorts between 'a' and 'z' and would be counted as a letter.
    if ($c =~ /\A[a-z]\z/) {
      $Buchstaben{$c}++;
    } else {
      print "Ignored: $c\n";
    }
  }

  foreach my $letter (sort keys %Buchstaben) {
    print "$letter: $Buchstaben{$letter}\n";
  }

  return;
}
