Is this code OK? I don't really have a clue which normalization-form I should us (the only thing I noticed is with NFD
I get a wrong output).
#!/usr/local/bin/perl
use warnings;
use 5.014;
use utf8;
binmode STDOUT, ':encoding(utf-8)';
use Unicode::Normalize;
use Unicode::Collate::Locale;
use Unicode::GCString;
my $text = "my taxt täxt";
my %hash;
while ( $text =~ m/(\p{Alphabetic}+(?:'\p{Alphabetic}+)?)/g ) { #'
my $word = $1;
my $NFC_word = NFC( $word );
$hash{$NFC_word}++;
}
my $collator = Unicode::Collate::Locale->new( locale => 'DE' );
for my $word ( $collator->sort( keys %hash ) ) {
my $gcword = Unicode::GCString->new( $word );
printf "%-10.10s : %5d\n", $gcword, $hash{$word};
}