#!/usr/bin/perl -w
use strict;
use utf8;
use Encode qw(encode);
use Encode::HanExtra;
# Count the whitespace-separated tokens in $input_file and report how often
# each "valid" word occurs. A token is valid when it consists solely of word
# characters (\w) and does not begin with a digit; every other token is
# counted in the total but otherwise skipped.
my $input_file = "sample_file.txt";

# Start both counters at 0 so the summary line always prints numbers (and no
# "uninitialized value" warning is raised under -w) even when the file is
# empty or contains no valid words.
my $total = 0;    # every token seen
my $valid = 0;    # tokens that passed the filter
my %count;        # valid word => number of occurrences

# Three-argument open with a lexical handle: the file name can never be
# misread as an open mode, and the handle is not a package-global bareword.
# NOTE(review): no encoding layer is applied, so lines are read as raw bytes
# and tokens containing non-ASCII bytes will presumably match \W and be
# skipped. Confirm the input encoding and add an ":encoding(...)" layer if
# such words should be counted.
open(my $in_fh, '<', $input_file) or die "Can't open $input_file: $!";

while (my $line = <$in_fh>) {
    # split ' ' breaks the line on runs of whitespace and drops leading
    # empty fields, exactly like a bare split on $_.
    for my $token (split ' ', $line) {
        $total++;

        # Skip "strange" tokens: anything containing a non-word character,
        # or anything that starts with a digit.
        next if $token =~ /\W|^\d/;

        $valid++;
        $count{$token}++;
    }
}

# A failed close on a read handle is reported but does not suppress the
# results that were already collected.
close($in_fh) or warn "Can't close $input_file: $!";

print "Total things = $total, valid words = $valid\n";

# Report the words in the default (string) sort order.
for my $word (sort keys %count) {
    print "$word \t was seen \t $count{$word} \t times.\n";
}
Thursday, January 6, 2011
Perl: count the non-words in a file using a regex
Labels:
perl
Subscribe to:
Post Comments (Atom)
No comments:
Post a Comment