diff options
author | FreeArtMan <dos21h@gmail.com> | 2015-02-07 15:30:09 +0900 |
---|---|---|
committer | FreeArtMan <dos21h@gmail.com> | 2015-02-07 15:30:09 +0900 |
commit | 7fffc29b196336f299fecc7e55db17e77d97d1e2 (patch) | |
tree | 3f43526801defa8d5b0b97047524eb536d11260c /utf8_count/utf8_count.lua | |
parent | a97d6a9ee64f384ef995261a1518f7d39b6f2649 (diff) | |
download | code-snippets-7fffc29b196336f299fecc7e55db17e77d97d1e2.tar.gz code-snippets-7fffc29b196336f299fecc7e55db17e77d97d1e2.zip |
added utf8 character statistics counter. utf8_count
Diffstat (limited to 'utf8_count/utf8_count.lua')
-rw-r--r-- | utf8_count/utf8_count.lua | 45 |
1 files changed, 45 insertions, 0 deletions
#!/usr/bin/lua

-- utf8_count: tally how often each UTF-8 character occurs in a text file and
-- print the characters from most to least frequent.
--
-- Usage: utf8_count.lua [FILE]
--   FILE  text file to analyse; defaults to "test/teabook.txt"

local DEFAULT_PATH = "test/teabook.txt"

--check if version is supported
--why 5.3? utf8 and 64bit support
if (_VERSION ~= "Lua 5.3") then
  print "Wrong lua versions"
  print "Supported 5.3 only"
  os.exit(1)
end

-- `arg` is only populated by the standalone interpreter, so guard against it
-- being absent and fall back to the bundled sample text.
local path = (arg and arg[1]) or DEFAULT_PATH

--open file
local utf8_file = io.open(path, "r")
if utf8_file == nil then
  print "Couldnot open file"
  os.exit(1)
end

-- Count occurrences per code point. The file is read line by line, so the
-- line terminators themselves are not part of the statistics.
-- utf8.codes raises an error if a line is not valid UTF-8.
local counts = {}
for line in utf8_file:lines() do
  for _, code in utf8.codes(line) do
    counts[code] = (counts[code] or 0) + 1
  end
end
utf8_file:close()

-- Flatten the code point -> count map into a sequence so it can be sorted.
local sorted_counts = {}
for code, count in pairs(counts) do
  sorted_counts[#sorted_counts + 1] = { ch = code, val = count }
end

-- Highest count first. Ties are broken by code point: pairs() order is
-- unspecified and table.sort is not stable, so without the tie-breaker the
-- relative order of equally frequent characters would be arbitrary.
table.sort(sorted_counts, function(a, b)
  if a.val ~= b.val then
    return a.val > b.val
  end
  return a.ch < b.ch
end)

for _, entry in ipairs(sorted_counts) do
  print(utf8.char(entry.ch), "=", entry.val)
end