#!/usr/bin/lua

-- utf8_count: UTF-8 character statistics counter.
--
-- Reads a text file line by line, counts how often each Unicode code point
-- occurs and prints one "<char>	=	<count>" row per code point, most
-- frequent first. Line terminators are not counted because `file:lines()`
-- strips them.
--
-- Usage: utf8_count.lua [FILE]
--   FILE  path of the text to analyse (default: "test/teabook.txt")
--
-- Provenance: added by FreeArtMan on 2015-02-07, commit 7fffc29b
-- ("added utf8 character statistics counter. utf8_count").

local DEFAULT_PATH = "test/teabook.txt"

--- Print each argument on its own line to stderr and exit with status 1.
-- @param ... strings describing the failure
local function die(...)
  io.stderr:write(table.concat({ ... }, "\n"), "\n")
  os.exit(1)
end

-- The script needs the `utf8` standard library, which is available from
-- Lua 5.3 onwards. Detect the feature itself rather than comparing
-- `_VERSION` to one exact string, so newer interpreters are accepted too.
if type(utf8) ~= "table" then
  die("Wrong lua versions", "Supported 5.3 or newer")
end

--- Add every code point of `line` to the running tally.
-- Raises an error (from `utf8.codes`) when `line` is not valid UTF-8.
-- @param counts table mapping code point (integer) -> occurrences so far
-- @param line   string holding one line of input
local function tally_line(counts, line)
  for _, code_point in utf8.codes(line) do
    counts[code_point] = (counts[code_point] or 0) + 1
  end
end

--- Order entries by descending count; equal counts fall back to ascending
-- code point so the output is the same on every run.
-- @param a table with fields `ch` (code point) and `val` (count)
-- @param b table with fields `ch` (code point) and `val` (count)
-- @return boolean true when `a` must be printed before `b`
local function cmpa(a, b)
  if a.val ~= b.val then
    return a.val > b.val
  end
  return a.ch < b.ch
end

-- Open the input; the path may be overridden by the first CLI argument.
local path = (arg and arg[1]) or DEFAULT_PATH
local utf8_file, open_err = io.open(path, "r")
if utf8_file == nil then
  die("Couldnot open file", tostring(open_err))
end

-- Count code points, remembering the line number for error reporting.
local hier_table = {}
local line_no = 0
for line in utf8_file:lines() do
  line_no = line_no + 1
  local ok, err = pcall(tally_line, hier_table, line)
  if not ok then
    -- Release the handle before bailing out with a located diagnostic.
    utf8_file:close()
    die(string.format("Invalid UTF-8 in %s at line %d", path, line_no),
        tostring(err))
  end
end
utf8_file:close()

-- Flatten the tally into a sequence so it can be sorted.
local hier_table_sorted = {}
for code_point, count in pairs(hier_table) do
  hier_table_sorted[#hier_table_sorted + 1] = { ch = code_point, val = count }
end
table.sort(hier_table_sorted, cmpa)

-- `print` separates its arguments with tabs, giving "<char>	=	<count>".
for _, entry in ipairs(hier_table_sorted) do
  print(utf8.char(entry.ch), "=", entry.val)
end