#!/usr/bin/lua
--- Count how often each Unicode code point occurs in a set of UTF-8 files.
--
-- Command line:
--   -f [FILE] ...  every following non-flag argument is a file to read
--   -s             print one "char = count" line per code point
--   -c             print code points grouped under their shared count
--   -d             debug mode: dump the parsed file list
--   -i             accepted for compatibility; no filtering is implemented
--   -h             print the help text
--
-- Results are ordered by descending count; equal counts are ordered by
-- ascending code point so that the output is reproducible.

-- The script needs the utf8 library and 64-bit integers, both introduced in
-- Lua 5.3, so any release from 5.3 upwards is accepted.
-- No goto/label syntax is used anywhere below, which lets interpreters older
-- than 5.2 parse the file and reach this check instead of failing to load.
do
  local major, minor = _VERSION:match("^Lua (%d+)%.(%d+)")
  major, minor = tonumber(major), tonumber(minor)
  if not major or major < 5 or (major == 5 and minor < 3) then
    print "Wrong lua versions"
    print "Supported 5.3 or newer"
    os.exit(1)
  end
end

local debug_mode = false

-- Characters the unfinished "-i" filter was meant to skip. Nothing reads this
-- list yet; it is kept so the intended set is not lost.
local list_ignore_chars = " :。,.][《》!?:"

local arg_files = {}
local flag_input_files = false   -- true while arguments are being collected as files
local flag_show_stats = false
local flag_compact_stats = false
local flag_ignore_chars = false  -- set by "-i", currently never consulted

--- Stop treating the following arguments as input file names.
local function reset_flags()
  flag_input_files = false
end

-- `arg` is only provided by the standalone interpreter; fall back to an empty
-- list when the chunk is loaded by some other host.
local argv = arg or {}

for _, v in ipairs(argv) do
  if v == "-f" then
    flag_input_files = true
  elseif v == "-s" then
    reset_flags()
    flag_show_stats = true
  elseif v == "-c" then
    reset_flags()
    flag_compact_stats = true
  elseif v == "-d" then
    -- Deliberately does not end file collection, matching the original.
    debug_mode = true
  elseif v == "-i" then
    reset_flags()
    flag_ignore_chars = true
  elseif v == "-h" then
    reset_flags()
    -- Index 0 holds the script name; negative indices hold the interpreter.
    print(argv[0], " [FLAGS] [FILE] ")
    print( "-f [FILE] ... - list of file from with is used" )
    print( "-s - show char stat")
    print( "-c - show compact char stat")
    print( "-d - debug mode")
    --print( "-i - ignore whitespaces, newlines")
    print( "-h - just this help")
  elseif flag_input_files then
    -- Any other argument seen after "-f" is an input file name.
    arg_files[#arg_files + 1] = v
  end
end

if debug_mode then
  print( arg_files )
  for k, v in ipairs(arg_files) do
    print(k, v)
  end
end

--- Add the per-key counts of `t2` into `t1`.
-- `t1` is modified in place and is also the return value.
-- @param t1 table of counts to accumulate into
-- @param t2 table of counts to add
-- @return `t1`
function merge_tables( t1, t2 )
  for k, v in pairs( t2 ) do
    local current = t1[k]
    if current == nil then
      t1[k] = v
    else
      t1[k] = current + v
    end
  end
  return t1
end

--- Count the code points of one UTF-8 text file.
-- The file is read line by line, so line terminators are not counted.
-- Prints a diagnostic to stderr and exits with status 1 when the file cannot
-- be opened. `utf8.codes` raises an error on invalid UTF-8 input.
-- @param filename path of the file to read
-- @return table mapping code point (integer) to number of occurrences
function get_file_stat( filename )
  local counts = {}
  local file, err = io.open( filename, "r" )
  if file == nil then
    io.stderr:write("Couldnot open file: ", tostring(err), "\n")
    os.exit(1)
  end
  -- NOTE: an experimental filter for "-i" (skipping characters such as code
  -- point 12290) used to sit in this loop, commented out. It was never active.
  for line in file:lines() do
    for _, c in utf8.codes( line ) do
      counts[c] = (counts[c] or 0) + 1
    end
  end
  file:close()
  return counts
end

-- Accumulate the statistics of every requested file.
local hier_table = {}
for _, fn in ipairs(arg_files) do
  hier_table = merge_tables( hier_table, get_file_stat( fn ) )
end

-- Flatten the map into an array of {ch=, val=} records so it can be sorted.
local hier_table_sorted = {}
for ch, val in pairs(hier_table) do
  hier_table_sorted[#hier_table_sorted + 1] = { ch = ch, val = val }
end

--- Order records by descending count, then by ascending code point.
-- The tie-break keeps the result independent of hash iteration order.
local function cmpa( a, b )
  if a.val ~= b.val then
    return a.val > b.val
  end
  return a.ch < b.ch
end
table.sort( hier_table_sorted, cmpa )

if flag_show_stats then
  -- ipairs guarantees the sorted order is the printed order.
  for _, v in ipairs(hier_table_sorted) do
    print(utf8.char(v.ch), "=", v.val)
  end
end

-- "-c" may be combined with "-s"; both reports are then printed.
if flag_compact_stats then
  local new_val = 0
  for _, v in ipairs( hier_table_sorted ) do
    if v.val ~= new_val then
      -- A new count starts a new group: blank line, then the count itself.
      new_val = v.val
      print("")
      print(v.val)
    end
    io.write(utf8.char(v.ch), " ")
  end
  print("")
end