summary | refs | log | tree | commit | diff
path: root/utf8_count
diff options
context:
space:
mode:
Diffstat (limited to 'utf8_count')
-rw-r--r-- utf8_count/Makefile 5
-rw-r--r-- utf8_count/utf8_count.lua 139
2 files changed, 127 insertions, 17 deletions
diff --git a/utf8_count/Makefile b/utf8_count/Makefile
index ba52396..6f93a96 100644
--- a/utf8_count/Makefile
+++ b/utf8_count/Makefile
@@ -1,4 +1,7 @@
LUA=/home/fam/downloads/source/lua/lua-5.3.0/src/lua
make:
- @$(LUA) utf8_count.lua
+ @$(LUA) utf8_count.lua
+
+test1:
+ $(LUA) utf8_count.lua -d -i -f test/chap1.txt test/chap2.txt -c
diff --git a/utf8_count/utf8_count.lua b/utf8_count/utf8_count.lua
index a196e9a..a1d0a4e 100644
--- a/utf8_count/utf8_count.lua
+++ b/utf8_count/utf8_count.lua
@@ -1,5 +1,7 @@
#!/usr/bin/lua
+debug_mode = false
+
--check if version is supported
--why 5.3? utf8 and 64bit support
if (_VERSION ~= "Lua 5.3" ) then
@@ -8,24 +10,110 @@ if (_VERSION ~= "Lua 5.3" ) then
os.exit(1)
end
---open file
-utf8_file = io.open( "test/teabook.txt", "r" )
-if ( utf8_file == nil) then
- print "Couldnot open file"
- os.exit(1)
+--if there are command line arguments, try to read and support them
+--get the list of input files, if given, as "-f [FILE] [FILE] ... "
+list_ignore_chars = " :。,.][《》!?:"
+
+arg_files = {}
+flag_input_files = false
+flag_show_stats = false
+flag_compact_stats = false
+flag_ignore_chars = false
+
+function reset_flags()
+ flag_input_files = false
end
---read contect in utf8
---l = utf8_file:read("*l")
-hier_table = {}
-for line in utf8_file:lines() do
- for p,c in utf8.codes( line ) do
- if hier_table[c] == nil then
- hier_table[c] = 1
- else
- hier_table[c] = hier_table[c] + 1
+for k,v in ipairs( arg ) do
+ if ( v == "-f") then
+ flag_input_files = true
+ elseif ( v == "-s") then
+ reset_flags()
+ flag_show_stats = true
+ elseif ( v == "-c" ) then
+ reset_flags()
+ flag_compact_stats = true
+ elseif ( v == "-d" ) then
+ debug_mode = true
+ elseif ( v == "-i") then
+ reset_flags()
+ flag_ignore_chars = true
+ elseif ( v == "-h") then
+ reset_flags()
+ print(arg[-1]," [FLAGS] [FILE] ")
+ print( "-f [FILE] ... - list of file from with is used" )
+ print( "-s - show char stat")
+ print( "-c - show compact char stat")
+ print( "-d - debug mode")
+ --print( "-i - ignore whitespaces, newlines")
+ print( "-h - just this help")
+ else
+ --not a very nice way to parse everything, but it will do
+ if ( (flag_input_files == true) and (v ~= "-f") ) then
+ table.insert( arg_files, v )
+ end
+ end
+end
+
+if debug_mode then
+ print( arg_files )
+ for k,v in pairs(arg_files) do
+ print(k,v)
+ end
+end
+
+function merge_tables( t1, t2 )
+ local t = t1
+ for k,v in pairs( t2 ) do
+ if ( t[k] == nil ) then
+ t[k] = v
+ elseif ( t[k] ~= nil ) then
+ t[k] = t[k] + v
end
end
+ return t
+end
+
+function get_file_stat( filename )
+ local hier_table = {}
+ --open file
+ utf8_file = io.open( filename, "r" )
+ if ( utf8_file == nil) then
+ print "Couldnot open file"
+ os.exit(1)
+ end
+ for line in utf8_file:lines() do
+ for p,c in utf8.codes( line ) do
+ --get the character's code and ignore it by comparing the integer value
+ --if flag_ignore_chars == true then
+ --print( c, utf8.char(c) )
+ -- local ch = utf8.char(c)
+ --if string.match( list_ignore_chars, c ) then
+ -- if c == 12290 then
+ -- print("asdsad")
+ -- goto continue
+ -- end
+ --end
+ if hier_table[c] == nil then
+ hier_table[c] = 1
+ else
+ hier_table[c] = hier_table[c] + 1
+ end
+ ::continue::
+ end
+ end
+ io.close( utf8_file )
+ return hier_table
+end
+
+
+
+--read contect in utf8
+--l = utf8_file:read("*l")
+hier_table = {}
+for k,fn in ipairs(arg_files) do
+ local char_stats = get_file_stat( fn )
+ hier_table = merge_tables( hier_table, char_stats )
end
hier_table_sorted = {}
@@ -33,13 +121,32 @@ for k,v in pairs(hier_table) do
table.insert( hier_table_sorted, {ch=k,val=v})
end
+--sort table chars
function cmpa( a,b )
if a.val ~= nil and b.val ~= nil then
return a.val > b.val
end
end
table.sort( hier_table_sorted, cmpa )
-for k,v in ipairs(hier_table_sorted) do
- print(utf8.char(v.ch), "=",hier_table[v.ch] )
+
+if ( flag_show_stats == true ) then
+ for k,v in pairs(hier_table_sorted) do
+ print(utf8.char(v.ch),"=", v.val)
+ end
end
+--should combining this with "-s" be counted as a bug?
+if ( flag_compact_stats == true ) then
+ local new_val = 0
+ for k,v in pairs( hier_table_sorted ) do
+ if v.val ~= new_val then
+ new_val = v.val
+ print("")
+ print(v.val)
+ io.write(utf8.char(v.ch)," ")
+ else
+ io.write(utf8.char(v.ch)," ")
+ end
+ end
+ print("")
+end