1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
|
#!/usr/bin/lua
-- Global switch for diagnostic output; the "-d" command-line flag turns it on.
debug_mode = false

-- Refuse to run on interpreters older than Lua 5.3.
-- Why 5.3? The script needs the utf8 library and 64-bit integers, both of
-- which first appeared in that release. Newer releases (e.g. 5.4) provide the
-- same facilities, so they are accepted instead of being rejected by an
-- exact string comparison against "Lua 5.3".
do
  local major, minor = string.match( _VERSION, "^Lua (%d+)%.(%d+)" )
  major, minor = tonumber( major ), tonumber( minor )
  local supported = major ~= nil
    and ( major > 5 or ( major == 5 and minor >= 3 ) )
    and utf8 ~= nil
  if not supported then
    print "Wrong lua versions"
    print "Supported 5.3 and newer"
    os.exit(1)
  end
end
-- Command-line state, filled in while the arguments are scanned.
-- Input files are the arguments that follow "-f": "-f [FILE] [FILE] ... "
arg_files = {}

-- Characters intended to be skipped while counting.
-- NOTE(review): no active code visible here reads this string yet.
list_ignore_chars = " :。,.][《》!?:"

-- Every option starts out disabled.
flag_input_files, flag_show_stats = false, false
flag_compact_stats, flag_ignore_chars = false, false
-- Leave "collecting input files" mode by clearing the global
-- flag_input_files. No other flag is touched. Returns nothing.
reset_flags = function()
  flag_input_files = false
end
-- Scan the command-line arguments once, left to right.
--   -f  start collecting the following non-option arguments into arg_files
--   -s  enable the per-character statistics listing
--   -c  enable the compact statistics listing
--   -d  enable debug output (does NOT end an open -f file list)
--   -i  set flag_ignore_chars (NOTE(review): nothing visible here acts on it)
--   -h  print the usage text (the scan continues afterwards)
-- Every option except -f and -d calls reset_flags(), which ends the file list.
-- Arguments that are not options are kept only while a -f list is open;
-- anything else is silently ignored.
-- `arg or {}` keeps the loop harmless when the chunk is not started by the
-- standalone interpreter and the global `arg` table does not exist.
for _, v in ipairs( arg or {} ) do
  if v == "-f" then
    flag_input_files = true
  elseif v == "-s" then
    reset_flags()
    flag_show_stats = true
  elseif v == "-c" then
    reset_flags()
    flag_compact_stats = true
  elseif v == "-d" then
    debug_mode = true
  elseif v == "-i" then
    reset_flags()
    flag_ignore_chars = true
  elseif v == "-h" then
    reset_flags()
    -- arg[0] is the script name; arg[-1] would be the interpreter or one of
    -- its own options, which is not what a usage line should show.
    print( arg[0], " [FLAGS] [FILE] " )
    print( "-f [FILE] ... - list of file from with is used" )
    print( "-s - show char stat" )
    print( "-c - show compact char stat" )
    print( "-d - debug mode" )
    --print( "-i - ignore whitespaces, newlines")
    print( "-h - just this help" )
  elseif flag_input_files == true then
    -- Not an option: it is a file name as long as a -f list is open.
    table.insert( arg_files, v )
  end
end
-- Debug aid: print the arg_files table itself (print shows its address),
-- then one "index<TAB>name" line per collected input file.
-- ipairs guarantees the files are listed in command-line order; pairs gives
-- no ordering guarantee.
if debug_mode then
  print( arg_files )
  for index, name in ipairs( arg_files ) do
    print( index, name )
  end
end
-- Fold the entries of t2 into t1, in place.
-- t1: destination table; it is modified and is also the return value.
-- t2: source table; it is only read.
-- A key missing from t1 receives t2's value unchanged; a key already present
-- in t1 is replaced by the sum of both values.
-- Returns t1 (the same table object, not a copy).
function merge_tables( t1, t2 )
  for key, extra in pairs( t2 ) do
    local current = t1[key]
    if current ~= nil then
      t1[key] = current + extra
    else
      t1[key] = extra
    end
  end
  return t1
end
-- Count how often each Unicode code point occurs in a UTF-8 text file.
-- filename: path of the file to read.
-- Returns a table mapping integer code point -> number of occurrences.
-- The file is read line by line, so line terminators removed by lines() are
-- not counted.
-- If the file cannot be opened, the reason is written to stderr and the
-- process exits with status 1. utf8.codes raises an error on invalid UTF-8.
function get_file_stat( filename )
  local hier_table = {}
  -- The handle is local so that it does not leak into the global namespace.
  local utf8_file, open_err = io.open( filename, "r" )
  if utf8_file == nil then
    -- io.open's message already contains the file name and the OS reason.
    io.stderr:write( "Could not open file: ", tostring( open_err ), "\n" )
    os.exit(1)
  end
  for line in utf8_file:lines() do
    for _, c in utf8.codes( line ) do
      -- Every code point is counted. Skipping characters (flag_ignore_chars /
      -- list_ignore_chars) is not implemented here.
      hier_table[c] = ( hier_table[c] or 0 ) + 1
    end
  end
  utf8_file:close()
  return hier_table
end
-- Read the content of every input file as UTF-8 and accumulate the
-- per-code-point counts of all files into one global table.
hier_table = {}
for i = 1, #arg_files do
  hier_table = merge_tables( hier_table, get_file_stat( arg_files[i] ) )
end
-- Flatten the code point -> count map into an array of { ch, val } records
-- so that it can be sorted afterwards.
hier_table_sorted = {}
for code, count in pairs( hier_table ) do
  hier_table_sorted[#hier_table_sorted + 1] = { ch = code, val = count }
end
-- Ordering function for table.sort over { ch = code point, val = count }
-- records: higher counts first.
-- a, b: records to compare.
-- Returns true when a must be placed before b, false otherwise.
-- Records with equal counts are ordered by ascending code point, so the
-- result does not depend on traversal order or on the unstable sort.
-- A record without a count never sorts before another one.
function cmpa( a, b )
  local a_count, b_count = a.val, b.val
  if a_count == nil or b_count == nil then
    return false
  end
  if a_count ~= b_count then
    return a_count > b_count
  end
  local a_code, b_code = a.ch, b.ch
  if a_code ~= nil and b_code ~= nil then
    return a_code < b_code
  end
  return false
end
-- Order the records with cmpa, then (with -s) print one line per character:
-- the character, "=", and its count, separated by tabs as print does.
table.sort( hier_table_sorted, cmpa )
if flag_show_stats == true then
  -- ipairs walks the array in index order, i.e. in sorted order; pairs gives
  -- no ordering guarantee.
  for _, entry in ipairs( hier_table_sorted ) do
    print( utf8.char( entry.ch ), "=", entry.val )
  end
end
-- Compact listing (-c): characters sharing the same count are printed on one
-- line, each group preceded by an empty line and a line holding the count.
-- NOTE: when -c is combined with -s both listings are printed; whether that
-- combination should count as a bug is an open question.
if flag_compact_stats == true then
  -- 0 never equals a stored count here, so the first record opens a group.
  local current_count = 0
  -- ipairs keeps the sorted order; pairs gives no ordering guarantee.
  for _, entry in ipairs( hier_table_sorted ) do
    if entry.val ~= current_count then
      current_count = entry.val
      print("")
      print(entry.val)
    end
    io.write( utf8.char( entry.ch ), " " )
  end
  print("")
end
|