summary | refs | log | tree | commit | diff
path: root/utf8_count
diff options
context:
space:
mode:
author: FreeArtMan <dos21h@gmail.com> 2015-02-07 15:30:09 +0900
committer: FreeArtMan <dos21h@gmail.com> 2015-02-07 15:30:09 +0900
commit: 7fffc29b196336f299fecc7e55db17e77d97d1e2 (patch)
tree: 3f43526801defa8d5b0b97047524eb536d11260c /utf8_count
parent: a97d6a9ee64f384ef995261a1518f7d39b6f2649 (diff)
download: code-snippets-7fffc29b196336f299fecc7e55db17e77d97d1e2.tar.gz
code-snippets-7fffc29b196336f299fecc7e55db17e77d97d1e2.zip
added utf8 character statistics counter. utf8_count
Diffstat (limited to 'utf8_count')
-rw-r--r-- utf8_count/Makefile 4
-rw-r--r-- utf8_count/utf8_count.lua 45
2 files changed, 49 insertions, 0 deletions
diff --git a/utf8_count/Makefile b/utf8_count/Makefile
new file mode 100644
index 0000000..ba52396
--- /dev/null
+++ b/utf8_count/Makefile
@@ -0,0 +1,4 @@
+# Lua interpreter used to run the script (a Lua 5.3 build by default).
+# `?=` keeps this default but lets the environment or the command line
+# supply another interpreter, e.g. `make LUA=lua5.3`.
+LUA ?= /home/fam/downloads/source/lua/lua-5.3.0/src/lua
+
+# `make` names a command, not a file produced by the rule.
+.PHONY: make
+
+# Default target: run the character counter without echoing the command.
+make:
+	@$(LUA) utf8_count.lua
diff --git a/utf8_count/utf8_count.lua b/utf8_count/utf8_count.lua
new file mode 100644
index 0000000..a196e9a
--- /dev/null
+++ b/utf8_count/utf8_count.lua
@@ -0,0 +1,45 @@
+#!/usr/bin/lua
+
+-- Make sure the interpreter provides what this script needs.
+-- Why 5.3? It is the first release with the utf8 library and the
+-- integer subtype (detected here through math.type). Testing for the
+-- features instead of the exact version string lets newer releases
+-- that still provide them run the script too.
+if type(utf8) ~= "table" or type(math.type) ~= "function" then
+  print "Wrong lua versions"
+  print "Supported 5.3 or newer only"
+  os.exit(1)
+end
+
+-- Open the input file.
+-- The path may be passed as the first command-line argument; without
+-- one, the default test/teabook.txt is used.
+local utf8_path = (arg and arg[1]) or "test/teabook.txt"
+-- io.open returns nil plus an error message on failure; the message is
+-- kept so the user can see why the file could not be opened.
+local utf8_file, open_err = io.open( utf8_path, "r" )
+if ( utf8_file == nil ) then
+  print "Couldnot open file"
+  print( open_err )
+  os.exit(1)
+end
+
+-- Count how often each Unicode code point occurs in the file.
+-- hier_table maps code point (integer) -> number of occurrences.
+-- Lines are read without their terminator, so newlines are not counted.
+-- Note: utf8.codes raises an error when a line is not valid UTF-8.
+local hier_table = {}
+for line in utf8_file:lines() do
+  for _, c in utf8.codes( line ) do
+    -- A code point seen for the first time starts from 0.
+    hier_table[c] = ( hier_table[c] or 0 ) + 1
+  end
+end
+-- The whole file has been consumed; release the handle.
+utf8_file:close()
+
+-- Flatten the count map into an array of {ch = code point, val = count}
+-- records so that it can be ordered with table.sort.
+hier_table_sorted = {}
+for codepoint, count in pairs(hier_table) do
+  hier_table_sorted[#hier_table_sorted + 1] = { ch = codepoint, val = count }
+end
+
+-- Ordering function for table.sort over {ch = code point, val = count}
+-- records: higher counts come first.
+-- Records with equal counts are ordered by ascending code point so the
+-- result does not depend on table traversal order or sort internals.
+-- Returns false when a count is missing, or when counts tie and a code
+-- point is missing, so the comparison never errors on partial records.
+function cmpa( a, b )
+  if a.val == nil or b.val == nil then
+    return false
+  end
+  if a.val ~= b.val then
+    return a.val > b.val
+  end
+  if a.ch ~= nil and b.ch ~= nil then
+    return a.ch < b.ch
+  end
+  return false
+end
+-- Order the records with cmpa (highest count first), then print one
+-- row per code point: the character, "=", and its count. print
+-- separates its arguments with tabs.
+table.sort( hier_table_sorted, cmpa )
+for i = 1, #hier_table_sorted do
+  local entry = hier_table_sorted[i]
+  print(utf8.char(entry.ch), "=", hier_table[entry.ch] )
+end
+