diff options
-rw-r--r-- | main.c | 93 | ||||
-rw-r--r-- | rabincarp.c | 190 | ||||
-rw-r--r-- | rabincarp.h | 36 |
3 files changed, 316 insertions, 3 deletions
@@ -64,6 +64,8 @@ typedef struct match_params { char *fname; char *match_str; + int match_len; + int buf_size; int verbose; } match_params; @@ -73,17 +75,102 @@ void helper( char *progname ) printf("Usage: %s [OPTS]\n\n" " -f - input file\n" " -m - match string\n" + //" -b - set buffer size to read\n" " -v - more extra info in output\n" "Version: 0.0.1 \n" "\n" , progname); } -int main(int argc, const char *argv[]) +int main(int argc, char **argv) { - + int c; + int i; + int fd; + int text_len; + int ret; + + char *text = NULL; + + //search + rol_hash_isearch search; + + //parametrs + match_params mp; + + memset(&mp,0,sizeof(match_params)); + + while ( (c = getopt(argc, argv, "f:m:b:v")) != -1 ) + { + switch(c) + { + case 'f': + mp.fname = optarg; + break; + case 'm': + mp.match_str = optarg; + mp.match_len = strlen(optarg); + break; + case 'b': + mp.buf_size = atoi(optarg); + break; + case 'v': + mp.verbose = 1; + break; + default: + helper( argv[0] ); + exit(1); + } + } + + if (argc<2) + { + helper(argv[0]); + return -1; + } + + + if (mp.fname == NULL) + { + printf("Set filename\n"); + return -1; + } + + if (mp.match_str == NULL) + { + printf("Set search string\n"); + return -1; + } + + fd = file_open( mp.fname ); + if (fd<0) + { + printf("Couldnt open file %s\n", mp.fname); + + return -1; + } + + text_len = lseek(fd,0,SEEK_END); + lseek(fd, 0,SEEK_SET); + text = malloc(text_len); + //printf("Read %d bytes\n",read(fd,text,text_len)); + + ret = read(fd, text, text_len); + if (mp.verbose) + { + printf("Read %d bytes\n",ret); + } + + + rlsi_hash_init( &search, 10, 13); + while ((i = rlsi_hash_search(&search, mp.match_str, mp.match_len, text, text_len)) == 1) + { + //printf("Found smth %d\n",rlsi_hash_get(&search)); + printf("%d\n",rlsi_hash_get(&search)); + } + + file_close( fd ); - return 0; }
\ No newline at end of file diff --git a/rabincarp.c b/rabincarp.c index e69de29..a58ab15 100644 --- a/rabincarp.c +++ b/rabincarp.c @@ -0,0 +1,190 @@ +#include "rabincarp.h" + + +int rlsi_hash_reset(rol_hash_isearch *rlh) +{ + rlh->pow = 1; + rlh->pattern_len = 0; + rlh->pattern_hash = 0; + rlh->text_hash = 0; + rlh->text_len = 0; + rlh->text = NULL; + rlh->pattern = NULL; + rlh->iter_pos = 0; + rlh->count = 0; + rlh->pre_text = NULL; + rlh->chunk_done = 0; + return 0; +} + +int rlsi_hash_init(rol_hash_isearch *rlh, int b, int m) +{ + rlh->b = b; + rlh->m = m; + rlsi_hash_reset(rlh); + return 0; +} + +int rlsi_hash_precalc(rol_hash_isearch *rlh, char *pattern, int plen, char *text, int tlen) +{ + int i; + + if (tlen<plen) + { + return -1; + } + + rlh->text_len = tlen; + rlh->text = text; + rlh->pattern = pattern; + rlh->pattern_len = plen; + + rlh->pow = 1; + for (i=0; i<plen-1; i++) + { + rlh->pow = (rlh->pow * rlh->b) % rlh->m; + } + + rlh->text_hash = 0; + rlh->pattern_hash = 0; + for (i=0;i<plen;i++) + { + rlh->pattern_hash = (rlh->pattern_hash * rlh->b + pattern[i]) % rlh->m; + rlh->text_hash = (rlh->text_hash * rlh->b + text[i]) % rlh->m; + } + + //printf("pow %d\n",rlh->pow); + //printf("pattern %d\n",rlh->pattern_hash); + //printf("text %d\n",rlh->text_hash); + + //set buffer size for presaved text + rlh->pre_text = malloc(plen);//dont check err =P, mem leak here + rlh->count = 0; + + return 0; +} + +//run after prepararion step +int rlsi_hash_search(rol_hash_isearch *rlh, char *pattern, int plen, char *text, int tlen) +{ + int j=0; + + int found=0; + + if ((rlh->text == NULL) && (rlh->pattern == NULL)) + { + rlsi_hash_precalc(rlh, pattern, plen, text, tlen); + } + + if (rlh->chunk_done == 1) + { + rlh->chunk_done = 0; + rlh->text_len = tlen; + } + + //printf("while %d",rlh->text_len-rlh->pattern_len); + //while (rlh->iter_pos<=(rlh->text_len-rlh->pattern_len)) + while (rlh->iter_pos<=(rlh->text_len-rlh->pattern_len)) + { + + //hashes are matching + if (rlh->text_hash == rlh->pattern_hash) + { + //printf("2\n"); + //check if string match + for (j=0;j<rlh->pattern_len;j++) + { + //??? + char i_c=0; + int k=rlh->iter_pos+j; + if (k<0) + { + i_c = rlh->pre_text[rlh->pattern_len+k]; + } else + { + //i_c = rlh->text[rlh->iter_pos+j]; + i_c = text[rlh->iter_pos+j]; + } + if (i_c != rlh->pattern[j]) + break; + } + + if (j == rlh->pattern_len) + { + //??? + //printf("Found pattern at %d\n",rlh->iter_pos+1); + //printf("Found pattern at %d\n",rlh->count); + found = 1; + } + + } + + //??? + if (rlh->iter_pos < rlh->text_len - rlh->pattern_len) + { + //printf("1\n"); + //??? + char i_c; + if (rlh->iter_pos < 0) + { + i_c = rlh->pre_text[rlh->pattern_len+rlh->iter_pos]; + } else + { + //i_c = rlh->text[rlh->iter_pos]; + i_c = text[rlh->iter_pos]; + } + //printf("CHAR: %c\n",i_c); + //rlh->text_hash = ( rlh->b *( rlh->text_hash - i_c*rlh->pow) + rlh->text[rlh->iter_pos+rlh->pattern_len] ) % rlh->m; + rlh->text_hash = ( rlh->b *( rlh->text_hash - i_c*rlh->pow) + text[rlh->iter_pos+rlh->pattern_len] ) % rlh->m; + //printf("text %d\n",rlh->text_hash); + if (rlh->text_hash < 0) + { + rlh->text_hash = rlh->text_hash + rlh->m; + } + } + + //iter over current buffer + rlh->iter_pos += 1; + //total data that have been iterated + rlh->count += 1; + + if (found == 1) + { + //rlh->count -= 1; + return 1; + } + //iter over current buffer + //rlh->iter_pos += 1; + //total data that have been iterated + //rlh->count += 1; + //printf("rlh->iter_pos %d\n",rlh->iter_pos); + //printf("rlh->count %d\n",rlh->count); + } + //printf("tlen %d\n",tlen); + //exit from while loop because end of the buffer, iter is +1 element larger + //copy pattern to intermidiate buffer + //if ((rlh->iter_pos)==(rlh->text_len - rlh->pattern_len+1)) + { + //copy last bit of buffer to further processing + //memcpy(rlh->pre_text, &rlh->text[rlh->iter_pos], rlh->pattern_len); + memcpy(rlh->pre_text, &text[rlh->iter_pos-1], rlh->pattern_len); + //printf("end of chunk\n"); + //rlh->count -= 1; + //printf("count %d\n",rlh->count); + + rlh->chunk_done = 1; + rlh->iter_pos = -rlh->pattern_len; + } + +//exit_loop: + //chunk finished search new need to be supplied pal + //printf("0\n"); + rlh->count -= 1; + return 0; +} + +int rlsi_hash_get(rol_hash_isearch *rlh) +{ + //return rlh->iter_pos; + return rlh->count; +}
\ No newline at end of file diff --git a/rabincarp.h b/rabincarp.h index e69de29..753ff66 100644 --- a/rabincarp.h +++ b/rabincarp.h @@ -0,0 +1,36 @@ +#ifndef __RABINCARP_H +#define __RABINCARP_H + +#include <stdlib.h> +#include <stdio.h> +#include <stdint.h> +#include <unistd.h> +#include <fcntl.h> +#include <string.h> + +typedef struct rol_hash_isearch +{ + int b; + int m; + int pow; + + int pattern_len; + int text_len; + int pattern_hash; + int text_hash; + char *text; + char *pattern; + int iter_pos; + + char *pre_text; + int count; + int chunk_done; +} rol_hash_isearch; + +int rlsi_hash_reset(rol_hash_isearch *rlh); +int rlsi_hash_init(rol_hash_isearch *rlh, int b, int m); +int rlsi_hash_precalc(rol_hash_isearch *rlh, char *pattern, int plen, char *text, int tlen); +int rlsi_hash_search(rol_hash_isearch *rlh, char *pattern, int plen, char *text, int tlen); +int rlsi_hash_get(rol_hash_isearch *rlh); + +#endif
\ No newline at end of file |