From cef9304cf01742e1b27e633c9c342b502c149cbf Mon Sep 17 00:00:00 2001 From: Christophe Grenier Date: Sun, 14 Mar 2021 11:01:02 +0100 Subject: [PATCH] Move UTFsize() from src/file_txt.c to src/utfsize.c src/file_win.c: add Frama-C annotations --- src/Makefile.am | 9 ++- src/file_txt.c | 86 +----------------------- src/file_win.c | 49 +++++++------- src/utfsize.c | 120 ++++++++++++++++++++++++++++++++++ src/{file_txt.h => utfsize.h} | 8 +-- 5 files changed, 157 insertions(+), 115 deletions(-) create mode 100644 src/utfsize.c rename src/{file_txt.h => utfsize.h} (89%) diff --git a/src/Makefile.am b/src/Makefile.am index 10bfe689..4fbb3c94 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -387,9 +387,10 @@ file_C = filegen.c \ file_z2d.c \ file_zcode.c \ file_zip.c \ - file_zpr.c + file_zpr.c \ + utfsize.c -file_H = ext2.h hfsp_struct.h filegen.h file_doc.h file_jpg.h file_gz.h file_riff.h file_sp3.h file_tar.h file_tiff.h file_txt.h luks_struct.h ntfs_struct.h ole.h pe.h suspend.h +file_H = ext2.h hfsp_struct.h filegen.h file_doc.h file_jpg.h file_gz.h file_riff.h file_sp3.h file_tar.h file_tiff.h luks_struct.h ntfs_struct.h ole.h pe.h suspend.h utfsize.h photorec_C = photorec.c phcfg.c addpart.c chgarch.c chgtype.c dir.c exfatp.c ext2grp.c ext2_dir.c ext2p.c fat_dir.c fatp.c file_found.c geometry.c ntfs_dir.c ntfsp.c pdisksel.c poptions.c sessionp.c dfxml.c partgptro.c @@ -467,6 +468,10 @@ session_tiff_le.framac: file_tiff.c file_tiff_be.c file_tiff_le.c common.c fileg $(CC) $(CFLAGS) -DMAIN_tiff_le -DHAVE_CONFIG_H -DSINGLE_FORMAT -DSINGLE_FORMAT_tiff -I.. $^ -o demo_tiff_le frama-c $^ -cpp-extra-args="-DMAIN_tiff_le -DHAVE_CONFIG_H -DSINGLE_FORMAT -DSINGLE_FORMAT_tiff -D__x86_64__ -I.." $(FRAMA_C_FLAGS) -save $@ +session_txt.framac: file_txt.c common.c filegen.c log.c utfsize.c + $(CC) $(CFLAGS) -DMAIN_txt -DHAVE_CONFIG_H -DSINGLE_FORMAT -DSINGLE_FORMAT_txt -I.. $^ -o demo_txt + frama-c $^ -cpp-extra-args="-DMAIN_txt -DHAVE_CONFIG_H -DSINGLE_FORMAT -DSINGLE_FORMAT_txt -D__x86_64__ -I.." $(FRAMA_C_FLAGS) -save $@ + session_%.framac: file_%.c common.c filegen.c log.c $(CC) $(CFLAGS) -DMAIN_$* -DHAVE_CONFIG_H -DSINGLE_FORMAT -DSINGLE_FORMAT_$* -I.. $^ -o demo_$* frama-c $^ -cpp-extra-args="-DMAIN_$* -DHAVE_CONFIG_H -DSINGLE_FORMAT -DSINGLE_FORMAT_$* -D__x86_64__ -I.." $(FRAMA_C_FLAGS) -save $@ diff --git a/src/file_txt.c b/src/file_txt.c index e0375421..9cd3e8bc 100644 --- a/src/file_txt.c +++ b/src/file_txt.c @@ -41,7 +41,7 @@ #include "filegen.h" #include "log.h" #include "memmem.h" -#include "file_txt.h" +#include "utfsize.h" #if defined(__FRAMAC__) #include "__fc_builtin.h" #endif @@ -682,90 +682,6 @@ static int UTF2Lat(unsigned char *buffer_lower, const unsigned char *buffer, con return offset_dst; } -int UTFsize(const unsigned char *buffer, const unsigned int buf_len) -{ - const unsigned char *p=buffer; /* pointers to actual position in source buffer */ - unsigned int i=0; - /*@ - @ loop invariant 0 <= i < buf_len + 3; - @ loop invariant p == buffer + i; - @ loop assigns i, p; - @ loop variant buf_len - 1 - i; - @*/ - while(i=0xfd) - return i; - /*@ assert i + 1 >= buf_len || \valid_read(p+1); */ - /*@ assert i + 2 >= buf_len || \valid_read(p+2); */ - if((c & 0xf0)==0xe0 && - (i+1 >= buf_len || (*(p+1) & 0xc0)==0x80) && - (i+2 >= buf_len || (*(p+2) & 0xc0)==0x80)) - { /* UTF8 l=3 */ -#ifdef DEBUG_TXT - log_info("UTFsize i=%u l=3\n", i); -#endif - p+=3; - i+=3; - } - else if((c & 0xe0)==0xc0 && - (i+1 >= buf_len || (*(p+1) & 0xc0)==0x80)) - { /* UTF8 l=2 */ -#ifdef DEBUG_TXT - log_info("UTFsize i=%u l=2\n", i); -#endif - p+=2; - i+=2; - } - else - { /* Ascii UCS */ -#ifdef DEBUG_TXT - log_info("UTFsize i=%u l=1 ? *p=%c\n", i, c); -#endif - switch(c) - { - case 0x00: - case 0x01: - case 0x02: - case 0x03: - case 0x04: - case 0x05: - case 0x06: - case 0x07: - case 0x0b: - case 0x0c: - case 0x10: - case 0x11: - case 0x12: - case 0x13: - case 0x14: - case 0x15: - case 0x16: - case 0x17: - case 0x18: - case 0x19: - case 0x1a: - case 0x1b: - case 0x1c: - case 0x1d: - case 0x1e: - case 0x1f: - case 0x7f: - return i; - } - p++; - i++; - } - } - return (i= 2 && (buffer_size&1)==0; @ requires \valid_read((char *)buffer+(0..buffer_size-1)); diff --git a/src/file_win.c b/src/file_win.c index 3d7cfe9f..c32ce5c4 100644 --- a/src/file_win.c +++ b/src/file_win.c @@ -33,53 +33,54 @@ #include #include "types.h" #include "filegen.h" -#include "file_txt.h" +#include "utfsize.h" #include "common.h" +/*@ requires \valid(file_stat); */ static void register_header_check_win(file_stat_t *file_stat); -const file_hint_t file_hint_win= { - .extension="win", - .description="Opera preferences", - .max_filesize=PHOTOREC_MAX_FILE_SIZE, - .recover=1, - .enable_by_default=1, - .register_header_check=®ister_header_check_win +const file_hint_t file_hint_win = { + .extension = "win", + .description = "Opera preferences", + .max_filesize = PHOTOREC_MAX_FILE_SIZE, + .recover = 1, + .enable_by_default = 1, + .register_header_check = ®ister_header_check_win }; static data_check_t data_check_win(const unsigned char *buffer, const unsigned int buffer_size, file_recovery_t *file_recovery) { unsigned int i; - unsigned int offset=0; - if(file_recovery->calculated_file_size==0) - offset=3; - i=UTFsize(&buffer[buffer_size/2+offset], buffer_size/2-offset); - if(icalculated_file_size == 0) + offset = 3; + i = UTFsize(&buffer[buffer_size / 2 + offset], buffer_size / 2 - offset); + if(i < buffer_size / 2 - offset) { - if(i>=10) - file_recovery->calculated_file_size=file_recovery->file_size+offset+i; + if(i >= 10) + file_recovery->calculated_file_size = file_recovery->file_size + offset + i; return DC_STOP; } - file_recovery->calculated_file_size=file_recovery->file_size+(buffer_size/2); + file_recovery->calculated_file_size = file_recovery->file_size + (buffer_size / 2); return DC_CONTINUE; } static int header_check_win(const unsigned char *buffer, const unsigned int buffer_size, const unsigned int safe_header_only, const file_recovery_t *file_recovery, file_recovery_t *file_recovery_new) { reset_file_recovery(file_recovery_new); - file_recovery_new->extension=file_hint_win.extension; - file_recovery_new->data_check=&data_check_win; - file_recovery_new->file_check=&file_check_size; + file_recovery_new->extension = file_hint_win.extension; + file_recovery_new->data_check = &data_check_win; + file_recovery_new->file_check = &file_check_size; return 1; } static void register_header_check_win(file_stat_t *file_stat) { - static const unsigned char win_header[31]= { - 0xef, 0xbb, 0xbf, 'O' , 'p' , 'e' , 'r' , 'a' , - ' ' , 'P' , 'r' , 'e' , 'f' , 'e' , 'r' , 'e' , - 'n' , 'c' , 'e' , 's' , ' ' , 'v' , 'e' , 'r' , - 's' , 'i' , 'o' , 'n' , ' ' , '2' , '.' + static const unsigned char win_header[31] = { + 0xef, 0xbb, 0xbf, 'O', 'p', 'e', 'r', 'a', + ' ', 'P', 'r', 'e', 'f', 'e', 'r', 'e', + 'n', 'c', 'e', 's', ' ', 'v', 'e', 'r', + 's', 'i', 'o', 'n', ' ', '2', '.' }; register_header_check(0, win_header, sizeof(win_header), &header_check_win, file_stat); } diff --git a/src/utfsize.c b/src/utfsize.c new file mode 100644 index 00000000..504c1477 --- /dev/null +++ b/src/utfsize.c @@ -0,0 +1,120 @@ +/* + + File: utfsize.c + + Copyright (C) 2021 Christophe GRENIER + + This software is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write the Free Software Foundation, Inc., 51 + Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + + */ + +#if !defined(SINGLE_FORMAT) || defined(SINGLE_FORMAT_txt) || defined(SINGLE_FORMAT_win) +#ifdef HAVE_CONFIG_H +#include +#endif +#ifdef HAVE_STDLIB_H +#include +#endif +#ifdef HAVE_STRING_H +#include +#endif +#include +#include "log.h" +#include "utfsize.h" + +int UTFsize(const unsigned char *buffer, const unsigned int buf_len) +{ + const unsigned char *p=buffer; /* pointers to actual position in source buffer */ + unsigned int i=0; + /*@ + @ loop invariant 0 <= i < buf_len + 3; + @ loop invariant p == buffer + i; + @ loop assigns i, p; + @ loop variant buf_len - 1 - i; + @*/ + while(i=0xfd) + return i; + /*@ assert i + 1 >= buf_len || \valid_read(p+1); */ + /*@ assert i + 2 >= buf_len || \valid_read(p+2); */ + if((c & 0xf0)==0xe0 && + (i+1 >= buf_len || (*(p+1) & 0xc0)==0x80) && + (i+2 >= buf_len || (*(p+2) & 0xc0)==0x80)) + { /* UTF8 l=3 */ +#ifdef DEBUG_TXT + log_info("UTFsize i=%u l=3\n", i); +#endif + p+=3; + i+=3; + } + else if((c & 0xe0)==0xc0 && + (i+1 >= buf_len || (*(p+1) & 0xc0)==0x80)) + { /* UTF8 l=2 */ +#ifdef DEBUG_TXT + log_info("UTFsize i=%u l=2\n", i); +#endif + p+=2; + i+=2; + } + else + { /* Ascii UCS */ +#ifdef DEBUG_TXT + log_info("UTFsize i=%u l=1 ? *p=%c\n", i, c); +#endif + switch(c) + { + case 0x00: + case 0x01: + case 0x02: + case 0x03: + case 0x04: + case 0x05: + case 0x06: + case 0x07: + case 0x0b: + case 0x0c: + case 0x10: + case 0x11: + case 0x12: + case 0x13: + case 0x14: + case 0x15: + case 0x16: + case 0x17: + case 0x18: + case 0x19: + case 0x1a: + case 0x1b: + case 0x1c: + case 0x1d: + case 0x1e: + case 0x1f: + case 0x7f: + return i; + } + p++; + i++; + } + } + return (i + Copyright (C) 2009-2021 Christophe GRENIER This software is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -19,8 +19,8 @@ Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. */ -#ifndef _FILE_TXT_H -#define _FILE_TXT_H +#ifndef _UTFSIZE_H +#define _UTFSIZE_H #ifdef __cplusplus extern "C" { #endif