Move UTFsize() from src/file_txt.c to src/utfsize.c
src/file_win.c: add Frama-C annotations
This commit is contained in:
parent
5909fc97d0
commit
cef9304cf0
5 changed files with 157 additions and 115 deletions
|
@ -387,9 +387,10 @@ file_C = filegen.c \
|
|||
file_z2d.c \
|
||||
file_zcode.c \
|
||||
file_zip.c \
|
||||
file_zpr.c
|
||||
file_zpr.c \
|
||||
utfsize.c
|
||||
|
||||
file_H = ext2.h hfsp_struct.h filegen.h file_doc.h file_jpg.h file_gz.h file_riff.h file_sp3.h file_tar.h file_tiff.h file_txt.h luks_struct.h ntfs_struct.h ole.h pe.h suspend.h
|
||||
file_H = ext2.h hfsp_struct.h filegen.h file_doc.h file_jpg.h file_gz.h file_riff.h file_sp3.h file_tar.h file_tiff.h luks_struct.h ntfs_struct.h ole.h pe.h suspend.h utfsize.h
|
||||
|
||||
photorec_C = photorec.c phcfg.c addpart.c chgarch.c chgtype.c dir.c exfatp.c ext2grp.c ext2_dir.c ext2p.c fat_dir.c fatp.c file_found.c geometry.c ntfs_dir.c ntfsp.c pdisksel.c poptions.c sessionp.c dfxml.c partgptro.c
|
||||
|
||||
|
@ -467,6 +468,10 @@ session_tiff_le.framac: file_tiff.c file_tiff_be.c file_tiff_le.c common.c fileg
|
|||
$(CC) $(CFLAGS) -DMAIN_tiff_le -DHAVE_CONFIG_H -DSINGLE_FORMAT -DSINGLE_FORMAT_tiff -I.. $^ -o demo_tiff_le
|
||||
frama-c $^ -cpp-extra-args="-DMAIN_tiff_le -DHAVE_CONFIG_H -DSINGLE_FORMAT -DSINGLE_FORMAT_tiff -D__x86_64__ -I.." $(FRAMA_C_FLAGS) -save $@
|
||||
|
||||
session_txt.framac: file_txt.c common.c filegen.c log.c utfsize.c
|
||||
$(CC) $(CFLAGS) -DMAIN_txt -DHAVE_CONFIG_H -DSINGLE_FORMAT -DSINGLE_FORMAT_txt -I.. $^ -o demo_txt
|
||||
frama-c $^ -cpp-extra-args="-DMAIN_txt -DHAVE_CONFIG_H -DSINGLE_FORMAT -DSINGLE_FORMAT_txt -D__x86_64__ -I.." $(FRAMA_C_FLAGS) -save $@
|
||||
|
||||
session_%.framac: file_%.c common.c filegen.c log.c
|
||||
$(CC) $(CFLAGS) -DMAIN_$* -DHAVE_CONFIG_H -DSINGLE_FORMAT -DSINGLE_FORMAT_$* -I.. $^ -o demo_$*
|
||||
frama-c $^ -cpp-extra-args="-DMAIN_$* -DHAVE_CONFIG_H -DSINGLE_FORMAT -DSINGLE_FORMAT_$* -D__x86_64__ -I.." $(FRAMA_C_FLAGS) -save $@
|
||||
|
|
|
@ -41,7 +41,7 @@
|
|||
#include "filegen.h"
|
||||
#include "log.h"
|
||||
#include "memmem.h"
|
||||
#include "file_txt.h"
|
||||
#include "utfsize.h"
|
||||
#if defined(__FRAMAC__)
|
||||
#include "__fc_builtin.h"
|
||||
#endif
|
||||
|
@ -682,90 +682,6 @@ static int UTF2Lat(unsigned char *buffer_lower, const unsigned char *buffer, con
|
|||
return offset_dst;
|
||||
}
|
||||
|
||||
int UTFsize(const unsigned char *buffer, const unsigned int buf_len)
|
||||
{
|
||||
const unsigned char *p=buffer; /* pointers to actual position in source buffer */
|
||||
unsigned int i=0;
|
||||
/*@
|
||||
@ loop invariant 0 <= i < buf_len + 3;
|
||||
@ loop invariant p == buffer + i;
|
||||
@ loop assigns i, p;
|
||||
@ loop variant buf_len - 1 - i;
|
||||
@*/
|
||||
while(i<buf_len)
|
||||
{
|
||||
/*@ assert i < buf_len; */
|
||||
/*@ assert p == buffer + i; */
|
||||
const unsigned char c=*p;
|
||||
if(c=='\0')
|
||||
return i;
|
||||
/* Reject some invalid UTF-8 sequences */
|
||||
if(c==0xc0 || c==0xc1 || c==0xf7 || c>=0xfd)
|
||||
return i;
|
||||
/*@ assert i + 1 >= buf_len || \valid_read(p+1); */
|
||||
/*@ assert i + 2 >= buf_len || \valid_read(p+2); */
|
||||
if((c & 0xf0)==0xe0 &&
|
||||
(i+1 >= buf_len || (*(p+1) & 0xc0)==0x80) &&
|
||||
(i+2 >= buf_len || (*(p+2) & 0xc0)==0x80))
|
||||
{ /* UTF8 l=3 */
|
||||
#ifdef DEBUG_TXT
|
||||
log_info("UTFsize i=%u l=3\n", i);
|
||||
#endif
|
||||
p+=3;
|
||||
i+=3;
|
||||
}
|
||||
else if((c & 0xe0)==0xc0 &&
|
||||
(i+1 >= buf_len || (*(p+1) & 0xc0)==0x80))
|
||||
{ /* UTF8 l=2 */
|
||||
#ifdef DEBUG_TXT
|
||||
log_info("UTFsize i=%u l=2\n", i);
|
||||
#endif
|
||||
p+=2;
|
||||
i+=2;
|
||||
}
|
||||
else
|
||||
{ /* Ascii UCS */
|
||||
#ifdef DEBUG_TXT
|
||||
log_info("UTFsize i=%u l=1 ? *p=%c\n", i, c);
|
||||
#endif
|
||||
switch(c)
|
||||
{
|
||||
case 0x00:
|
||||
case 0x01:
|
||||
case 0x02:
|
||||
case 0x03:
|
||||
case 0x04:
|
||||
case 0x05:
|
||||
case 0x06:
|
||||
case 0x07:
|
||||
case 0x0b:
|
||||
case 0x0c:
|
||||
case 0x10:
|
||||
case 0x11:
|
||||
case 0x12:
|
||||
case 0x13:
|
||||
case 0x14:
|
||||
case 0x15:
|
||||
case 0x16:
|
||||
case 0x17:
|
||||
case 0x18:
|
||||
case 0x19:
|
||||
case 0x1a:
|
||||
case 0x1b:
|
||||
case 0x1c:
|
||||
case 0x1d:
|
||||
case 0x1e:
|
||||
case 0x1f:
|
||||
case 0x7f:
|
||||
return i;
|
||||
}
|
||||
p++;
|
||||
i++;
|
||||
}
|
||||
}
|
||||
return (i<buf_len?i:buf_len);
|
||||
}
|
||||
|
||||
/*@
|
||||
@ requires buffer_size >= 2 && (buffer_size&1)==0;
|
||||
@ requires \valid_read((char *)buffer+(0..buffer_size-1));
|
||||
|
|
|
@ -33,53 +33,54 @@
|
|||
#include <stdio.h>
|
||||
#include "types.h"
|
||||
#include "filegen.h"
|
||||
#include "file_txt.h"
|
||||
#include "utfsize.h"
|
||||
#include "common.h"
|
||||
|
||||
/*@ requires \valid(file_stat); */
|
||||
static void register_header_check_win(file_stat_t *file_stat);
|
||||
|
||||
const file_hint_t file_hint_win= {
|
||||
.extension="win",
|
||||
.description="Opera preferences",
|
||||
.max_filesize=PHOTOREC_MAX_FILE_SIZE,
|
||||
.recover=1,
|
||||
.enable_by_default=1,
|
||||
.register_header_check=®ister_header_check_win
|
||||
const file_hint_t file_hint_win = {
|
||||
.extension = "win",
|
||||
.description = "Opera preferences",
|
||||
.max_filesize = PHOTOREC_MAX_FILE_SIZE,
|
||||
.recover = 1,
|
||||
.enable_by_default = 1,
|
||||
.register_header_check = ®ister_header_check_win
|
||||
};
|
||||
|
||||
static data_check_t data_check_win(const unsigned char *buffer, const unsigned int buffer_size, file_recovery_t *file_recovery)
|
||||
{
|
||||
unsigned int i;
|
||||
unsigned int offset=0;
|
||||
if(file_recovery->calculated_file_size==0)
|
||||
offset=3;
|
||||
i=UTFsize(&buffer[buffer_size/2+offset], buffer_size/2-offset);
|
||||
if(i<buffer_size/2-offset)
|
||||
unsigned int offset = 0;
|
||||
if(file_recovery->calculated_file_size == 0)
|
||||
offset = 3;
|
||||
i = UTFsize(&buffer[buffer_size / 2 + offset], buffer_size / 2 - offset);
|
||||
if(i < buffer_size / 2 - offset)
|
||||
{
|
||||
if(i>=10)
|
||||
file_recovery->calculated_file_size=file_recovery->file_size+offset+i;
|
||||
if(i >= 10)
|
||||
file_recovery->calculated_file_size = file_recovery->file_size + offset + i;
|
||||
return DC_STOP;
|
||||
}
|
||||
file_recovery->calculated_file_size=file_recovery->file_size+(buffer_size/2);
|
||||
file_recovery->calculated_file_size = file_recovery->file_size + (buffer_size / 2);
|
||||
return DC_CONTINUE;
|
||||
}
|
||||
|
||||
static int header_check_win(const unsigned char *buffer, const unsigned int buffer_size, const unsigned int safe_header_only, const file_recovery_t *file_recovery, file_recovery_t *file_recovery_new)
|
||||
{
|
||||
reset_file_recovery(file_recovery_new);
|
||||
file_recovery_new->extension=file_hint_win.extension;
|
||||
file_recovery_new->data_check=&data_check_win;
|
||||
file_recovery_new->file_check=&file_check_size;
|
||||
file_recovery_new->extension = file_hint_win.extension;
|
||||
file_recovery_new->data_check = &data_check_win;
|
||||
file_recovery_new->file_check = &file_check_size;
|
||||
return 1;
|
||||
}
|
||||
|
||||
static void register_header_check_win(file_stat_t *file_stat)
|
||||
{
|
||||
static const unsigned char win_header[31]= {
|
||||
0xef, 0xbb, 0xbf, 'O' , 'p' , 'e' , 'r' , 'a' ,
|
||||
' ' , 'P' , 'r' , 'e' , 'f' , 'e' , 'r' , 'e' ,
|
||||
'n' , 'c' , 'e' , 's' , ' ' , 'v' , 'e' , 'r' ,
|
||||
's' , 'i' , 'o' , 'n' , ' ' , '2' , '.'
|
||||
static const unsigned char win_header[31] = {
|
||||
0xef, 0xbb, 0xbf, 'O', 'p', 'e', 'r', 'a',
|
||||
' ', 'P', 'r', 'e', 'f', 'e', 'r', 'e',
|
||||
'n', 'c', 'e', 's', ' ', 'v', 'e', 'r',
|
||||
's', 'i', 'o', 'n', ' ', '2', '.'
|
||||
};
|
||||
register_header_check(0, win_header, sizeof(win_header), &header_check_win, file_stat);
|
||||
}
|
||||
|
|
120
src/utfsize.c
Normal file
120
src/utfsize.c
Normal file
|
@ -0,0 +1,120 @@
|
|||
/*
|
||||
|
||||
File: utfsize.c
|
||||
|
||||
Copyright (C) 2021 Christophe GRENIER <grenier@cgsecurity.org>
|
||||
|
||||
This software is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write the Free Software Foundation, Inc., 51
|
||||
Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
*/
|
||||
|
||||
#if !defined(SINGLE_FORMAT) || defined(SINGLE_FORMAT_txt) || defined(SINGLE_FORMAT_win)
|
||||
#ifdef HAVE_CONFIG_H
|
||||
#include <config.h>
|
||||
#endif
|
||||
#ifdef HAVE_STDLIB_H
|
||||
#include <stdlib.h>
|
||||
#endif
|
||||
#ifdef HAVE_STRING_H
|
||||
#include <string.h>
|
||||
#endif
|
||||
#include <stdio.h>
|
||||
#include "log.h"
|
||||
#include "utfsize.h"
|
||||
|
||||
int UTFsize(const unsigned char *buffer, const unsigned int buf_len)
|
||||
{
|
||||
const unsigned char *p=buffer; /* pointers to actual position in source buffer */
|
||||
unsigned int i=0;
|
||||
/*@
|
||||
@ loop invariant 0 <= i < buf_len + 3;
|
||||
@ loop invariant p == buffer + i;
|
||||
@ loop assigns i, p;
|
||||
@ loop variant buf_len - 1 - i;
|
||||
@*/
|
||||
while(i<buf_len)
|
||||
{
|
||||
/*@ assert i < buf_len; */
|
||||
/*@ assert p == buffer + i; */
|
||||
const unsigned char c=*p;
|
||||
if(c=='\0')
|
||||
return i;
|
||||
/* Reject some invalid UTF-8 sequences */
|
||||
if(c==0xc0 || c==0xc1 || c==0xf7 || c>=0xfd)
|
||||
return i;
|
||||
/*@ assert i + 1 >= buf_len || \valid_read(p+1); */
|
||||
/*@ assert i + 2 >= buf_len || \valid_read(p+2); */
|
||||
if((c & 0xf0)==0xe0 &&
|
||||
(i+1 >= buf_len || (*(p+1) & 0xc0)==0x80) &&
|
||||
(i+2 >= buf_len || (*(p+2) & 0xc0)==0x80))
|
||||
{ /* UTF8 l=3 */
|
||||
#ifdef DEBUG_TXT
|
||||
log_info("UTFsize i=%u l=3\n", i);
|
||||
#endif
|
||||
p+=3;
|
||||
i+=3;
|
||||
}
|
||||
else if((c & 0xe0)==0xc0 &&
|
||||
(i+1 >= buf_len || (*(p+1) & 0xc0)==0x80))
|
||||
{ /* UTF8 l=2 */
|
||||
#ifdef DEBUG_TXT
|
||||
log_info("UTFsize i=%u l=2\n", i);
|
||||
#endif
|
||||
p+=2;
|
||||
i+=2;
|
||||
}
|
||||
else
|
||||
{ /* Ascii UCS */
|
||||
#ifdef DEBUG_TXT
|
||||
log_info("UTFsize i=%u l=1 ? *p=%c\n", i, c);
|
||||
#endif
|
||||
switch(c)
|
||||
{
|
||||
case 0x00:
|
||||
case 0x01:
|
||||
case 0x02:
|
||||
case 0x03:
|
||||
case 0x04:
|
||||
case 0x05:
|
||||
case 0x06:
|
||||
case 0x07:
|
||||
case 0x0b:
|
||||
case 0x0c:
|
||||
case 0x10:
|
||||
case 0x11:
|
||||
case 0x12:
|
||||
case 0x13:
|
||||
case 0x14:
|
||||
case 0x15:
|
||||
case 0x16:
|
||||
case 0x17:
|
||||
case 0x18:
|
||||
case 0x19:
|
||||
case 0x1a:
|
||||
case 0x1b:
|
||||
case 0x1c:
|
||||
case 0x1d:
|
||||
case 0x1e:
|
||||
case 0x1f:
|
||||
case 0x7f:
|
||||
return i;
|
||||
}
|
||||
p++;
|
||||
i++;
|
||||
}
|
||||
}
|
||||
return (i<buf_len?i:buf_len);
|
||||
}
|
||||
#endif
|
|
@ -1,8 +1,8 @@
|
|||
/*
|
||||
|
||||
File: file_txt.h
|
||||
File: utfsize.h
|
||||
|
||||
Copyright (C) 2009 Christophe GRENIER <grenier@cgsecurity.org>
|
||||
Copyright (C) 2009-2021 Christophe GRENIER <grenier@cgsecurity.org>
|
||||
|
||||
This software is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
|
@ -19,8 +19,8 @@
|
|||
Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
*/
|
||||
#ifndef _FILE_TXT_H
|
||||
#define _FILE_TXT_H
|
||||
#ifndef _UTFSIZE_H
|
||||
#define _UTFSIZE_H
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
Loading…
Reference in a new issue