Move UTFsize() from src/file_txt.c to src/utfsize.c
src/file_win.c: add Frama-C annotations
This commit is contained in:
parent
5909fc97d0
commit
cef9304cf0
5 changed files with 157 additions and 115 deletions
|
@ -387,9 +387,10 @@ file_C = filegen.c \
|
|||
file_z2d.c \
|
||||
file_zcode.c \
|
||||
file_zip.c \
|
||||
file_zpr.c
|
||||
file_zpr.c \
|
||||
utfsize.c
|
||||
|
||||
file_H = ext2.h hfsp_struct.h filegen.h file_doc.h file_jpg.h file_gz.h file_riff.h file_sp3.h file_tar.h file_tiff.h file_txt.h luks_struct.h ntfs_struct.h ole.h pe.h suspend.h
|
||||
file_H = ext2.h hfsp_struct.h filegen.h file_doc.h file_jpg.h file_gz.h file_riff.h file_sp3.h file_tar.h file_tiff.h luks_struct.h ntfs_struct.h ole.h pe.h suspend.h utfsize.h
|
||||
|
||||
photorec_C = photorec.c phcfg.c addpart.c chgarch.c chgtype.c dir.c exfatp.c ext2grp.c ext2_dir.c ext2p.c fat_dir.c fatp.c file_found.c geometry.c ntfs_dir.c ntfsp.c pdisksel.c poptions.c sessionp.c dfxml.c partgptro.c
|
||||
|
||||
|
@ -467,6 +468,10 @@ session_tiff_le.framac: file_tiff.c file_tiff_be.c file_tiff_le.c common.c fileg
|
|||
$(CC) $(CFLAGS) -DMAIN_tiff_le -DHAVE_CONFIG_H -DSINGLE_FORMAT -DSINGLE_FORMAT_tiff -I.. $^ -o demo_tiff_le
|
||||
frama-c $^ -cpp-extra-args="-DMAIN_tiff_le -DHAVE_CONFIG_H -DSINGLE_FORMAT -DSINGLE_FORMAT_tiff -D__x86_64__ -I.." $(FRAMA_C_FLAGS) -save $@
|
||||
|
||||
session_txt.framac: file_txt.c common.c filegen.c log.c utfsize.c
|
||||
$(CC) $(CFLAGS) -DMAIN_txt -DHAVE_CONFIG_H -DSINGLE_FORMAT -DSINGLE_FORMAT_txt -I.. $^ -o demo_txt
|
||||
frama-c $^ -cpp-extra-args="-DMAIN_txt -DHAVE_CONFIG_H -DSINGLE_FORMAT -DSINGLE_FORMAT_txt -D__x86_64__ -I.." $(FRAMA_C_FLAGS) -save $@
|
||||
|
||||
session_%.framac: file_%.c common.c filegen.c log.c
|
||||
$(CC) $(CFLAGS) -DMAIN_$* -DHAVE_CONFIG_H -DSINGLE_FORMAT -DSINGLE_FORMAT_$* -I.. $^ -o demo_$*
|
||||
frama-c $^ -cpp-extra-args="-DMAIN_$* -DHAVE_CONFIG_H -DSINGLE_FORMAT -DSINGLE_FORMAT_$* -D__x86_64__ -I.." $(FRAMA_C_FLAGS) -save $@
|
||||
|
|
|
@ -41,7 +41,7 @@
|
|||
#include "filegen.h"
|
||||
#include "log.h"
|
||||
#include "memmem.h"
|
||||
#include "file_txt.h"
|
||||
#include "utfsize.h"
|
||||
#if defined(__FRAMAC__)
|
||||
#include "__fc_builtin.h"
|
||||
#endif
|
||||
|
@ -682,90 +682,6 @@ static int UTF2Lat(unsigned char *buffer_lower, const unsigned char *buffer, con
|
|||
return offset_dst;
|
||||
}
|
||||
|
||||
int UTFsize(const unsigned char *buffer, const unsigned int buf_len)
|
||||
{
|
||||
const unsigned char *p=buffer; /* pointers to actual position in source buffer */
|
||||
unsigned int i=0;
|
||||
/*@
|
||||
@ loop invariant 0 <= i < buf_len + 3;
|
||||
@ loop invariant p == buffer + i;
|
||||
@ loop assigns i, p;
|
||||
@ loop variant buf_len - 1 - i;
|
||||
@*/
|
||||
while(i<buf_len)
|
||||
{
|
||||
/*@ assert i < buf_len; */
|
||||
/*@ assert p == buffer + i; */
|
||||
const unsigned char c=*p;
|
||||
if(c=='\0')
|
||||
return i;
|
||||
/* Reject some invalid UTF-8 sequences */
|
||||
if(c==0xc0 || c==0xc1 || c==0xf7 || c>=0xfd)
|
||||
return i;
|
||||
/*@ assert i + 1 >= buf_len || \valid_read(p+1); */
|
||||
/*@ assert i + 2 >= buf_len || \valid_read(p+2); */
|
||||
if((c & 0xf0)==0xe0 &&
|
||||
(i+1 >= buf_len || (*(p+1) & 0xc0)==0x80) &&
|
||||
(i+2 >= buf_len || (*(p+2) & 0xc0)==0x80))
|
||||
{ /* UTF8 l=3 */
|
||||
#ifdef DEBUG_TXT
|
||||
log_info("UTFsize i=%u l=3\n", i);
|
||||
#endif
|
||||
p+=3;
|
||||
i+=3;
|
||||
}
|
||||
else if((c & 0xe0)==0xc0 &&
|
||||
(i+1 >= buf_len || (*(p+1) & 0xc0)==0x80))
|
||||
{ /* UTF8 l=2 */
|
||||
#ifdef DEBUG_TXT
|
||||
log_info("UTFsize i=%u l=2\n", i);
|
||||
#endif
|
||||
p+=2;
|
||||
i+=2;
|
||||
}
|
||||
else
|
||||
{ /* Ascii UCS */
|
||||
#ifdef DEBUG_TXT
|
||||
log_info("UTFsize i=%u l=1 ? *p=%c\n", i, c);
|
||||
#endif
|
||||
switch(c)
|
||||
{
|
||||
case 0x00:
|
||||
case 0x01:
|
||||
case 0x02:
|
||||
case 0x03:
|
||||
case 0x04:
|
||||
case 0x05:
|
||||
case 0x06:
|
||||
case 0x07:
|
||||
case 0x0b:
|
||||
case 0x0c:
|
||||
case 0x10:
|
||||
case 0x11:
|
||||
case 0x12:
|
||||
case 0x13:
|
||||
case 0x14:
|
||||
case 0x15:
|
||||
case 0x16:
|
||||
case 0x17:
|
||||
case 0x18:
|
||||
case 0x19:
|
||||
case 0x1a:
|
||||
case 0x1b:
|
||||
case 0x1c:
|
||||
case 0x1d:
|
||||
case 0x1e:
|
||||
case 0x1f:
|
||||
case 0x7f:
|
||||
return i;
|
||||
}
|
||||
p++;
|
||||
i++;
|
||||
}
|
||||
}
|
||||
return (i<buf_len?i:buf_len);
|
||||
}
|
||||
|
||||
/*@
|
||||
@ requires buffer_size >= 2 && (buffer_size&1)==0;
|
||||
@ requires \valid_read((char *)buffer+(0..buffer_size-1));
|
||||
|
|
|
@ -33,9 +33,10 @@
|
|||
#include <stdio.h>
|
||||
#include "types.h"
|
||||
#include "filegen.h"
|
||||
#include "file_txt.h"
|
||||
#include "utfsize.h"
|
||||
#include "common.h"
|
||||
|
||||
/*@ requires \valid(file_stat); */
|
||||
static void register_header_check_win(file_stat_t *file_stat);
|
||||
|
||||
const file_hint_t file_hint_win = {
|
||||
|
|
120
src/utfsize.c
Normal file
120
src/utfsize.c
Normal file
|
@ -0,0 +1,120 @@
|
|||
/*
|
||||
|
||||
File: utfsize.c
|
||||
|
||||
Copyright (C) 2021 Christophe GRENIER <grenier@cgsecurity.org>
|
||||
|
||||
This software is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License along
|
||||
with this program; if not, write the Free Software Foundation, Inc., 51
|
||||
Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
*/
|
||||
|
||||
#if !defined(SINGLE_FORMAT) || defined(SINGLE_FORMAT_txt) || defined(SINGLE_FORMAT_win)
|
||||
#ifdef HAVE_CONFIG_H
|
||||
#include <config.h>
|
||||
#endif
|
||||
#ifdef HAVE_STDLIB_H
|
||||
#include <stdlib.h>
|
||||
#endif
|
||||
#ifdef HAVE_STRING_H
|
||||
#include <string.h>
|
||||
#endif
|
||||
#include <stdio.h>
|
||||
#include "log.h"
|
||||
#include "utfsize.h"
|
||||
|
||||
int UTFsize(const unsigned char *buffer, const unsigned int buf_len)
|
||||
{
|
||||
const unsigned char *p=buffer; /* pointers to actual position in source buffer */
|
||||
unsigned int i=0;
|
||||
/*@
|
||||
@ loop invariant 0 <= i < buf_len + 3;
|
||||
@ loop invariant p == buffer + i;
|
||||
@ loop assigns i, p;
|
||||
@ loop variant buf_len - 1 - i;
|
||||
@*/
|
||||
while(i<buf_len)
|
||||
{
|
||||
/*@ assert i < buf_len; */
|
||||
/*@ assert p == buffer + i; */
|
||||
const unsigned char c=*p;
|
||||
if(c=='\0')
|
||||
return i;
|
||||
/* Reject some invalid UTF-8 sequences */
|
||||
if(c==0xc0 || c==0xc1 || c==0xf7 || c>=0xfd)
|
||||
return i;
|
||||
/*@ assert i + 1 >= buf_len || \valid_read(p+1); */
|
||||
/*@ assert i + 2 >= buf_len || \valid_read(p+2); */
|
||||
if((c & 0xf0)==0xe0 &&
|
||||
(i+1 >= buf_len || (*(p+1) & 0xc0)==0x80) &&
|
||||
(i+2 >= buf_len || (*(p+2) & 0xc0)==0x80))
|
||||
{ /* UTF8 l=3 */
|
||||
#ifdef DEBUG_TXT
|
||||
log_info("UTFsize i=%u l=3\n", i);
|
||||
#endif
|
||||
p+=3;
|
||||
i+=3;
|
||||
}
|
||||
else if((c & 0xe0)==0xc0 &&
|
||||
(i+1 >= buf_len || (*(p+1) & 0xc0)==0x80))
|
||||
{ /* UTF8 l=2 */
|
||||
#ifdef DEBUG_TXT
|
||||
log_info("UTFsize i=%u l=2\n", i);
|
||||
#endif
|
||||
p+=2;
|
||||
i+=2;
|
||||
}
|
||||
else
|
||||
{ /* Ascii UCS */
|
||||
#ifdef DEBUG_TXT
|
||||
log_info("UTFsize i=%u l=1 ? *p=%c\n", i, c);
|
||||
#endif
|
||||
switch(c)
|
||||
{
|
||||
case 0x00:
|
||||
case 0x01:
|
||||
case 0x02:
|
||||
case 0x03:
|
||||
case 0x04:
|
||||
case 0x05:
|
||||
case 0x06:
|
||||
case 0x07:
|
||||
case 0x0b:
|
||||
case 0x0c:
|
||||
case 0x10:
|
||||
case 0x11:
|
||||
case 0x12:
|
||||
case 0x13:
|
||||
case 0x14:
|
||||
case 0x15:
|
||||
case 0x16:
|
||||
case 0x17:
|
||||
case 0x18:
|
||||
case 0x19:
|
||||
case 0x1a:
|
||||
case 0x1b:
|
||||
case 0x1c:
|
||||
case 0x1d:
|
||||
case 0x1e:
|
||||
case 0x1f:
|
||||
case 0x7f:
|
||||
return i;
|
||||
}
|
||||
p++;
|
||||
i++;
|
||||
}
|
||||
}
|
||||
return (i<buf_len?i:buf_len);
|
||||
}
|
||||
#endif
|
|
@ -1,8 +1,8 @@
|
|||
/*
|
||||
|
||||
File: file_txt.h
|
||||
File: utfsize.h
|
||||
|
||||
Copyright (C) 2009 Christophe GRENIER <grenier@cgsecurity.org>
|
||||
Copyright (C) 2009-2021 Christophe GRENIER <grenier@cgsecurity.org>
|
||||
|
||||
This software is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
|
@ -19,8 +19,8 @@
|
|||
Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
|
||||
*/
|
||||
#ifndef _FILE_TXT_H
|
||||
#define _FILE_TXT_H
|
||||
#ifndef _UTFSIZE_H
|
||||
#define _UTFSIZE_H
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
Loading…
Reference in a new issue