From 96ecc02a966db11897b5889f2b3ce88d39a0855c Mon Sep 17 00:00:00 2001 From: Christophe Grenier Date: Sun, 25 Oct 2020 16:23:47 +0100 Subject: [PATCH] src/file_mkv.c: rewrite mkv parser --- src/file_mkv.c | 173 +++++++++++++++++++++++++++++++++++-------------- 1 file changed, 125 insertions(+), 48 deletions(-) diff --git a/src/file_mkv.c b/src/file_mkv.c index 91701a2f..4ab70547 100644 --- a/src/file_mkv.c +++ b/src/file_mkv.c @@ -35,13 +35,14 @@ #include "types.h" #include "filegen.h" #include "common.h" -#include "memmem.h" #ifdef DEBUG_MKV #include "log.h" #endif +/*@ + @ requires \valid(file_stat); + @*/ static void register_header_check_mkv(file_stat_t *file_stat); -static int header_check_mkv(const unsigned char *buffer, const unsigned int buffer_size, const unsigned int safe_header_only, const file_recovery_t *file_recovery, file_recovery_t *file_recovery_new); const file_hint_t file_hint_mkv= { .extension="mkv", @@ -52,71 +53,127 @@ const file_hint_t file_hint_mkv= { .register_header_check=®ister_header_check_mkv }; -static const unsigned char *EBML_find(const unsigned char *buffer, const unsigned int buffer_size, const unsigned char *EBML_Header, const unsigned int EBML_size) -{ - const unsigned char *tmp=(const unsigned char *)td_memmem(buffer, buffer_size, EBML_Header, EBML_size); - if(tmp==NULL) - return NULL; - return tmp+EBML_size; -} - +/*@ + @ requires \valid_read(p + (0 .. p_size-1)); + @ requires \valid(uint64); + @ requires \separated(p + (..), uint64); + @ ensures -1 == \result || (1 <= \result <= 8); + @ ensures -1 != \result ==> \initialized(uint64); + @ ensures -1 != \result ==> *uint64 <= 0xfeffffffffffffff; + @ assigns *uint64; + @*/ static int EBML_read_unsigned(const unsigned char *p, const unsigned int p_size, uint64_t *uint64) { unsigned char test_bit = 0x80; unsigned int i, bytes = 1; - if(p_size==0 || *p== 0x00) + const unsigned char c=*p; + uint64_t val; + if(p_size==0 || c== 0x00) return -1; - while((*p & test_bit) != test_bit) + /*@ assert c != 0; */ + /*@ + @ loop invariant test_bit > 0; + @ loop invariant test_bit == (0x100 >> bytes); + @ loop assigns test_bit, bytes; + @ loop unroll 8; + @*/ + while((c & test_bit) != test_bit) { + /*@ assert c < test_bit; */ test_bit >>= 1; bytes++; } + /*@ assert (c & test_bit) == test_bit; */ + /*@ assert 1 <= bytes <= 8; */ + /*@ assert c >= test_bit; */ if(p_size < bytes) return -1; - *uint64 = *p - test_bit; //eliminate first bit + /*@ assert bytes <= p_size; */ + val = c - test_bit; //eliminate first bit, val < 0x80 + /*@ assert val <= 0xfe; */ + /*@ + @ loop assigns i, val; + @ loop unroll 8; + @ loop variant bytes-i; + @*/ for(i=1; i 0; + @ requires \valid_read(buffer + (0 .. buffer_size-1)); + @ requires \valid_read(EBML_Header + (0 .. EBML_size-1)); + @ assigns \result; + @*/ +static int EBML_find(const unsigned char *buffer, const unsigned int buffer_size, const unsigned char *EBML_Header, const unsigned int EBML_size) { - uint64_t strlength; - unsigned char test_bit = 0x80; - unsigned int i, bytes = 1; - if(p_size==0 || *p== 0x00) - return -1; - while((*p & test_bit) != test_bit) + unsigned int offset=0; + /*@ + @ loop assigns offset; + @*/ + while(offset < buffer_size) { - test_bit >>= 1; - bytes++; + uint64_t uint64=0; + int bytes; + bytes = EBML_read_unsigned(&buffer[offset], buffer_size-offset, &uint64); +#ifdef DEBUG_MKV + log_info("EBML_find %02x%02x bytes=%d\n", buffer[offset], buffer[offset+1], bytes); +#endif + if(bytes <= 0) + return -1; + if((unsigned int)bytes == EBML_size && memcmp(&buffer[offset], EBML_Header, EBML_size)==0) + { + return offset+bytes; + } + offset += bytes; + if(offset >= buffer_size) + return -1; + bytes = EBML_read_unsigned(&buffer[offset], buffer_size-offset, &uint64); + if(bytes <= 0 || uint64 > buffer_size) + return -1; + offset += bytes; + offset += uint64; } - if(p_size < bytes) + return -1; +} + +/*@ + @ requires \valid_read(p + (0 .. p_size-1)); + @ requires \valid(strlength); + @ requires \separated(p + (..), strlength); + @ ensures -1 == \result || (1 <= \result <= 8); + @ ensures -1 != \result ==> \initialized(strlength); + @ ensures -1 != \result ==> *strlength <= 0xfeffffffffffffff; + @ ensures -1 != \result ==> \result + *strlength <= p_size; + @ assigns *strlength, \result; + @*/ +static int EBML_read_string(const unsigned char *p, const unsigned int p_size, uint64_t *strlength) +{ + int bytes; + *strlength = 0; + bytes = EBML_read_unsigned(p, p_size, strlength); +#ifdef DEBUG_MKV + log_info("EBML_read_string bytes=%d strlength=%llu\n", bytes, (long long unsigned)*strlength); +#endif + if(bytes <= 0) return -1; - strlength = (uint64_t)(*p - test_bit); //eliminate first bit - for(i=1; i p_size) + /*@ assert 1 <= bytes <= 8; */ + /*@ assert *strlength <= 0xfeffffffffffffff; */ + if(bytes + *strlength > p_size) return -1; - *string = (char *)MALLOC(strlength+1); - memcpy(*string, p+bytes, strlength); - (*string)[strlength] = '\0'; - return bytes+strlength; + /*@ assert bytes + *strlength <= p_size; */ + return bytes; } static const unsigned char EBML_header[4]= { 0x1a,0x45,0xdf,0xa3}; -static void register_header_check_mkv(file_stat_t *file_stat) -{ - register_header_check(0, EBML_header,sizeof(EBML_header), &header_check_mkv, file_stat); -} - static int header_check_mkv(const unsigned char *buffer, const unsigned int buffer_size, const unsigned int safe_header_only, const file_recovery_t *file_recovery, file_recovery_t *file_recovery_new) { if(memcmp(buffer,EBML_header,sizeof(EBML_header))!=0) @@ -126,12 +183,14 @@ static int header_check_mkv(const unsigned char *buffer, const unsigned int buff const unsigned char EBML_Segment[4]= { 0x18,0x53,0x80,0x67}; uint64_t segment_size=0; uint64_t header_data_size=0; - char *doctype=NULL; const unsigned char *p; unsigned int header_data_offset; unsigned int segment_offset; unsigned int segment_data_offset; + uint64_t strlength = 0; + int bytes; int len; + int offset_doctype; if((len=EBML_read_unsigned(buffer+sizeof(EBML_header), buffer_size-sizeof(EBML_header), &header_data_size)) < 0) @@ -139,10 +198,14 @@ static int header_check_mkv(const unsigned char *buffer, const unsigned int buff header_data_offset = sizeof(EBML_header) + len; if(header_data_offset >= buffer_size) return 0; - segment_offset = header_data_offset + header_data_size; #ifdef DEBUG_MKV log_info("header_data_offset %llu\n", (long long unsigned) header_data_offset); log_info("header_data_size %llu\n", (long long unsigned) header_data_size); +#endif + if(header_data_size >= buffer_size) + return 0; + segment_offset = header_data_offset + header_data_size; +#ifdef DEBUG_MKV log_info("segment_offset %llu\n", (long long unsigned) segment_offset); #endif if(segment_offset +sizeof(EBML_Segment) >= buffer_size) @@ -161,17 +224,26 @@ static int header_check_mkv(const unsigned char *buffer, const unsigned int buff log_info("segment size %llu\n", (long long unsigned) segment_size); #endif /* get EBML_DocType, it will be used to set the file extension */ - p=EBML_find(&buffer[header_data_offset], header_data_size, EBML_DocType, sizeof(EBML_DocType)); - if (p == NULL || EBML_read_string(p, header_data_size-(p-&buffer[header_data_offset]), &doctype) < 0) + offset_doctype=EBML_find(&buffer[header_data_offset], header_data_size, EBML_DocType, sizeof(EBML_DocType)); +#ifdef DEBUG_MKV + log_info("offset_doctype = %u\n", offset_doctype); +#endif + if(offset_doctype < 0 || header_data_size <= (uint64_t)offset_doctype) + return 0; + /*@ assert header_data_size > offset_doctype; */ + p = &buffer[header_data_offset+offset_doctype]; + bytes = EBML_read_string(&buffer[header_data_offset+offset_doctype], header_data_size-offset_doctype, &strlength); + if (bytes < 0) return 0; reset_file_recovery(file_recovery_new); - if(strcmp(doctype,"matroska")==0) + if( (strlength == 8 && memcmp(p+bytes,"matroska", 8)==0) || + (strlength == 9 && memcmp(p+bytes,"matroska", 9)==0)) file_recovery_new->extension=file_hint_mkv.extension; - else if(strcmp(doctype,"webm")==0) + else if((strlength == 4 && memcmp(p+bytes,"webm", 4)==0) || + ( strlength == 5 && memcmp(p+bytes,"webm", 5)==0)) file_recovery_new->extension="webm"; else file_recovery_new->extension="ebml"; - free(doctype); if(segment_size > 0) { file_recovery_new->calculated_file_size = segment_data_offset + segment_size; @@ -184,4 +256,9 @@ static int header_check_mkv(const unsigned char *buffer, const unsigned int buff } return 1; } + +static void register_header_check_mkv(file_stat_t *file_stat) +{ + register_header_check(0, EBML_header,sizeof(EBML_header), &header_check_mkv, file_stat); +} #endif