diff options
author | Thomas Deutschmann <whissi@gentoo.org> | 2021-03-30 10:59:39 +0200 |
---|---|---|
committer | Thomas Deutschmann <whissi@gentoo.org> | 2021-04-01 00:04:14 +0200 |
commit | 5ff1d6955496b3cf9a35042c9ac35db43bc336b1 (patch) | |
tree | 6d470f7eb448f59f53e8df1010aec9dad8ce1f72 /extract/src | |
parent | Import Ghostscript 9.53.1 (diff) | |
download | ghostscript-gpl-patches-5ff1d6955496b3cf9a35042c9ac35db43bc336b1.tar.gz ghostscript-gpl-patches-5ff1d6955496b3cf9a35042c9ac35db43bc336b1.tar.bz2 ghostscript-gpl-patches-5ff1d6955496b3cf9a35042c9ac35db43bc336b1.zip |
Import Ghostscript 9.54 (ghostscript-9.54)
Signed-off-by: Thomas Deutschmann <whissi@gentoo.org>
Diffstat (limited to 'extract/src')
31 files changed, 11346 insertions, 0 deletions
diff --git a/extract/src/alloc.c b/extract/src/alloc.c new file mode 100644 index 00000000..dee2f99a --- /dev/null +++ b/extract/src/alloc.c @@ -0,0 +1,120 @@ +#include "../include/extract_alloc.h" +#include "memento.h" + +#include <assert.h> +#include <errno.h> +#include <stdlib.h> +#include <string.h> + + +struct extract_alloc_t +{ + extract_realloc_fn_t realloc_fn; + void* realloc_state; + size_t exp_min_alloc_size; + extract_alloc_stats_t stats; +}; + +int extract_alloc_create(extract_realloc_fn_t realloc_fn, void* realloc_state, extract_alloc_t** palloc) +{ + assert(realloc_fn); + assert(palloc); + *palloc = realloc_fn(realloc_state, NULL /*ptr*/, sizeof(**palloc)); + if (!*palloc) { + errno = ENOMEM; + return -1; + } + memset(*palloc, 0, sizeof(**palloc)); + (*palloc)->realloc_fn = realloc_fn; + (*palloc)->realloc_state = realloc_state; + (*palloc)->exp_min_alloc_size = 0; + return 0; +} + +void extract_alloc_destroy(extract_alloc_t** palloc) +{ + if (!*palloc) return; + (*palloc)->realloc_fn((*palloc)->realloc_state, *palloc, 0 /*newsize*/); + *palloc = NULL; +} + +extract_alloc_stats_t* extract_alloc_stats(extract_alloc_t* alloc) +{ + return &alloc->stats; +} + +static size_t round_up(extract_alloc_t* alloc, size_t n) +{ + if (alloc && alloc->exp_min_alloc_size) { + /* Round up to power of two. */ + size_t ret; + if (n==0) return 0; + ret = alloc->exp_min_alloc_size; + for(;;) { + size_t ret_old; + if (ret >= n) return ret; + ret_old = ret; + ret *= 2; + assert(ret > ret_old); + (void) ret_old; + } + } + else { + return n; + } +} + +int (extract_malloc)(extract_alloc_t* alloc, void** pptr, size_t size) +{ + void* p; + size = round_up(alloc, size); + p = (alloc) ? 
alloc->realloc_fn(alloc->realloc_state, NULL, size) : malloc(size); + *pptr = p; + if (!p && size) + { + if (alloc) errno = ENOMEM; + return -1; + } + if (alloc) alloc->stats.num_malloc += 1; + return 0; +} + +int (extract_realloc)(extract_alloc_t* alloc, void** pptr, size_t newsize) +{ + void* p = (alloc) ? alloc->realloc_fn(alloc->realloc_state, *pptr, newsize) : realloc(*pptr, newsize); + if (!p && newsize) + { + if (alloc) errno = ENOMEM; + return -1; + } + *pptr = p; + if (alloc) alloc->stats.num_realloc += 1; + return 0; +} + +int (extract_realloc2)(extract_alloc_t* alloc, void** pptr, size_t oldsize, size_t newsize) +{ + /* We ignore <oldsize> if <ptr> is NULL - allows callers to not worry about + edge cases e.g. with strlen+1. */ + oldsize = (*pptr) ? round_up(alloc, oldsize) : 0; + newsize = round_up(alloc, newsize); + if (newsize == oldsize) return 0; + return (extract_realloc)(alloc, pptr, newsize); +} + +void (extract_free)(extract_alloc_t* alloc, void** pptr) +{ + if (alloc) { + (void) alloc->realloc_fn(alloc->realloc_state, *pptr, 0); + } + else { + free(*pptr); + } + *pptr = NULL; + if (alloc) alloc->stats.num_free += 1; +} + +void extract_alloc_exp_min(extract_alloc_t* alloc, size_t size) +{ + alloc->exp_min_alloc_size = size; +} diff --git a/extract/src/astring.c b/extract/src/astring.c new file mode 100644 index 00000000..1d273c9e --- /dev/null +++ b/extract/src/astring.c @@ -0,0 +1,41 @@ +#include "../include/extract_alloc.h" + +#include "astring.h" +#include "memento.h" + +#include <stdlib.h> +#include <string.h> + + +void extract_astring_init(extract_astring_t* string) +{ + string->chars = NULL; + string->chars_num = 0; +} + +void extract_astring_free(extract_alloc_t* alloc, extract_astring_t* string) +{ + extract_free(alloc, &string->chars); + extract_astring_init(string); +} + + +int extract_astring_catl(extract_alloc_t* alloc, extract_astring_t* string, const char* s, size_t s_len) +{ + if (extract_realloc2(alloc, &string->chars, 
string->chars_num+1, string->chars_num + s_len + 1)) return -1; + memcpy(string->chars + string->chars_num, s, s_len); + string->chars[string->chars_num + s_len] = 0; + string->chars_num += s_len; + return 0; +} + +int extract_astring_catc(extract_alloc_t* alloc, extract_astring_t* string, char c) +{ + return extract_astring_catl(alloc, string, &c, 1); +} + +int extract_astring_cat(extract_alloc_t* alloc, extract_astring_t* string, const char* s) +{ + return extract_astring_catl(alloc, string, s, strlen(s)); +} + diff --git a/extract/src/astring.h b/extract/src/astring.h new file mode 100644 index 00000000..947e6587 --- /dev/null +++ b/extract/src/astring.h @@ -0,0 +1,23 @@ +#ifndef ARTIFEX_EXTRACT_AUTOSTRING_XML +#define ARTIFEX_EXTRACT_AUTOSTRING_XML + +/* Only for internal use by extract code. */ + +/* A simple string struct that reallocs as required. */ +typedef struct +{ + char* chars; /* NULL or zero-terminated. */ + size_t chars_num; /* Length of string pointed to by .chars. */ +} extract_astring_t; + +void extract_astring_init(extract_astring_t* string); + +void extract_astring_free(extract_alloc_t* alloc, extract_astring_t* string); + +int extract_astring_catl(extract_alloc_t* alloc, extract_astring_t* string, const char* s, size_t s_len); + +int extract_astring_catc(extract_alloc_t* alloc, extract_astring_t* string, char c); + +int extract_astring_cat(extract_alloc_t* alloc, extract_astring_t* string, const char* s); + +#endif diff --git a/extract/src/buffer-test.c b/extract/src/buffer-test.c new file mode 100644 index 00000000..6701fbab --- /dev/null +++ b/extract/src/buffer-test.c @@ -0,0 +1,306 @@ +#include "../include/extract_buffer.h" +#include "../include/extract_alloc.h" + +#include "mem.h" +#include "memento.h" +#include "outf.h" + +#include <assert.h> +#include <errno.h> +#include <stdlib.h> + + +static int rand_int(int max) +/* Returns random int from 0..max-1. 
*/ +{ + return (int) (rand() / (RAND_MAX+1.0) * max); +} + + +/* Support for an extract_buffer_t that reads from / writes to a fixed block of +memory, with a fn_cache() that returns a randomly-sized cache each time it is +called, and read/write functions that do random short reads and writes. */ + +typedef struct +{ + extract_alloc_t* alloc; + char* data; + size_t bytes; /* Size of data[]. */ + size_t pos; /* Current position in data[]. */ + char cache[137]; + int num_calls_cache; + int num_calls_read; + int num_calls_write; +} mem_t; + +static int s_read(void* handle, void* destination, size_t bytes, size_t* o_actual) +/* Does a randomised short read. */ +{ + mem_t* r = handle; + size_t n = 91; + assert(bytes > 0); + r->num_calls_read += 1; + assert(r->pos <= r->bytes); + if (n > bytes) n = bytes; + if (n > r->bytes - r->pos) n = r->bytes - r->pos; + if (n) n = rand_int((int) n-1) + 1; + memcpy(destination, r->data + r->pos, n); + r->pos += n; + *o_actual = n; + return 0; +} + +static int s_read_cache(void* handle, void** o_cache, size_t* o_numbytes) +/* Returns a cache with randomised size. */ +{ + mem_t* r = handle; + int n; + r->num_calls_cache += 1; + *o_cache = r->cache; + n = (int) (r->bytes - r->pos); + if (n > (int) sizeof(r->cache)) n = sizeof(r->cache); + if (n) n = rand_int( n - 1) + 1; + memcpy(r->cache, r->data + r->pos, n); + r->pos += n; + *o_cache = r->cache; + *o_numbytes = n; + return 0; +} + +static void s_read_buffer_close(void* handle) +{ + mem_t* r = handle; + extract_free(r->alloc, &r->data); +} + +static void s_create_read_buffer(extract_alloc_t* alloc, int bytes, mem_t* r, extract_buffer_t** o_buffer) +/* Creates extract_buffer_t that reads from randomised data using randomised +short reads and cache with randomised sizes. 
*/ +{ + int i; + int e; + if (extract_malloc(alloc, &r->data, bytes)) abort(); + for (i=0; i<bytes; ++i) { + r->data[i] = (char) rand(); + } + r->alloc = alloc; + r->bytes = bytes; + r->pos = 0; + r->num_calls_cache = 0; + r->num_calls_read = 0; + r->num_calls_write = 0; + e = extract_buffer_open(alloc, r, s_read, NULL /*write*/, s_read_cache, s_read_buffer_close, o_buffer); + assert(!e); +} + +static void test_read(void) +{ + /* Create read buffer with randomised content. */ + int len = 12345; + mem_t r; + char* out_buffer; + int out_pos; + int its; + int e; + extract_buffer_t* buffer; + s_create_read_buffer(NULL /*alloc*/, len, &r, &buffer); + + /* Repeatedly read from read-buffer until we get EOF, and check we read the + original content. */ + if (extract_malloc(r.alloc, &out_buffer, len)) abort(); + out_pos = 0; + for (its=0;; ++its) { + size_t actual; + int n = rand_int(120)+1; + int e = extract_buffer_read(buffer, out_buffer + out_pos, n, &actual); + out_pos += (int) actual; + assert(out_pos == (int) extract_buffer_pos(buffer)); + if (e == 1) break; + assert(!e); + assert(!memcmp(out_buffer, r.data, out_pos)); + } + assert(out_pos == len); + assert(!memcmp(out_buffer, r.data, len)); + outf("its=%i num_calls_read=%i num_calls_write=%i num_calls_cache=%i", + its, r.num_calls_read, r.num_calls_write, r.num_calls_cache); + extract_free(r.alloc, &out_buffer); + out_buffer = NULL; + e = extract_buffer_close(&buffer); + assert(!e); + + outf("Read test passed.\n"); +} + + +static int s_write(void* handle, const void* source, size_t bytes, size_t* o_actual) +/* Does a randomised short write. 
*/ +{ + mem_t* r = handle; + int n = 61; + r->num_calls_write += 1; + if (n > (int) bytes) n = (int) bytes; + if (n > (int) (r->bytes - r->pos)) n = (int) (r->bytes - r->pos); + assert(n); + n = rand_int((int) n-1) + 1; + memcpy(r->data + r->pos, source, n); + r->data[r->bytes] = 0; + r->pos += n; + *o_actual = n; + return 0; +} + +static int s_write_cache(void* handle, void** o_cache, size_t* o_numbytes) +/* Returns a cache with randomised size. */ +{ + mem_t* r = handle; + int n; + r->num_calls_cache += 1; + assert(r->bytes >= r->pos); + *o_cache = r->cache; + n = (int) (r->bytes - r->pos); + if (n > (int) sizeof(r->cache)) n = sizeof(r->cache); + if (n) n = rand_int( n - 1) + 1; + *o_cache = r->cache; + *o_numbytes = n; + /* We will return a zero-length cache at EOF. */ + return 0; +} + +static void s_write_buffer_close(void* handle) +{ + mem_t* mem = handle; + outf("*** freeing mem->data=%p", mem->data); + extract_free(mem->alloc, &mem->data); +} + +static void s_create_write_buffer(extract_alloc_t* alloc, size_t bytes, mem_t* r, extract_buffer_t** o_buffer) +/* Creates extract_buffer_t that reads from randomised data using randomised +short reads and cache with randomised sizes. */ +{ + int e; + if (extract_malloc(alloc, &r->data, bytes+1)) abort(); + extract_bzero(r->data, bytes); + r->alloc = alloc; + r->bytes = bytes; + r->pos = 0; + r->num_calls_cache = 0; + r->num_calls_read = 0; + r->num_calls_write = 0; + e = extract_buffer_open(r->alloc, r, NULL /*read*/, s_write, s_write_cache, s_write_buffer_close, o_buffer); + assert(!e); +} + + +static void test_write(void) +{ + /* Create write buffer. */ + size_t len = 12345; + mem_t r; + extract_buffer_t* buffer; + char* out_buffer; + unsigned i; + size_t out_pos = 0; + int its; + int e; + + s_create_write_buffer(NULL /*alloc*/, len, &r, &buffer); + + /* Write to read-buffer, and check it contains the original content. 
*/ + if (extract_malloc(r.alloc, &out_buffer, len)) abort(); + for (i=0; i<len; ++i) { + out_buffer[i] = (char) ('a' + rand_int(26)); + } + for (its=0;; ++its) { + size_t actual; + size_t n = rand_int(12)+1; + int e = extract_buffer_write(buffer, out_buffer+out_pos, n, &actual); + out_pos += actual; + assert(out_pos == extract_buffer_pos(buffer)); + if (e == 1) break; + assert(!e); + } + assert(out_pos == len); + assert(!memcmp(out_buffer, r.data, len)); + extract_free(r.alloc, &out_buffer); + outf("its=%i num_calls_read=%i num_calls_write=%i num_calls_cache=%i", + its, r.num_calls_read, r.num_calls_write, r.num_calls_cache); + e = extract_buffer_close(&buffer); + assert(!e); + outf("Write test passed.\n"); +} + +static void test_file(void) +{ + /* Check we can write 3 bytes to file. */ + extract_buffer_t* file_buffer; + if (extract_buffer_open_file(NULL /*alloc*/, "test/generated/buffer-file", 1 /*writable*/, &file_buffer)) abort(); + + { + size_t n; + int e; + errno = 0; + e = extract_buffer_write(file_buffer, "foo", 3, &n); + if (e == 0 && n == 3) {} + else { + outf("extract_buffer_write() returned e=%i errno=%i n=%zi", e, errno, n); + abort(); + } + } + if (extract_buffer_close(&file_buffer)) abort(); + + /* Check we get back expected short reads and EOF when reading from 3-byte + file created above. 
*/ + if (extract_buffer_open_file(NULL /*alloc*/, "test/generated/buffer-file", 0 /*writable*/, &file_buffer)) abort(); + + { + size_t n; + char buffer[10]; + int e; + errno = 0; + e = extract_buffer_read(file_buffer, buffer, 2, &n); + if (e == 0 && n == 2) {} + else { + outf("extract_buffer_read() returned e=%i errno=%i n=%zi", e, errno, n); + abort(); + } + e = extract_buffer_read(file_buffer, buffer, 3, &n); + if (e == 1 && n == 1) {} + else { + outf("extract_buffer_read() returned e=%i errno=%i n=%zi", e, errno, n); + abort(); + } + e = extract_buffer_read(file_buffer, buffer, 3, &n); + if (e == 1 && n == 0) {} + else { + outf("extract_buffer_read() returned e=%i errno=%i n=%zi", e, errno, n); + abort(); + } + } + if (extract_buffer_close(&file_buffer)) abort(); + + /* Check writing to read-only file buffer fails. */ + { + int e; + char text[] = "hello world"; + size_t actual; + if (extract_buffer_open_file(NULL /*alloc*/, "test/generated/buffer-file", 0 /*writable*/, &file_buffer)) { + abort(); + } + + e = extract_buffer_write(file_buffer, text, sizeof(text)-1, &actual); + outf("extract_buffer_write() on read buffer returned e=%i actual=%zi", e, actual); + if (e != -1 || errno != EINVAL) abort(); + if (extract_buffer_close(&file_buffer)) abort(); + } + + outf("file buffer tests passed.\n"); +} + +int main(void) +{ + outf_verbose_set(1); + test_read(); + test_write(); + test_file(); + return 0; +} diff --git a/extract/src/buffer.c b/extract/src/buffer.c new file mode 100644 index 00000000..3fd35bfd --- /dev/null +++ b/extract/src/buffer.c @@ -0,0 +1,477 @@ +#include "../include/extract_buffer.h" +#include "../include/extract_alloc.h" + +#include "memento.h" +#include "outf.h" + +#include <assert.h> +#include <errno.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + + +struct extract_buffer_t +{ + /* First member must be extract_buffer_cache_t - required by inline + implementations of extract_buffer_read() and extract_buffer_write(). 
*/ + extract_buffer_cache_t cache; + extract_alloc_t* alloc; + void* handle; + extract_buffer_fn_read fn_read; + extract_buffer_fn_write fn_write; + extract_buffer_fn_cache fn_cache; + extract_buffer_fn_close fn_close; + size_t pos; /* Does not include bytes currently read/written to cache. */ +}; + + +extract_alloc_t* extract_buffer_alloc(extract_buffer_t* buffer) +{ + return buffer->alloc; +} + + +int extract_buffer_open( + extract_alloc_t* alloc, + void* handle, + extract_buffer_fn_read fn_read, + extract_buffer_fn_write fn_write, + extract_buffer_fn_cache fn_cache, + extract_buffer_fn_close fn_close, + extract_buffer_t** o_buffer + ) +{ + int e = -1; + extract_buffer_t* buffer; + if (extract_malloc(alloc, &buffer, sizeof(*buffer))) goto end; + + buffer->alloc = alloc; + buffer->handle = handle; + buffer->fn_read = fn_read; + buffer->fn_write = fn_write; + buffer->fn_cache = fn_cache; + buffer->fn_close = fn_close; + buffer->cache.cache = NULL; + buffer->cache.numbytes = 0; + buffer->cache.pos = 0; + buffer->pos = 0; + e = 0; + + end: + if (e) { + extract_free(alloc, &buffer); + } + else { + *o_buffer = buffer; + } + return e; +} + + +size_t extract_buffer_pos(extract_buffer_t* buffer) +{ + size_t ret = buffer->pos; + if (buffer->cache.cache) { + ret += buffer->cache.pos; + } + return ret; +} + + +static int s_cache_flush(extract_buffer_t* buffer, size_t* o_actual) +/* Sends contents of cache to fn_write() using a loop to cope with short +writes. Returns with *o_actual containing the number of bytes successfully +sent, and buffer->cache.{cache,numbytes,pos} all set to zero. + +If we return zero but *actual is less than original buffer->cache.numbytes, +then fn_write returned EOF. 
*/ +{ + int e = -1; + size_t p = 0; + assert(buffer->cache.pos <= buffer->cache.numbytes); + for(;;) { + size_t actual; + if (p == buffer->cache.pos) break; + if (buffer->fn_write( + buffer->handle, + (char*) buffer->cache.cache + p, + buffer->cache.pos - p, + &actual + )) goto end; + buffer->pos += actual; + p += actual; + if (actual == 0) { + /* EOF while flushing cache. We set <pos> to the number of bytes + in data..+numbytes that we know have been successfully handled by + buffer->fn_write(). This can be negative if we failed to flush + earlier data. */ + outf("*** buffer->fn_write() EOF\n"); + e = 0; + goto end; + } + } + outfx("cache flush, buffer->pos=%i p=%i buffer->cache.pos=%i\n", + buffer->pos, p, buffer->cache.pos); + assert(p == buffer->cache.pos); + buffer->cache.cache = NULL; + buffer->cache.numbytes = 0; + buffer->cache.pos = 0; + e = 0; + end: + + *o_actual = p; + return e; +} + +int extract_buffer_close(extract_buffer_t** p_buffer) +/* Flushes any cached writes, calls fn_close (if set) on the handle, frees the +buffer and sets *p_buffer to NULL. Returns 0 on success, +1 if the cache could +only be partially flushed (fn_write reported EOF), or -1 if fn_write failed. +The handle is closed in all three cases: the buffer is freed regardless, so +the caller would otherwise have no way left to release it. */ +{ + extract_buffer_t* buffer = *p_buffer; + int e = -1; + + if (!buffer) { + return 0; + } + + if (buffer->cache.cache && buffer->fn_write) { + /* Flush cache. */ + size_t cache_bytes = buffer->cache.pos; + size_t actual; + if (s_cache_flush(buffer, &actual)) goto end; + if (actual != cache_bytes) { + e = +1; + goto end; + } + } + e = 0; + end: + /* Close the handle on the error paths too, otherwise it leaks. */ + if (buffer->fn_close) buffer->fn_close(buffer->handle); + extract_free(buffer->alloc, &buffer); + *p_buffer = NULL; + return e; +} + +static int s_simple_cache(void* handle, void** o_cache, size_t* o_numbytes) +{ + /* Indicate EOF. */ + (void) handle; + *o_cache = NULL; + *o_numbytes = 0; + return 0; +} + +int extract_buffer_open_simple( + extract_alloc_t* alloc, + const void* data, + size_t numbytes, + void* handle, + extract_buffer_fn_close fn_close, + extract_buffer_t** o_buffer + ) +/* Creates a buffer whose cache is the caller's data[0..numbytes); there are no +read/write callbacks and the cache callback always reports EOF. Returns 0 with +*o_buffer set, or -1 if allocation failed. */ +{ + extract_buffer_t* buffer; + if (extract_malloc(alloc, &buffer, sizeof(*buffer))) return -1; + + /* We need to cast away the const here. 
data[] will be written-to if caller + uses us as a write buffer. */ + buffer->alloc = alloc; + buffer->cache.cache = (void*) data; + buffer->cache.numbytes = numbytes; + buffer->cache.pos = 0; + buffer->pos = 0; /* extract_malloc() does not zero memory; extract_buffer_pos() reads this. */ + buffer->handle = handle; + buffer->fn_read = NULL; + buffer->fn_write = NULL; + buffer->fn_cache = s_simple_cache; + buffer->fn_close = fn_close; + *o_buffer = buffer; + return 0; +} + + +/* Implementation of extract_buffer_file*. */ + +static int s_file_read(void* handle, void* data, size_t numbytes, size_t* o_actual) +{ + FILE* file = handle; + size_t n = fread(data, 1, numbytes, file); + outfx("file=%p numbytes=%i => n=%zi", file, numbytes, n); + assert(o_actual); /* We are called by other extract_buffer fns, not by user code. */ + *o_actual = n; + if (!n && ferror(file)) { + errno = EIO; + return -1; + } + return 0; +} + +static int s_file_write(void* handle, const void* data, size_t numbytes, size_t* o_actual) +{ + FILE* file = handle; + size_t n = fwrite(data, 1 /*size*/, numbytes /*nmemb*/, file); + outfx("file=%p numbytes=%i => n=%zi", file, numbytes, n); + assert(o_actual); /* We are called by other extract_buffer fns, not by user code. */ + *o_actual = n; + if (!n && ferror(file)) { + errno = EIO; + return -1; + } + return 0; +} + +static void s_file_close(void* handle) +{ + FILE* file = handle; + if (!file) return; + fclose(file); +} + +int extract_buffer_open_file(extract_alloc_t* alloc, const char* path, int writable, extract_buffer_t** o_buffer) +{ + int e = -1; + FILE* file = fopen(path, (writable) ? "wb" : "rb"); + if (!file) { + outf("failed to open '%s': %s", path, strerror(errno)); + goto end; + } + + if (extract_buffer_open( + alloc, + file /*handle*/, + writable ? NULL : s_file_read, + writable ? s_file_write : NULL, + NULL /*fn_cache*/, + s_file_close, + o_buffer + )) goto end; + e = 0; + + end: + if (e) { + if (file) fclose(file); + *o_buffer = NULL; + } + return e; +} + + +/* Support for read/write. 
*/ + +int extract_buffer_read_internal( + extract_buffer_t* buffer, + void* destination, + size_t numbytes, + size_t* o_actual + ) +/* Called by extract_buffer_read() if not enough space in buffer->cache. */ +{ + int e = -1; + size_t pos = 0; /* Number of bytes read so far. */ + + /* In each iteration we either read from cache, or use buffer->fn_read() + directly or repopulate the cache. */ + for(;;) { + size_t n; + if (pos == numbytes) break; + n = buffer->cache.numbytes - buffer->cache.pos; + if (n) { + /* There is data in cache. */ + if (n > numbytes - pos) n = numbytes - pos; + memcpy((char*) destination + pos, (char*) buffer->cache.cache + buffer->cache.pos, n); + pos += n; + buffer->cache.pos += n; + } + else { + /* No data in cache. */ + int use_read = 0; + if (buffer->fn_read) { + if (!buffer->fn_cache) { + use_read = 1; + } + else if (buffer->cache.numbytes && numbytes - pos > buffer->cache.numbytes / 2) { + /* This read is large compared to previously-returned + cache size, so let's ignore buffer->fn_cache and use + buffer->fn_read() directly instead. */ + use_read = 1; + } + } + if (use_read) { + /* Use buffer->fn_read() directly, carrying on looping in case + of short read. */ + size_t actual; + outfx("using buffer->fn_read() directly for numbytes-pos=%i\n", numbytes-pos); + if (buffer->fn_read(buffer->handle, (char*) destination + pos, numbytes - pos, &actual)) goto end; + if (actual == 0) break; /* EOF. */ + pos += actual; + buffer->pos += actual; + } + else { + /* Repopulate cache. */ + outfx("using buffer->fn_cache() for buffer->cache.numbytes=%i\n", buffer->cache.numbytes); + if (buffer->fn_cache(buffer->handle, &buffer->cache.cache, &buffer->cache.numbytes)) goto end; + buffer->pos += buffer->cache.pos; + buffer->cache.pos = 0; + if (buffer->cache.numbytes == 0) break; /* EOF. */ + } + } + } + e = 0; + + end: + if (o_actual) *o_actual = pos; + if (e == 0 && pos != numbytes) return +1; /* EOF. 
*/ + return e; +} + + +int extract_buffer_write_internal( + extract_buffer_t* buffer, + const void* source, + size_t numbytes, + size_t* o_actual + ) +{ + int e = -1; + size_t pos = 0; /* Number of bytes written so far. */ + + if (!buffer->fn_write) { + errno = EINVAL; + return -1; + } + + /* In each iteration we either write to cache, or use buffer->fn_write() + directly or flush the cache. */ + for(;;) { + size_t n; + outfx("numbytes=%i pos=%i. buffer->cache.numbytes=%i buffer->cache.pos=%i\n", + numbytes, pos, buffer->cache.numbytes, buffer->cache.pos); + if (pos == numbytes) break; + n = buffer->cache.numbytes - buffer->cache.pos; + if (n) { + /* There is space in cache for writing. */ + if (n > numbytes - pos) n = numbytes - pos; + outfx("writing to cache: numbytes=%i n=%i\n", numbytes, n); + memcpy((char*) buffer->cache.cache + buffer->cache.pos, (char*) source + pos, n); + pos += n; + buffer->cache.pos += n; + } + else { + /* No space left in cache. */ + int use_write = 0; + outfx("cache empty. pos=%i. buffer->cache.numbytes=%i buffer->cache.pos=%i\n", + pos, buffer->cache.numbytes, buffer->cache.pos); + { + /* Flush the cache. */ + size_t actual; + int ee; + size_t b = buffer->cache.numbytes; + ptrdiff_t delta; + ee = s_cache_flush(buffer, &actual); + assert(actual <= b); + delta = actual - b; + pos += delta; + buffer->pos += delta; + if (delta) { + /* We have only partially flushed the cache. This is + not recoverable. <pos> will be the number of bytes in + source..+numbytes that have been successfully flushed, and + could be negative if we failed to flush earlier data. */ + outf("failed to flush. 
actual=%i delta=%i\n", actual, delta); + e = 0; + goto end; + } + if (ee) goto end; + } + + if (!buffer->fn_cache) { + use_write = 1; + } + else if (buffer->cache.numbytes && numbytes - pos > buffer->cache.numbytes / 2) { + /* This write is large compared to previously-returned cache + size, so let's ignore the cache and call buffer->fn_write() + directly instead. */ + use_write = 1; + } + if (use_write) { + /* Use buffer->fn_write() directly, carrying on looping in case + of short write. */ + size_t actual; + if (buffer->fn_write(buffer->handle, (char*) source + pos, numbytes - pos, &actual)) goto end; + if (actual == 0) break; /* EOF. */ + outfx("direct write numbytes-pos=%i actual=%i buffer->pos=%i => %i\n", + numbytes-pos, actual, buffer->pos, buffer->pos + actual); + pos += actual; + buffer->pos += actual; + } + else { + /* Repopulate cache. */ + outfx("repopulating cache buffer->pos=%i", buffer->pos); + if (buffer->fn_cache(buffer->handle, &buffer->cache.cache, &buffer->cache.numbytes)) goto end; + buffer->cache.pos = 0; + if (buffer->cache.numbytes == 0) break; /* EOF. */ + } + } + } + e = 0; + + end: + if (o_actual) *o_actual = pos; + if (e == 0 && pos != numbytes) e = +1; /* EOF. */ + return e; +} + + +static int expanding_memory_buffer_write(void* handle, const void* source, size_t numbytes, size_t* o_actual) +{ + /* We realloc our memory region as required. For efficiency, we also use + any currently-unused region of our memory buffer as an extract_buffer + cache. So we can be called either to 'flush the cache' (in which case we + don't actually copy any data) or to accept data from somewhere else (in + which case we need to increase the size of our memory region. */ + extract_buffer_expanding_t* ebe = handle; + if ((char*) source >= ebe->data && (char*) source < ebe->data + ebe->alloc_size) { + /* Source is inside our memory region so we are being called by + extract_buffer_write_internal() to re-populate the cache. 
We don't + actually have to copy anything. */ + assert((size_t) ((char*) source - ebe->data) == ebe->data_size); + assert((size_t) ((char*) source - ebe->data + numbytes) <= ebe->alloc_size); + ebe->data_size += numbytes; + } + else { + /* Data is external, so copy into our buffer. We will have already been + called to flush the cache. */ + if (extract_realloc2(ebe->buffer->alloc, &ebe->data, ebe->alloc_size, ebe->data_size + numbytes)) return -1; + ebe->alloc_size = ebe->data_size + numbytes; + memcpy(ebe->data + ebe->data_size, source, numbytes); + ebe->data_size += numbytes; + } + *o_actual = numbytes; + return 0; +} + +static int expanding_memory_buffer_cache(void* handle, void** o_cache, size_t* o_numbytes) +{ + extract_buffer_expanding_t* ebe = handle; + size_t delta = 4096; + if (extract_realloc2(ebe->buffer->alloc, &ebe->data, ebe->alloc_size, ebe->data_size + delta)) return -1; + ebe->alloc_size = ebe->data_size + delta; + *o_cache = ebe->data + ebe->data_size; + *o_numbytes = delta; + return 0; +} + +int extract_buffer_expanding_create(extract_alloc_t* alloc, extract_buffer_expanding_t* ebe) +{ + ebe->data = NULL; + ebe->data_size = 0; + ebe->alloc_size = 0; + if (extract_buffer_open( + alloc, + ebe, + NULL /*fn_read*/, + expanding_memory_buffer_write, + expanding_memory_buffer_cache, + NULL /*fn_close*/, + &ebe->buffer + )) return -1; + return 0; +} diff --git a/extract/src/compat_stdint.h b/extract/src/compat_stdint.h new file mode 100644 index 00000000..174c72ae --- /dev/null +++ b/extract/src/compat_stdint.h @@ -0,0 +1,25 @@ +#ifndef ARTIFEX_EXTRACT_COMPAT_STDINT_H +#define ARTIFEX_EXTRACT_COMPAT_STDINT_H + +/* Fake what we need from stdint.h on MSVS. 
*/ + +#if defined(_MSC_VER) && (_MSC_VER < 1700) /* MSVC older than VS2012 */ + typedef signed char int8_t; + typedef short int int16_t; + typedef int int32_t; + typedef __int64 int64_t; + typedef unsigned char uint8_t; + typedef unsigned short int uint16_t; + typedef unsigned int uint32_t; + typedef unsigned __int64 uint64_t; + #ifndef INT64_MAX + #define INT64_MAX 9223372036854775807i64 + #endif + #ifndef SIZE_MAX + #define SIZE_MAX ((size_t) -1) + #endif +#else + #include <stdint.h> +#endif + +#endif diff --git a/extract/src/compat_strtoll.h b/extract/src/compat_strtoll.h new file mode 100644 index 00000000..76ed3530 --- /dev/null +++ b/extract/src/compat_strtoll.h @@ -0,0 +1,9 @@ +#ifndef ARTIFEX_EXTRACT_COMPAT_STRTOLL_H +#define ARTIFEX_EXTRACT_COMPAT_STRTOLL_H + +#if defined(_MSC_VER) && (_MSC_VER < 1800) /* MSVC older than VS2013 */ + #define strtoll( text, end, base) ((long long) _strtoi64(text, end, base)) /* Whole expansion parenthesised so it is safe inside larger expressions. */ + #define strtoull( text, end, base) ((unsigned long long) _strtoui64(text, end, base)) /* Unsigned CRT variant: _strtoi64() would saturate at INT64_MAX. */ +#endif + +#endif diff --git a/extract/src/compat_va_copy.h b/extract/src/compat_va_copy.h new file mode 100644 index 00000000..9b9ae8dc --- /dev/null +++ b/extract/src/compat_va_copy.h @@ -0,0 +1,8 @@ +#ifndef ARTIFEX_EXTRACT_COMPAT_VA_COPY_H +#define ARTIFEX_EXTRACT_COMPAT_VA_COPY_H + +#if defined(_MSC_VER) && (_MSC_VER < 1800) /* MSVC older than VS2013 */ + #define va_copy(dst, src) ((dst) = (src)) +#endif + +#endif diff --git a/extract/src/document.h b/extract/src/document.h new file mode 100644 index 00000000..7a1470e4 --- /dev/null +++ b/extract/src/document.h @@ -0,0 +1,150 @@ +#ifndef ARTIFEX_EXTRACT_DOCUMENT_H +#define ARTIFEX_EXTRACT_DOCUMENT_H + +static const double pi = 3.141592653589793; + +typedef struct +{ + double x; + double y; +} point_t; + +typedef struct +{ + double a; + double b; + double c; + double d; + double e; + double f; +} matrix_t; + +double matrix_expansion(matrix_t m); + +int matrix_cmp4(const matrix_t* lhs, const matrix_t* rhs) +; +/* 
Returns zero if first four members of *lhs and *rhs are equal, otherwise ++/-1. */ + +typedef struct +{ + /* (x,y) before transformation by ctm and trm. */ + double pre_x; + double pre_y; + + /* (x,y) after transformation by ctm and trm. */ + double x; + double y; + + unsigned ucs; + double adv; +} char_t; +/* A single char in a span. +*/ + +typedef struct +{ + matrix_t ctm; + matrix_t trm; + char* font_name; + + /* font size is matrix_expansion(trm). */ + + struct { + unsigned font_bold : 1; + unsigned font_italic : 1; + unsigned wmode : 1; + }; + + char_t* chars; + int chars_num; +} span_t; +/* List of chars that have same font and are usually adjacent. */ + +char_t* span_char_last(span_t* span); +/* Returns last character in span. */ + +int span_append_c(extract_alloc_t* alloc, span_t* span, int c); +/* Appends new char_t to an span_t with .ucs=c and all other +fields zeroed. */ + +const char* span_string(extract_alloc_t* alloc, span_t* span); +/* Returns static string containing info about span_t. */ + +typedef struct +{ + span_t** spans; + int spans_num; +} line_t; +/* List of spans that are aligned on same line. */ + +span_t* line_span_first(line_t* line); +/* Returns first span in a line. */ + +span_t* line_span_last(line_t* line); +/* Returns last span in a line. */ + +typedef struct +{ + line_t** lines; + int lines_num; +} paragraph_t; +/* List of lines that are aligned and adjacent to each other so as to form a +paragraph. */ + +typedef struct +{ + char* type; /* jpg, png etc. */ + char* name; /* Name of image file within docx. */ + char* id; /* ID of image within docx. */ + char* data; + size_t data_size; + + extract_image_data_free data_free; + void* data_free_handle; + +} image_t; +/* Information about an image. <type> is as passed to extract_add_image(); +<name> and <id> are created to be unique identifiers for use in generated docx +file. 
*/ + +typedef struct +{ + span_t** spans; + int spans_num; + + image_t* images; + int images_num; + + line_t** lines; + int lines_num; + /* These refer to items in .spans. Initially empty, then set by + extract_join(). */ + + paragraph_t** paragraphs; + int paragraphs_num; + /* These refer to items in .lines. Initially empty, then set + by extract_join(). */ + +} page_t; +/* A page. Contains different representations of the list of spans. */ + +typedef struct +{ + page_t** pages; + int pages_num; +} document_t; +/* A list of pages. */ + + +typedef struct +{ + image_t* images; + int images_num; + char** imagetypes; + int imagetypes_num; +} images_t; + +int extract_document_join(extract_alloc_t* alloc, document_t* document); + +#endif diff --git a/extract/src/docx.c b/extract/src/docx.c new file mode 100644 index 00000000..238e81d4 --- /dev/null +++ b/extract/src/docx.c @@ -0,0 +1,1097 @@ +/* These extract_docx_*() functions generate docx content and docx zip archive +data. + +Caller must call things in a sensible order to create valid content - +e.g. don't call docx_paragraph_start() twice without intervening call to +docx_paragraph_finish(). 
*/ + +#include "../include/extract.h" + +#include "docx_template.h" + +#include "astring.h" +#include "document.h" +#include "docx.h" +#include "mem.h" +#include "memento.h" +#include "outf.h" +#include "zip.h" + +#include <assert.h> +#include <errno.h> +#include <math.h> +#include <stdarg.h> +#include <stdlib.h> +#include <stdio.h> +#include <string.h> + +#include <sys/stat.h> + + +static int extract_docx_paragraph_start(extract_alloc_t* alloc, extract_astring_t* content) +{ + return extract_astring_cat(alloc, content, "\n\n<w:p>"); +} + +static int extract_docx_paragraph_finish(extract_alloc_t* alloc, extract_astring_t* content) +{ + return extract_astring_cat(alloc, content, "\n</w:p>"); +} + +static int extract_docx_run_start( + extract_alloc_t* alloc, + extract_astring_t* content, + const char* font_name, + double font_size, + int bold, + int italic + ) +/* Starts a new run. Caller must ensure that extract_docx_run_finish() was +called to terminate any previous run. */ +{ + int e = 0; + if (!e) e = extract_astring_cat(alloc, content, "\n<w:r><w:rPr><w:rFonts w:ascii=\""); + if (!e) e = extract_astring_cat(alloc, content, font_name); + if (!e) e = extract_astring_cat(alloc, content, "\" w:hAnsi=\""); + if (!e) e = extract_astring_cat(alloc, content, font_name); + if (!e) e = extract_astring_cat(alloc, content, "\"/>"); + if (!e && bold) e = extract_astring_cat(alloc, content, "<w:b/>"); + if (!e && italic) e = extract_astring_cat(alloc, content, "<w:i/>"); + { + char font_size_text[32]; + if (0) font_size = 10; + + if (!e) e = extract_astring_cat(alloc, content, "<w:sz w:val=\""); + snprintf(font_size_text, sizeof(font_size_text), "%f", font_size * 2); + extract_astring_cat(alloc, content, font_size_text); + extract_astring_cat(alloc, content, "\"/>"); + + if (!e) e = extract_astring_cat(alloc, content, "<w:szCs w:val=\""); + snprintf(font_size_text, sizeof(font_size_text), "%f", font_size * 1.5); + extract_astring_cat(alloc, content, font_size_text); + 
extract_astring_cat(alloc, content, "\"/>"); + } + if (!e) e = extract_astring_cat(alloc, content, "</w:rPr><w:t xml:space=\"preserve\">"); + return e; + +} + +static int extract_docx_run_finish(extract_alloc_t* alloc, extract_astring_t* content) +{ + return extract_astring_cat(alloc, content, "</w:t></w:r>"); +} + +static int extract_docx_char_append_string(extract_alloc_t* alloc, extract_astring_t* content, const char* text) +{ + return extract_astring_cat(alloc, content, text); +} + +static int extract_docx_char_append_stringf(extract_alloc_t* alloc, extract_astring_t* content, const char* format, ...) +{ + char* buffer = NULL; + int e; + va_list va; + va_start(va, format); + e = extract_vasprintf(alloc, &buffer, format, va); + va_end(va); + if (e < 0) return e; + e = extract_astring_cat(alloc, content, buffer); + extract_free(alloc, &buffer); + return e; +} + +static int extract_docx_char_append_char(extract_alloc_t* alloc, extract_astring_t* content, char c) +{ + return extract_astring_catc(alloc, content, c); +} + +static int extract_docx_paragraph_empty(extract_alloc_t* alloc, extract_astring_t* content) +/* Append an empty paragraph to *content. */ +{ + int e = -1; + if (extract_docx_paragraph_start(alloc, content)) goto end; + /* It seems like our choice of font size here doesn't make any difference + to the ammount of vertical space, unless we include a non-space + character. Presumably something to do with the styles in the template + document. */ + if (extract_docx_run_start( + alloc, + content, + "OpenSans", + 10 /*font_size*/, + 0 /*font_bold*/, + 0 /*font_italic*/ + )) goto end; + //docx_char_append_string(content, " "); /*   is non-break space. */ + if (extract_docx_run_finish(alloc, content)) goto end; + if (extract_docx_paragraph_finish(alloc, content)) goto end; + e = 0; + end: + return e; +} + + +/* Removes last <len> chars. 
*/ +static int docx_char_truncate(extract_astring_t* content, int len) +{ + assert((size_t) len <= content->chars_num); + content->chars_num -= len; + content->chars[content->chars_num] = 0; + return 0; +} + +static int extract_docx_char_truncate_if(extract_astring_t* content, char c) +/* Removes last char if it is <c>. */ +{ + if (content->chars_num && content->chars[content->chars_num-1] == c) { + docx_char_truncate(content, 1); + } + return 0; +} + + +static double matrices_to_font_size(matrix_t* ctm, matrix_t* trm) +{ + double font_size = matrix_expansion(*trm) + * matrix_expansion(*ctm); + /* Round font_size to nearest 0.01. */ + font_size = (double) (int) (font_size * 100.0f + 0.5f) / 100.0f; + return font_size; +} + +typedef struct +{ + const char* font_name; + double font_size; + int font_bold; + int font_italic; + matrix_t* ctm_prev; +} content_state_t; +/* Used to keep track of font information when writing paragraphs of docx +content, e.g. so we know whether a font has changed so need to start a new docx +span. */ + + +static int extract_document_to_docx_content_paragraph( + extract_alloc_t* alloc, + content_state_t* state, + paragraph_t* paragraph, + extract_astring_t* content + ) +/* Append docx xml for <paragraph> to <content>. Updates *state if we change +font. 
*/ +{ + int e = -1; + int l; + if (extract_docx_paragraph_start(alloc, content)) goto end; + + for (l=0; l<paragraph->lines_num; ++l) { + line_t* line = paragraph->lines[l]; + int s; + for (s=0; s<line->spans_num; ++s) { + int si; + span_t* span = line->spans[s]; + double font_size_new; + state->ctm_prev = &span->ctm; + font_size_new = matrices_to_font_size(&span->ctm, &span->trm); + if (!state->font_name + || strcmp(span->font_name, state->font_name) + || span->font_bold != state->font_bold + || span->font_italic != state->font_italic + || font_size_new != state->font_size + ) { + if (state->font_name) { + if (extract_docx_run_finish(alloc, content)) goto end; + } + state->font_name = span->font_name; + state->font_bold = span->font_bold; + state->font_italic = span->font_italic; + state->font_size = font_size_new; + if (extract_docx_run_start( + alloc, + content, + state->font_name, + state->font_size, + state->font_bold, + state->font_italic + )) goto end; + } + + for (si=0; si<span->chars_num; ++si) { + char_t* char_ = &span->chars[si]; + int c = char_->ucs; + + if (0) {} + + /* Escape XML special characters. */ + else if (c == '<') extract_docx_char_append_string(alloc, content, "<"); + else if (c == '>') extract_docx_char_append_string(alloc, content, ">"); + else if (c == '&') extract_docx_char_append_string(alloc, content, "&"); + else if (c == '"') extract_docx_char_append_string(alloc, content, """); + else if (c == '\'') extract_docx_char_append_string(alloc, content, "'"); + + /* Expand ligatures. 
*/ + else if (c == 0xFB00) { + if (extract_docx_char_append_string(alloc, content, "ff")) goto end; + } + else if (c == 0xFB01) { + if (extract_docx_char_append_string(alloc, content, "fi")) goto end; + } + else if (c == 0xFB02) { + if (extract_docx_char_append_string(alloc, content, "fl")) goto end; + } + else if (c == 0xFB03) { + if (extract_docx_char_append_string(alloc, content, "ffi")) goto end; + } + else if (c == 0xFB04) { + if (extract_docx_char_append_string(alloc, content, "ffl")) goto end; + } + + /* Output ASCII verbatim. */ + else if (c >= 32 && c <= 127) { + if (extract_docx_char_append_char(alloc, content, (char) c)) goto end; + } + + /* Escape all other characters. */ + else { + char buffer[32]; + snprintf(buffer, sizeof(buffer), "&#x%x;", c); + if (extract_docx_char_append_string(alloc, content, buffer)) goto end; + } + } + /* Remove any trailing '-' at end of line. */ + if (extract_docx_char_truncate_if(content, '-')) goto end; + } + } + if (state->font_name) { + if (extract_docx_run_finish(alloc, content)) goto end; + state->font_name = NULL; + } + if (extract_docx_paragraph_finish(alloc, content)) goto end; + + e = 0; + + end: + return e; +} + +static int extract_document_append_image( + extract_alloc_t* alloc, + extract_astring_t* content, + image_t* image + ) +/* Write reference to image into docx content. 
*/ +{ + extract_docx_char_append_string(alloc, content, "\n"); + extract_docx_char_append_string(alloc, content, " <w:p>\n"); + extract_docx_char_append_string(alloc, content, " <w:r>\n"); + extract_docx_char_append_string(alloc, content, " <w:rPr>\n"); + extract_docx_char_append_string(alloc, content, " <w:noProof/>\n"); + extract_docx_char_append_string(alloc, content, " </w:rPr>\n"); + extract_docx_char_append_string(alloc, content, " <w:drawing>\n"); + extract_docx_char_append_string(alloc, content, " <wp:inline distT=\"0\" distB=\"0\" distL=\"0\" distR=\"0\" wp14:anchorId=\"7057A832\" wp14:editId=\"466EB3FB\">\n"); + extract_docx_char_append_string(alloc, content, " <wp:extent cx=\"2933700\" cy=\"2200275\"/>\n"); + extract_docx_char_append_string(alloc, content, " <wp:effectExtent l=\"0\" t=\"0\" r=\"0\" b=\"9525\"/>\n"); + extract_docx_char_append_string(alloc, content, " <wp:docPr id=\"1\" name=\"Picture 1\"/>\n"); + extract_docx_char_append_string(alloc, content, " <wp:cNvGraphicFramePr>\n"); + extract_docx_char_append_string(alloc, content, " <a:graphicFrameLocks xmlns:a=\"http://schemas.openxmlformats.org/drawingml/2006/main\" noChangeAspect=\"1\"/>\n"); + extract_docx_char_append_string(alloc, content, " </wp:cNvGraphicFramePr>\n"); + extract_docx_char_append_string(alloc, content, " <a:graphic xmlns:a=\"http://schemas.openxmlformats.org/drawingml/2006/main\">\n"); + extract_docx_char_append_string(alloc, content, " <a:graphicData uri=\"http://schemas.openxmlformats.org/drawingml/2006/picture\">\n"); + extract_docx_char_append_string(alloc, content, " <pic:pic xmlns:pic=\"http://schemas.openxmlformats.org/drawingml/2006/picture\">\n"); + extract_docx_char_append_string(alloc, content, " <pic:nvPicPr>\n"); + extract_docx_char_append_string(alloc, content, " <pic:cNvPr id=\"1\" name=\"Picture 1\"/>\n"); + extract_docx_char_append_string(alloc, content, " <pic:cNvPicPr>\n"); + extract_docx_char_append_string(alloc, content, " <a:picLocks 
noChangeAspect=\"1\" noChangeArrowheads=\"1\"/>\n"); + extract_docx_char_append_string(alloc, content, " </pic:cNvPicPr>\n"); + extract_docx_char_append_string(alloc, content, " </pic:nvPicPr>\n"); + extract_docx_char_append_string(alloc, content, " <pic:blipFill>\n"); + extract_docx_char_append_stringf(alloc, content," <a:blip r:embed=\"%s\">\n", image->id); + extract_docx_char_append_string(alloc, content, " <a:extLst>\n"); + extract_docx_char_append_string(alloc, content, " <a:ext uri=\"{28A0092B-C50C-407E-A947-70E740481C1C}\">\n"); + extract_docx_char_append_string(alloc, content, " <a14:useLocalDpi xmlns:a14=\"http://schemas.microsoft.com/office/drawing/2010/main\" val=\"0\"/>\n"); + extract_docx_char_append_string(alloc, content, " </a:ext>\n"); + extract_docx_char_append_string(alloc, content, " </a:extLst>\n"); + extract_docx_char_append_string(alloc, content, " </a:blip>\n"); + //extract_docx_char_append_string(alloc, content, " <a:srcRect/>\n"); + extract_docx_char_append_string(alloc, content, " <a:stretch>\n"); + extract_docx_char_append_string(alloc, content, " <a:fillRect/>\n"); + extract_docx_char_append_string(alloc, content, " </a:stretch>\n"); + extract_docx_char_append_string(alloc, content, " </pic:blipFill>\n"); + extract_docx_char_append_string(alloc, content, " <pic:spPr bwMode=\"auto\">\n"); + extract_docx_char_append_string(alloc, content, " <a:xfrm>\n"); + extract_docx_char_append_string(alloc, content, " <a:off x=\"0\" y=\"0\"/>\n"); + extract_docx_char_append_string(alloc, content, " <a:ext cx=\"2933700\" cy=\"2200275\"/>\n"); + extract_docx_char_append_string(alloc, content, " </a:xfrm>\n"); + extract_docx_char_append_string(alloc, content, " <a:prstGeom prst=\"rect\">\n"); + extract_docx_char_append_string(alloc, content, " <a:avLst/>\n"); + extract_docx_char_append_string(alloc, content, " </a:prstGeom>\n"); + extract_docx_char_append_string(alloc, content, " <a:noFill/>\n"); + extract_docx_char_append_string(alloc, content, " 
<a:ln>\n"); + extract_docx_char_append_string(alloc, content, " <a:noFill/>\n"); + extract_docx_char_append_string(alloc, content, " </a:ln>\n"); + extract_docx_char_append_string(alloc, content, " </pic:spPr>\n"); + extract_docx_char_append_string(alloc, content, " </pic:pic>\n"); + extract_docx_char_append_string(alloc, content, " </a:graphicData>\n"); + extract_docx_char_append_string(alloc, content, " </a:graphic>\n"); + extract_docx_char_append_string(alloc, content, " </wp:inline>\n"); + extract_docx_char_append_string(alloc, content, " </w:drawing>\n"); + extract_docx_char_append_string(alloc, content, " </w:r>\n"); + extract_docx_char_append_string(alloc, content, " </w:p>\n"); + extract_docx_char_append_string(alloc, content, "\n"); + return 0; +} + + +static int extract_document_output_rotated_paragraphs( + extract_alloc_t* alloc, + page_t* page, + int paragraph_begin, + int paragraph_end, + int rot, + int x, + int y, + int w, + int h, + int text_box_id, + extract_astring_t* content, + content_state_t* state + ) +/* Writes paragraph to content inside rotated text box. 
*/ +{ + int e = 0; + int p; + outf("x,y=%ik,%ik = %i,%i", x/1000, y/1000, x, y); + extract_docx_char_append_string(alloc, content, "\n"); + extract_docx_char_append_string(alloc, content, "\n"); + extract_docx_char_append_string(alloc, content, "<w:p>\n"); + extract_docx_char_append_string(alloc, content, " <w:r>\n"); + extract_docx_char_append_string(alloc, content, " <mc:AlternateContent>\n"); + extract_docx_char_append_string(alloc, content, " <mc:Choice Requires=\"wps\">\n"); + extract_docx_char_append_string(alloc, content, " <w:drawing>\n"); + extract_docx_char_append_string(alloc, content, " <wp:anchor distT=\"0\" distB=\"0\" distL=\"0\" distR=\"0\" simplePos=\"0\" relativeHeight=\"0\" behindDoc=\"0\" locked=\"0\" layoutInCell=\"1\" allowOverlap=\"1\" wp14:anchorId=\"53A210D1\" wp14:editId=\"2B7E8016\">\n"); + extract_docx_char_append_string(alloc, content, " <wp:simplePos x=\"0\" y=\"0\"/>\n"); + extract_docx_char_append_string(alloc, content, " <wp:positionH relativeFrom=\"page\">\n"); + extract_docx_char_append_stringf(alloc, content," <wp:posOffset>%i</wp:posOffset>\n", x); + extract_docx_char_append_string(alloc, content, " </wp:positionH>\n"); + extract_docx_char_append_string(alloc, content, " <wp:positionV relativeFrom=\"page\">\n"); + extract_docx_char_append_stringf(alloc, content," <wp:posOffset>%i</wp:posOffset>\n", y); + extract_docx_char_append_string(alloc, content, " </wp:positionV>\n"); + extract_docx_char_append_stringf(alloc, content," <wp:extent cx=\"%i\" cy=\"%i\"/>\n", w, h); + extract_docx_char_append_string(alloc, content, " <wp:effectExtent l=\"381000\" t=\"723900\" r=\"371475\" b=\"723900\"/>\n"); + extract_docx_char_append_string(alloc, content, " <wp:wrapNone/>\n"); + extract_docx_char_append_stringf(alloc, content," <wp:docPr id=\"%i\" name=\"Text Box %i\"/>\n", text_box_id, text_box_id); + extract_docx_char_append_string(alloc, content, " <wp:cNvGraphicFramePr/>\n"); + extract_docx_char_append_string(alloc, content, " <a:graphic 
xmlns:a=\"http://schemas.openxmlformats.org/drawingml/2006/main\">\n"); + extract_docx_char_append_string(alloc, content, " <a:graphicData uri=\"http://schemas.microsoft.com/office/word/2010/wordprocessingShape\">\n"); + extract_docx_char_append_string(alloc, content, " <wps:wsp>\n"); + extract_docx_char_append_string(alloc, content, " <wps:cNvSpPr txBox=\"1\"/>\n"); + extract_docx_char_append_string(alloc, content, " <wps:spPr>\n"); + extract_docx_char_append_stringf(alloc, content," <a:xfrm rot=\"%i\">\n", rot); + extract_docx_char_append_string(alloc, content, " <a:off x=\"0\" y=\"0\"/>\n"); + extract_docx_char_append_string(alloc, content, " <a:ext cx=\"3228975\" cy=\"2286000\"/>\n"); + extract_docx_char_append_string(alloc, content, " </a:xfrm>\n"); + extract_docx_char_append_string(alloc, content, " <a:prstGeom prst=\"rect\">\n"); + extract_docx_char_append_string(alloc, content, " <a:avLst/>\n"); + extract_docx_char_append_string(alloc, content, " </a:prstGeom>\n"); + + /* Give box a solid background. */ + if (0) { + extract_docx_char_append_string(alloc, content, " <a:solidFill>\n"); + extract_docx_char_append_string(alloc, content, " <a:schemeClr val=\"lt1\"/>\n"); + extract_docx_char_append_string(alloc, content, " </a:solidFill>\n"); + } + + /* Draw line around box. */ + if (0) { + extract_docx_char_append_string(alloc, content, " <a:ln w=\"175\">\n"); + extract_docx_char_append_string(alloc, content, " <a:solidFill>\n"); + extract_docx_char_append_string(alloc, content, " <a:prstClr val=\"black\"/>\n"); + extract_docx_char_append_string(alloc, content, " </a:solidFill>\n"); + extract_docx_char_append_string(alloc, content, " </a:ln>\n"); + } + + extract_docx_char_append_string(alloc, content, " </wps:spPr>\n"); + extract_docx_char_append_string(alloc, content, " <wps:txbx>\n"); + extract_docx_char_append_string(alloc, content, " <w:txbxContent>"); + + #if 0 + if (0) { + /* Output inline text describing the rotation. 
*/ + extract_docx_char_append_stringf(content, "<w:p>\n" + "<w:r><w:rPr><w:rFonts w:ascii=\"OpenSans\" w:hAnsi=\"OpenSans\"/><w:sz w:val=\"20.000000\"/><w:szCs w:val=\"15.000000\"/></w:rPr><w:t xml:space=\"preserve\">*** rotate: %f rad, %f deg. rot=%i</w:t></w:r>\n" + "</w:p>\n", + rotate, + rotate * 180 / pi, + rot + ); + } + #endif + + /* Output paragraphs p0..p2-1. */ + for (p=paragraph_begin; p<paragraph_end; ++p) { + paragraph_t* paragraph = page->paragraphs[p]; + if (extract_document_to_docx_content_paragraph(alloc, state, paragraph, content)) goto end; + } + + extract_docx_char_append_string(alloc, content, "\n"); + extract_docx_char_append_string(alloc, content, " </w:txbxContent>\n"); + extract_docx_char_append_string(alloc, content, " </wps:txbx>\n"); + extract_docx_char_append_string(alloc, content, " <wps:bodyPr rot=\"0\" spcFirstLastPara=\"0\" vertOverflow=\"overflow\" horzOverflow=\"overflow\" vert=\"horz\" wrap=\"square\" lIns=\"91440\" tIns=\"45720\" rIns=\"91440\" bIns=\"45720\" numCol=\"1\" spcCol=\"0\" rtlCol=\"0\" fromWordArt=\"0\" anchor=\"t\" anchorCtr=\"0\" forceAA=\"0\" compatLnSpc=\"1\">\n"); + extract_docx_char_append_string(alloc, content, " <a:prstTxWarp prst=\"textNoShape\">\n"); + extract_docx_char_append_string(alloc, content, " <a:avLst/>\n"); + extract_docx_char_append_string(alloc, content, " </a:prstTxWarp>\n"); + extract_docx_char_append_string(alloc, content, " <a:noAutofit/>\n"); + extract_docx_char_append_string(alloc, content, " </wps:bodyPr>\n"); + extract_docx_char_append_string(alloc, content, " </wps:wsp>\n"); + extract_docx_char_append_string(alloc, content, " </a:graphicData>\n"); + extract_docx_char_append_string(alloc, content, " </a:graphic>\n"); + extract_docx_char_append_string(alloc, content, " </wp:anchor>\n"); + extract_docx_char_append_string(alloc, content, " </w:drawing>\n"); + extract_docx_char_append_string(alloc, content, " </mc:Choice>\n"); + + /* This fallback is copied from a real Word document. 
Not sure + whether it works - both Libreoffice and Word use the above + choice. */ + extract_docx_char_append_string(alloc, content, " <mc:Fallback>\n"); + extract_docx_char_append_string(alloc, content, " <w:pict>\n"); + extract_docx_char_append_string(alloc, content, " <v:shapetype w14:anchorId=\"53A210D1\" id=\"_x0000_t202\" coordsize=\"21600,21600\" o:spt=\"202\" path=\"m,l,21600r21600,l21600,xe\">\n"); + extract_docx_char_append_string(alloc, content, " <v:stroke joinstyle=\"miter\"/>\n"); + extract_docx_char_append_string(alloc, content, " <v:path gradientshapeok=\"t\" o:connecttype=\"rect\"/>\n"); + extract_docx_char_append_string(alloc, content, " </v:shapetype>\n"); + extract_docx_char_append_stringf(alloc, content," <v:shape id=\"Text Box %i\" o:spid=\"_x0000_s1026\" type=\"#_x0000_t202\" style=\"position:absolute;margin-left:71.25pt;margin-top:48.75pt;width:254.25pt;height:180pt;rotation:-2241476fd;z-index:251659264;visibility:visible;mso-wrap-style:square;mso-wrap-distance-left:9pt;mso-wrap-distance-top:0;mso-wrap-distance-right:9pt;mso-wrap-distance-bottom:0;mso-position-horizontal:absolute;mso-position-horizontal-relative:text;mso-position-vertical:absolute;mso-position-vertical-relative:text;v-text-anchor:top\" o:gfxdata=\"UEsDBBQABgAIAAAAIQC2gziS/gAAAOEBAAATAAAAW0NvbnRlbnRfVHlwZXNdLnhtbJSRQU7DMBBF 90jcwfIWJU67QAgl6YK0S0CoHGBkTxKLZGx5TGhvj5O2G0SRWNoz/78nu9wcxkFMGNg6quQqL6RA 0s5Y6ir5vt9lD1JwBDIwOMJKHpHlpr69KfdHjyxSmriSfYz+USnWPY7AufNIadK6MEJMx9ApD/oD OlTrorhX2lFEilmcO2RdNtjC5xDF9pCuTyYBB5bi6bQ4syoJ3g9WQ0ymaiLzg5KdCXlKLjvcW893 SUOqXwnz5DrgnHtJTxOsQfEKIT7DmDSUCaxw7Rqn8787ZsmRM9e2VmPeBN4uqYvTtW7jvijg9N/y JsXecLq0q+WD6m8AAAD//wMAUEsDBBQABgAIAAAAIQA4/SH/1gAAAJQBAAALAAAAX3JlbHMvLnJl bHOkkMFqwzAMhu+DvYPRfXGawxijTi+j0GvpHsDYimMaW0Yy2fr2M4PBMnrbUb/Q94l/f/hMi1qR JVI2sOt6UJgd+ZiDgffL8ekFlFSbvV0oo4EbChzGx4f9GRdb25HMsYhqlCwG5lrLq9biZkxWOiqY 22YiTra2kYMu1l1tQD30/bPm3wwYN0x18gb45AdQl1tp5j/sFB2T0FQ7R0nTNEV3j6o9feQzro1i 
OWA14Fm+Q8a1a8+Bvu/d/dMb2JY5uiPbhG/ktn4cqGU/er3pcvwCAAD//wMAUEsDBBQABgAIAAAA IQDQg5pQVgIAALEEAAAOAAAAZHJzL2Uyb0RvYy54bWysVE1v2zAMvQ/YfxB0X+2k+WiDOEXWosOA oi3QDj0rstwYk0VNUmJ3v35PipMl3U7DLgJFPj+Rj6TnV12j2VY5X5Mp+OAs50wZSWVtXgv+7fn2 0wVnPghTCk1GFfxNeX61+Phh3tqZGtKadKkcA4nxs9YWfB2CnWWZl2vVCH9GVhkEK3KNCLi616x0 ogV7o7Nhnk+yllxpHUnlPbw3uyBfJP6qUjI8VJVXgemCI7eQTpfOVTyzxVzMXp2w61r2aYh/yKIR tcGjB6obEQTbuPoPqqaWjjxV4UxSk1FV1VKlGlDNIH9XzdNaWJVqgTjeHmTy/49W3m8fHatL9I4z Ixq06Fl1gX2mjg2iOq31M4CeLGChgzsie7+HMxbdVa5hjiDu4HI8ml5MpkkLVMcAh+xvB6kjt4Tz fDi8uJyOOZOIwZ7keWpGtmOLrNb58EVRw6JRcIdeJlqxvfMBGQC6h0S4J12Xt7XW6RLnR11rx7YC ndch5YwvTlDasLbgk/NxnohPYpH68P1KC/k9Vn3KgJs2cEaNdlpEK3SrrhdoReUbdEvSQAZv5W0N 3jvhw6NwGDQ4sTzhAUelCclQb3G2Jvfzb/6IR/8R5azF4Bbc/9gIpzjTXw0m43IwGsVJT5fReDrE xR1HVscRs2muCQqh+8gumREf9N6sHDUv2LFlfBUhYSTeLnjYm9dht07YUamWywTCbFsR7syTlZF6 383n7kU42/czYBTuaT/iYvaurTts/NLQchOoqlPPo8A7VXvdsRepLf0Ox8U7vifU7z/N4hcAAAD/ /wMAUEsDBBQABgAIAAAAIQBh17L63wAAAAoBAAAPAAAAZHJzL2Rvd25yZXYueG1sTI9BT4NAEIXv Jv6HzZh4s0ubgpayNIboSW3Syg9Y2BGI7CyyS0v99Y4nPU3ezMub72W72fbihKPvHClYLiIQSLUz HTUKyvfnuwcQPmgyuneECi7oYZdfX2U6Ne5MBzwdQyM4hHyqFbQhDKmUvm7Rar9wAxLfPtxodWA5 NtKM+szhtperKEqk1R3xh1YPWLRYfx4nq8APVfz9VQxPb+WUNC+vZbGPDhelbm/mxy2IgHP4M8Mv PqNDzkyVm8h40bNer2K2Ktjc82RDEi+5XKVgHfNG5pn8XyH/AQAA//8DAFBLAQItABQABgAIAAAA IQC2gziS/gAAAOEBAAATAAAAAAAAAAAAAAAAAAAAAABbQ29udGVudF9UeXBlc10ueG1sUEsBAi0A FAAGAAgAAAAhADj9If/WAAAAlAEAAAsAAAAAAAAAAAAAAAAALwEAAF9yZWxzLy5yZWxzUEsBAi0A FAAGAAgAAAAhANCDmlBWAgAAsQQAAA4AAAAAAAAAAAAAAAAALgIAAGRycy9lMm9Eb2MueG1sUEsB Ai0AFAAGAAgAAAAhAGHXsvrfAAAACgEAAA8AAAAAAAAAAAAAAAAAsAQAAGRycy9kb3ducmV2Lnht bFBLBQYAAAAABAAEAPMAAAC8BQAAAAA= \" fillcolor=\"white [3201]\" strokeweight=\".5pt\">\n", text_box_id); + extract_docx_char_append_string(alloc, content, " <v:textbox>\n"); + extract_docx_char_append_string(alloc, content, " <w:txbxContent>"); + + for (p=paragraph_begin; p<paragraph_end; ++p) { + paragraph_t* paragraph = page->paragraphs[p]; + if 
(extract_document_to_docx_content_paragraph(alloc, state, paragraph, content)) goto end; + } + + extract_docx_char_append_string(alloc, content, "\n"); + extract_docx_char_append_string(alloc, content, "\n"); + extract_docx_char_append_string(alloc, content, " </w:txbxContent>\n"); + extract_docx_char_append_string(alloc, content, " </v:textbox>\n"); + extract_docx_char_append_string(alloc, content, " </v:shape>\n"); + extract_docx_char_append_string(alloc, content, " </w:pict>\n"); + extract_docx_char_append_string(alloc, content, " </mc:Fallback>\n"); + extract_docx_char_append_string(alloc, content, " </mc:AlternateContent>\n"); + extract_docx_char_append_string(alloc, content, " </w:r>\n"); + extract_docx_char_append_string(alloc, content, "</w:p>"); + e = 0; + end: + return e; +} + + +int extract_document_to_docx_content( + extract_alloc_t* alloc, + document_t* document, + int spacing, + int rotation, + int images, + extract_astring_t* content + ) +{ + int ret = -1; + int text_box_id = 0; + int p; + + /* Write paragraphs into <content>. */ + for (p=0; p<document->pages_num; ++p) { + page_t* page = document->pages[p]; + int p; + content_state_t state; + state.font_name = NULL; + state.font_size = 0; + state.font_bold = 0; + state.font_italic = 0; + state.ctm_prev = NULL; + + for (p=0; p<page->paragraphs_num; ++p) { + paragraph_t* paragraph = page->paragraphs[p]; + const matrix_t* ctm = ¶graph->lines[0]->spans[0]->ctm; + double rotate = atan2(ctm->b, ctm->a); + + if (spacing + && state.ctm_prev + && paragraph->lines_num + && paragraph->lines[0]->spans_num + && matrix_cmp4( + state.ctm_prev, + ¶graph->lines[0]->spans[0]->ctm + ) + ) { + /* Extra vertical space between paragraphs that were at + different angles in the original document. */ + if (extract_docx_paragraph_empty(alloc, content)) goto end; + } + + if (spacing) { + /* Extra vertical space between paragraphs. 
*/ + if (extract_docx_paragraph_empty(alloc, content)) goto end; + } + + if (rotation && rotate != 0) { + + /* Find extent of paragraphs with this same rotation. extent + will contain max width and max height of paragraphs, in units + before application of ctm, i.e. before rotation. */ + point_t extent = {0, 0}; + int p0 = p; + int p1; + + outf("rotate=%.2frad=%.1fdeg ctm: ef=(%f %f) abcd=(%f %f %f %f)", + rotate, rotate * 180 / pi, + ctm->e, + ctm->f, + ctm->a, + ctm->b, + ctm->c, + ctm->d + ); + + { + /* We assume that first span is at origin of text + block. This assumes left-to-right text. */ + double rotate0 = rotate; + const matrix_t* ctm0 = ctm; + point_t origin = { + paragraph->lines[0]->spans[0]->chars[0].x, + paragraph->lines[0]->spans[0]->chars[0].y + }; + matrix_t ctm_inverse = {1, 0, 0, 1, 0, 0}; + double ctm_det = ctm->a*ctm->d - ctm->b*ctm->c; + if (ctm_det != 0) { + ctm_inverse.a = +ctm->d / ctm_det; + ctm_inverse.b = -ctm->b / ctm_det; + ctm_inverse.c = -ctm->c / ctm_det; + ctm_inverse.d = +ctm->a / ctm_det; + } + else { + outf("cannot invert ctm=(%f %f %f %f)", + ctm->a, ctm->b, ctm->c, ctm->d); + } + + for (p=p0; p<page->paragraphs_num; ++p) { + paragraph = page->paragraphs[p]; + ctm = ¶graph->lines[0]->spans[0]->ctm; + rotate = atan2(ctm->b, ctm->a); + if (rotate != rotate0) { + break; + } + + /* Update <extent>. */ + { + int l; + for (l=0; l<paragraph->lines_num; ++l) { + line_t* line = paragraph->lines[l]; + span_t* span = line_span_last(line); + char_t* char_ = span_char_last(span); + double adv = char_->adv * matrix_expansion(span->trm); + double x = char_->x + adv * cos(rotate); + double y = char_->y + adv * sin(rotate); + + double dx = x - origin.x; + double dy = y - origin.y; + + /* Position relative to origin and before box rotation. 
*/ + double xx = ctm_inverse.a * dx + ctm_inverse.b * dy; + double yy = ctm_inverse.c * dx + ctm_inverse.d * dy; + yy = -yy; + if (xx > extent.x) extent.x = xx; + if (yy > extent.y) extent.y = yy; + if (0) outf("rotate=%f p=%i: origin=(%f %f) xy=(%f %f) dxy=(%f %f) xxyy=(%f %f) span: %s", + rotate, p, origin.x, origin.y, x, y, dx, dy, xx, yy, span_string(alloc, span)); + } + } + } + p1 = p; + rotate = rotate0; + ctm = ctm0; + outf("rotate=%f p0=%i p1=%i. extent is: (%f %f)", + rotate, p0, p1, extent.x, extent.y); + } + + /* Paragraphs p0..p1-1 have same rotation. We output them into + a single rotated text box. */ + + /* We need unique id for text box. */ + text_box_id += 1; + + { + /* Angles are in units of 1/60,000 degree. */ + int rot = (int) (rotate * 180 / pi * 60000); + + /* <wp:anchor distT=\.. etc are in EMU - 1/360,000 of a cm. + relativeHeight is z-ordering. (wp:positionV:wp:posOffset, + wp:positionV:wp:posOffset) is position of origin of box in + EMU. + + The box rotates about its centre but we want to rotate + about the origin (top-left). So we correct the position of + box by subtracting the vector that the top-left moves when + rotated by angle <rotate> about the middle. */ + double point_to_emu = 12700; /* https://en.wikipedia.org/wiki/Office_Open_XML_file_formats#DrawingML */ + int x = (int) (ctm->e * point_to_emu); + int y = (int) (ctm->f * point_to_emu); + int w = (int) (extent.x * point_to_emu); + int h = (int) (extent.y * point_to_emu); + int dx; + int dy; + + if (0) outf("rotate: %f rad, %f deg. rot=%i", rotate, rotate*180/pi, rot); + + h *= 2; + /* We can't predict how much space Word will actually + require for the rotated text, so make the box have the + original width but allow text to take extra vertical + space. There doesn't seem to be a way to make the text box + auto-grow to contain the text. 
*/ + + dx = (int) ((1-cos(rotate)) * w / 2.0 + sin(rotate) * h / 2.0); + dy = (int) ((cos(rotate)-1) * h / 2.0 + sin(rotate) * w / 2.0); + outf("ctm->e,f=%f,%f rotate=%f => x,y=%ik %ik dx,dy=%ik %ik", + ctm->e, + ctm->f, + rotate * 180/pi, + x/1000, + y/1000, + dx/1000, + dy/1000 + ); + x -= dx; + y -= -dy; + + if (extract_document_output_rotated_paragraphs(alloc, page, p0, p1, rot, x, y, w, h, text_box_id, content, &state)) goto end; + } + p = p1 - 1; + //p = page->paragraphs_num - 1; + } + else { + if (extract_document_to_docx_content_paragraph(alloc, &state, paragraph, content)) goto end; + } + + } + + if (images) { + int i; + for (i=0; i<page->images_num; ++i) { + extract_document_append_image(alloc, content, &page->images[i]); + } + } + } + ret = 0; + + end: + + return ret; +} + + + +static int systemf(extract_alloc_t* alloc, const char* format, ...) +/* Like system() but takes printf-style format and args. Also, if we return +ve +we set errno to EIO. */ +{ + int e; + char* command; + va_list va; + va_start(va, format); + e = extract_vasprintf(alloc, &command, format, va); + va_end(va); + if (e < 0) return e; + outf("running: %s", command); + e = system(command); + extract_free(alloc, &command); + if (e > 0) { + errno = EIO; + } + return e; +} + +static int read_all(extract_alloc_t* alloc, FILE* in, char** o_out) +/* Reads until eof into zero-terminated malloc'd buffer. */ +{ + size_t len = 0; + size_t delta = 128; + for(;;) { + size_t n; + if (extract_realloc2(alloc, o_out, len, len + delta + 1)) { + extract_free(alloc, o_out); + return -1; + } + n = fread(*o_out + len, 1 /*size*/, delta /*nmemb*/, in); + len += n; + if (feof(in)) { + (*o_out)[len] = 0; + return 0; + } + if (ferror(in)) { + /* It's weird that fread() and ferror() don't set errno. */ + errno = EIO; + extract_free(alloc, o_out); + return -1; + } + } +} + +static int read_all_path(extract_alloc_t* alloc, const char* path, char** o_text) +/* Reads entire file into zero-terminated malloc'd buffer. 
*/ +{ + int e = -1; + FILE* f = NULL; + f = fopen(path, "rb"); + if (!f) goto end; + if (read_all(alloc, f, o_text)) goto end; + e = 0; + end: + if (f) fclose(f); + if (e) extract_free(alloc, &o_text); + return e; +} + +static int write_all(const void* data, size_t data_size, const char* path) +{ + int e = -1; + FILE* f = fopen(path, "w"); + if (!f) goto end; + if (fwrite(data, data_size, 1 /*nmemb*/, f) != 1) goto end; + e = 0; + end: + if (f) fclose(f); + return e; +} + +static int extract_docx_content_insert( + extract_alloc_t* alloc, + const char* original, + const char* mid_begin_name, + const char* mid_end_name, + extract_astring_t* contentss, + int contentss_num, + char** o_out + ) +/* Creates a string consisting of <original> with all strings in <contentss> +inserted into <original>'s <mid_begin_name>...<mid_end_name> region, and +appends this string to *o_out. */ +{ + int e = -1; + const char* mid_begin; + const char* mid_end; + extract_astring_t out; + extract_astring_init(&out); + + mid_begin = strstr(original, mid_begin_name); + if (!mid_begin) { + outf("error: could not find '%s' in docx content", + mid_begin_name); + errno = ESRCH; + goto end; + } + mid_begin += strlen(mid_begin_name); + + mid_end = strstr(mid_begin, mid_end_name); + if (!mid_end) { + outf("error: could not find '%s' in docx content", + mid_end_name); + errno = ESRCH; + goto end; + } + + if (extract_astring_catl(alloc, &out, original, mid_begin - original)) goto end; + { + int i; + for (i=0; i<contentss_num; ++i) { + if (extract_astring_catl(alloc, &out, contentss[i].chars, contentss[i].chars_num)) goto end; + } + } + if (extract_astring_cat(alloc, &out, mid_end)) goto end; + + *o_out = out.chars; + out.chars = NULL; + e = 0; + + end: + if (e) { + extract_astring_free(alloc, &out); + *o_out = NULL; + } + return e; +} + +static int s_find_mid(const char* text, const char* begin, const char* end, const char** o_begin, const char** o_end) +/* Sets *o_begin to end of first occurrence of 
<begin> in <text>, and *o_end to +beginning of first occurtence of <end> in <text>. */ +{ + *o_begin = strstr(text, begin); + if (!*o_begin) goto fail; + *o_begin += strlen(begin); + *o_end = strstr(*o_begin, end); + if (!*o_end) goto fail; + return 0; + fail: + errno = ESRCH; + return -1; +} + + +int extract_docx_content_item( + extract_alloc_t* alloc, + extract_astring_t* contentss, + int contentss_num, + images_t* images, + const char* name, + const char* text, + char** text2 + ) +{ + int e = -1; + extract_astring_t temp; + extract_astring_init(&temp); + *text2 = NULL; + + if (0) + {} + else if (!strcmp(name, "[Content_Types].xml")) { + /* Add information about all image types that we are going to use. */ + const char* begin; + const char* end; + const char* insert; + int it; + extract_astring_free(alloc, &temp); + outf("text: %s", text); + if (s_find_mid(text, "<Types ", "</Types>", &begin, &end)) goto end; + + insert = begin; + insert = strchr(insert, '>'); + assert(insert); + insert += 1; + + if (extract_astring_catl(alloc, &temp, text, insert - text)) goto end; + outf("images->imagetypes_num=%i", images->imagetypes_num); + for (it=0; it<images->imagetypes_num; ++it) { + const char* imagetype = images->imagetypes[it]; + if (extract_astring_cat(alloc, &temp, "<Default Extension=\"")) goto end; + if (extract_astring_cat(alloc, &temp, imagetype)) goto end; + if (extract_astring_cat(alloc, &temp, "\" ContentType=\"image/")) goto end; + if (extract_astring_cat(alloc, &temp, imagetype)) goto end; + if (extract_astring_cat(alloc, &temp, "\"/>")) goto end; + } + if (extract_astring_cat(alloc, &temp, insert)) goto end; + *text2 = temp.chars; + extract_astring_init(&temp); + } + else if (!strcmp(name, "word/_rels/document.xml.rels")) { + /* Add relationships between image ids and image names within docx + archive. 
*/ + const char* begin; + const char* end; + int j; + extract_astring_free(alloc, &temp); + if (s_find_mid(text, "<Relationships", "</Relationships>", &begin, &end)) goto end; + if (extract_astring_catl(alloc, &temp, text, end - text)) goto end; + outf("images.images_num=%i", images->images_num); + for (j=0; j<images->images_num; ++j) { + image_t* image = &images->images[j]; + if (extract_astring_cat(alloc, &temp, "<Relationship Id=\"")) goto end; + if (extract_astring_cat(alloc, &temp, image->id)) goto end; + if (extract_astring_cat(alloc, &temp, "\" Type=\"http://schemas.openxmlformats.org/officeDocument/2006/relationships/image\" Target=\"media/")) goto end; + if (extract_astring_cat(alloc, &temp, image->name)) goto end; + if (extract_astring_cat(alloc, &temp, "\"/>")) goto end; + } + if (extract_astring_cat(alloc, &temp, end)) goto end; + *text2 = temp.chars; + extract_astring_init(&temp); + } + else if (!strcmp(name, "word/document.xml")) { + /* Insert paragraphs content. */ + if (extract_docx_content_insert( + alloc, + text, + "<w:body>", + "</w:body>", + contentss, + contentss_num, + text2 + )) goto end; + } + else { + *text2 = NULL; + } + e = 0; + end: + if (e) { + /* We might have set <text2> to new content. */ + extract_free(alloc, text2); + /* We might have used <temp> as a temporary buffer. */ + extract_astring_free(alloc, &temp); + } + extract_astring_init(&temp); + return e; +} + + + +static int check_path_shell_safe(const char* path) +/* Returns -1 with errno=EINVAL if <path> contains sequences that could make it +unsafe in shell commands. 
*/ +{ + if (0 + || strstr(path, "..") + || strchr(path, '\'') + || strchr(path, '"') + || strchr(path, ' ') + ) { + errno = EINVAL; + return -1; + } + return 0; +} + +static int remove_directory(extract_alloc_t* alloc, const char* path) +{ + if (check_path_shell_safe(path)) { + outf("path_out is unsafe: %s", path); + return -1; + } + return systemf(alloc, "rm -r '%s'", path); +} + +#ifdef _WIN32 +#include <direct.h> +static int s_mkdir(const char* path, int mode) +{ + (void) mode; + return _mkdir(path); +} +#else +static int s_mkdir(const char* path, int mode) +{ + return mkdir(path, mode); +} +#endif + + +int extract_docx_write_template( + extract_alloc_t* alloc, + extract_astring_t* contentss, + int contentss_num, + images_t* images, + const char* path_template, + const char* path_out, + int preserve_dir + ) +{ + int e = -1; + int i; + char* path_tempdir = NULL; + FILE* f = NULL; + char* path = NULL; + char* text = NULL; + char* text2 = NULL; + + assert(path_out); + assert(path_template); + + if (check_path_shell_safe(path_out)) { + outf("path_out is unsafe: %s", path_out); + goto end; + } + + outf("images->images_num=%i", images->images_num); + if (extract_asprintf(alloc, &path_tempdir, "%s.dir", path_out) < 0) goto end; + if (systemf(alloc, "rm -r '%s' 2>/dev/null", path_tempdir) < 0) goto end; + + if (s_mkdir(path_tempdir, 0777)) { + outf("Failed to create directory: %s", path_tempdir); + goto end; + } + + outf("Unzipping template document '%s' to tempdir: %s", + path_template, path_tempdir); + e = systemf(alloc, "unzip -q -d '%s' '%s'", path_tempdir, path_template); + if (e) { + outf("Failed to unzip %s into %s", + path_template, path_tempdir); + goto end; + } + + /* Might be nice to iterate through all items in path_tempdir, but for now + we look at just the items that we know extract_docx_content_item() will + modify. 
*/ + + { + const char* names[] = { + "word/document.xml", + "[Content_Types].xml", + "word/_rels/document.xml.rels", + }; + int names_num = sizeof(names) / sizeof(names[0]); + for (i=0; i<names_num; ++i) { + const char* name = names[i]; + extract_free(alloc, &path); + extract_free(alloc, &text); + extract_free(alloc, &text2); + if (extract_asprintf(alloc, &path, "%s/%s", path_tempdir, name) < 0) goto end; + if (read_all_path(alloc, path, &text)) goto end; + + if (extract_docx_content_item( + alloc, + contentss, + contentss_num, + images, + name, + text, + &text2 + )) goto end; + { + const char* text3 = (text2) ? text2 : text; + if (write_all(text3, strlen(text3), path)) goto end; + } + } + } + + /* Copy images into <path_tempdir>/media/. */ + extract_free(alloc, &path); + if (extract_asprintf(alloc, &path, "%s/word/media", path_tempdir) < 0) goto end; + if (s_mkdir(path, 0777)) goto end; + + for (i=0; i<images->images_num; ++i) { + image_t* image = &images->images[i]; + extract_free(alloc, &path); + if (extract_asprintf(alloc, &path, "%s/word/media/%s", path_tempdir, image->name) < 0) goto end; + if (write_all(image->data, image->data_size, path)) goto end; + } + + outf("Zipping tempdir to create %s", path_out); + { + const char* path_out_leaf = strrchr(path_out, '/'); + if (!path_out_leaf) path_out_leaf = path_out; + e = systemf(alloc, "cd '%s' && zip -q -r -D '../%s' .", path_tempdir, path_out_leaf); + if (e) { + outf("Zip command failed to convert '%s' directory into output file: %s", + path_tempdir, path_out); + goto end; + } + } + + if (!preserve_dir) { + if (remove_directory(alloc, path_tempdir)) goto end; + } + + e = 0; + + end: + outf("e=%i", e); + extract_free(alloc, &path_tempdir); + extract_free(alloc, &path); + extract_free(alloc, &text); + extract_free(alloc, &text2); + if (f) fclose(f); + + if (e) { + outf("Failed to create %s", path_out); + } + return e; +} diff --git a/extract/src/docx.h b/extract/src/docx.h new file mode 100644 index 
00000000..6e26568f --- /dev/null +++ b/extract/src/docx.h @@ -0,0 +1,84 @@ +#ifndef ARTIFEX_EXTRACT_DOCX_H +#define ARTIFEX_EXTRACT_DOCX_H + +/* Only for internal use by extract code. */ + +/* Things for creating docx files. */ + +int extract_document_to_docx_content( + extract_alloc_t* alloc, + document_t* document, + int spacing, + int rotation, + int images, + extract_astring_t* content + ); +/* Writes into *content a string containing all paragraphs in *document in +docx XML format. + +This string can be passed to extract_docx_content_item() or +extract_docx_write_template() to be inserted into a docx archive's +word/document.xml. */ + + +int extract_docx_write_template( + extract_alloc_t* alloc, + extract_astring_t* contentss, + int contentss_num, + images_t* images, + const char* path_template, + const char* path_out, + int preserve_dir + ); +/* Creates a new docx file using a provided template document. + +Uses the 'zip' and 'unzip' commands internally. + +contentss +contentss_num + Content to be inserted into word/document.xml. +document + . +images + Information about images. +path_template + Name of docx file to use as a template. +path_out + Name of docx file to create. Must not contain single-quote, double quote, + space or ".." sequence - these will force EINVAL error because they could + make internal shell commands unsafe. +preserve_dir + If true, we don't delete the temporary directory <path_out>.dir containing + unzipped docx content. +*/ + + +int extract_docx_content_item( + extract_alloc_t* alloc, + extract_astring_t* contentss, + int contentss_num, + images_t* images, + const char* name, + const char* text, + char** text2 + ); +/* Determines content of <name> in docx archive. + +contentss +contentss_num + Text to insert if <name> is word/document.xml. +images + Information about images. 
If <name> is word/document.xml we insert + relationship information mapping from image ids to image names; + <text> should already contain reference ids for images. If <name> is + [Content_Types].xml we insert information about image types. +name + Path within the docx zip archive. +text + Content of <name> in template docx file. +text2 + Out-param. Set to NULL if <text> should be used unchanged. Otherwise set to + point to desired text, allocated with malloc() which caller should free. +*/ + +#endif diff --git a/extract/src/docx_template.c b/extract/src/docx_template.c new file mode 100644 index 00000000..73ab5b71 --- /dev/null +++ b/extract/src/docx_template.c @@ -0,0 +1,910 @@ +/* THIS IS AUTO-GENERATED CODE, DO NOT EDIT. */ + +#include "docx_template.h" + +const docx_template_item_t docx_template_items[] = +{ + { + "[Content_Types].xml", + "" + "<?xml version=\"1.0\" encoding=\"UTF-8\" standalone=\"yes\"?>\r\n" + "" + "<Types xmlns=\"http://schemas.openxmlformats.org/package/2006/content-types\">" + "<Default Extension=\"rels\" ContentType=\"application/vnd.openxmlformats-package.relationships+xml\"/>" + "<Default Extension=\"xml\" ContentType=\"application/xml\"/>" + "<Override PartName=\"/word/document.xml\" ContentType=\"application/vnd.openxmlformats-officedocument.wordprocessingml.document.main+xml\"/>" + "<Override PartName=\"/word/styles.xml\" ContentType=\"application/vnd.openxmlformats-officedocument.wordprocessingml.styles+xml\"/>" + "<Override PartName=\"/word/settings.xml\" ContentType=\"application/vnd.openxmlformats-officedocument.wordprocessingml.settings+xml\"/>" + "<Override PartName=\"/word/webSettings.xml\" ContentType=\"application/vnd.openxmlformats-officedocument.wordprocessingml.webSettings+xml\"/>" + "<Override PartName=\"/word/fontTable.xml\" ContentType=\"application/vnd.openxmlformats-officedocument.wordprocessingml.fontTable+xml\"/>" + "<Override PartName=\"/word/theme/theme1.xml\" 
ContentType=\"application/vnd.openxmlformats-officedocument.theme+xml\"/>" + "<Override PartName=\"/docProps/core.xml\" ContentType=\"application/vnd.openxmlformats-package.core-properties+xml\"/>" + "<Override PartName=\"/docProps/app.xml\" ContentType=\"application/vnd.openxmlformats-officedocument.extended-properties+xml\"/></Types>" + }, + + { + "_rels/.rels", + "" + "<?xml version=\"1.0\" encoding=\"UTF-8\" standalone=\"yes\"?>\r\n" + "" + "<Relationships xmlns=\"http://schemas.openxmlformats.org/package/2006/relationships\">" + "<Relationship Id=\"rId3\" Type=\"http://schemas.openxmlformats.org/officeDocument/2006/relationships/extended-properties\" Target=\"docProps/app.xml\"/>" + "<Relationship Id=\"rId2\" Type=\"http://schemas.openxmlformats.org/package/2006/relationships/metadata/core-properties\" Target=\"docProps/core.xml\"/>" + "<Relationship Id=\"rId1\" Type=\"http://schemas.openxmlformats.org/officeDocument/2006/relationships/officeDocument\" Target=\"word/document.xml\"/></Relationships>" + }, + + { + "docProps/app.xml", + "" + "<?xml version=\"1.0\" encoding=\"UTF-8\" standalone=\"yes\"?>\r\n" + "" + "<Properties xmlns=\"http://schemas.openxmlformats.org/officeDocument/2006/extended-properties\" xmlns:vt=\"http://schemas.openxmlformats.org/officeDocument/2006/docPropsVTypes\">" + "<Template>Normal.dotm</Template>" + "<TotalTime>3</TotalTime>" + "<Pages>1</Pages>" + "<Words>2</Words>" + "<Characters>18</Characters>" + "<Application>Microsoft Office Word</Application>" + "<DocSecurity>0</DocSecurity>" + "<Lines>1</Lines>" + "<Paragraphs>1</Paragraphs>" + "<ScaleCrop>false</ScaleCrop>" + "<Company></Company>" + "<LinksUpToDate>false</LinksUpToDate>" + "<CharactersWithSpaces>19</CharactersWithSpaces>" + "<SharedDoc>false</SharedDoc>" + "<HyperlinksChanged>false</HyperlinksChanged>" + "<AppVersion>16.0000</AppVersion></Properties>" + }, + + { + "docProps/core.xml", + "" + "<?xml version=\"1.0\" encoding=\"UTF-8\" standalone=\"yes\"?>\r\n" + "" + 
"<cp:coreProperties xmlns:cp=\"http://schemas.openxmlformats.org/package/2006/metadata/core-properties\" xmlns:dc=\"http://purl.org/dc/elements/1.1/\" xmlns:dcterms=\"http://purl.org/dc/terms/\" xmlns:dcmitype=\"http://purl.org/dc/dcmitype/\" xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\">" + "<dc:title></dc:title>" + "<dc:subject></dc:subject>" + "<dc:creator></dc:creator>" + "<cp:keywords></cp:keywords>" + "<dc:description></dc:description>" + "<cp:lastModifiedBy></cp:lastModifiedBy>" + "<cp:revision>1</cp:revision>" + "<dcterms:created xsi:type=\"dcterms:W3CDTF\">2020-09-25T17:04:00Z</dcterms:created>" + "<dcterms:modified xsi:type=\"dcterms:W3CDTF\">2020-09-25T17:07:00Z</dcterms:modified></cp:coreProperties>" + }, + + { + "word/document.xml", + "" + "<?xml version=\"1.0\" encoding=\"UTF-8\" standalone=\"yes\"?>\r\n" + "" + "<w:document xmlns:wpc=\"http://schemas.microsoft.com/office/word/2010/wordprocessingCanvas\" xmlns:cx=\"http://schemas.microsoft.com/office/drawing/2014/chartex\" xmlns:cx1=\"http://schemas.microsoft.com/office/drawing/2015/9/8/chartex\" xmlns:cx2=\"http://schemas.microsoft.com/office/drawing/2015/10/21/chartex\" xmlns:cx3=\"http://schemas.microsoft.com/office/drawing/2016/5/9/chartex\" xmlns:cx4=\"http://schemas.microsoft.com/office/drawing/2016/5/10/chartex\" xmlns:cx5=\"http://schemas.microsoft.com/office/drawing/2016/5/11/chartex\" xmlns:cx6=\"http://schemas.microsoft.com/office/drawing/2016/5/12/chartex\" xmlns:cx7=\"http://schemas.microsoft.com/office/drawing/2016/5/13/chartex\" xmlns:cx8=\"http://schemas.microsoft.com/office/drawing/2016/5/14/chartex\" xmlns:mc=\"http://schemas.openxmlformats.org/markup-compatibility/2006\" xmlns:aink=\"http://schemas.microsoft.com/office/drawing/2016/ink\" xmlns:am3d=\"http://schemas.microsoft.com/office/drawing/2017/model3d\" xmlns:o=\"urn:schemas-microsoft-com:office:office\" xmlns:r=\"http://schemas.openxmlformats.org/officeDocument/2006/relationships\" 
xmlns:m=\"http://schemas.openxmlformats.org/officeDocument/2006/math\" xmlns:v=\"urn:schemas-microsoft-com:vml\" xmlns:wp14=\"http://schemas.microsoft.com/office/word/2010/wordprocessingDrawing\" xmlns:wp=\"http://schemas.openxmlformats.org/drawingml/2006/wordprocessingDrawing\" xmlns:w10=\"urn:schemas-microsoft-com:office:word\" xmlns:w=\"http://schemas.openxmlformats.org/wordprocessingml/2006/main\" xmlns:w14=\"http://schemas.microsoft.com/office/word/2010/wordml\" xmlns:w15=\"http://schemas.microsoft.com/office/word/2012/wordml\" xmlns:w16cex=\"http://schemas.microsoft.com/office/word/2018/wordml/cex\" xmlns:w16cid=\"http://schemas.microsoft.com/office/word/2016/wordml/cid\" xmlns:w16=\"http://schemas.microsoft.com/office/word/2018/wordml\" xmlns:w16se=\"http://schemas.microsoft.com/office/word/2015/wordml/symex\" xmlns:wpg=\"http://schemas.microsoft.com/office/word/2010/wordprocessingGroup\" xmlns:wpi=\"http://schemas.microsoft.com/office/word/2010/wordprocessingInk\" xmlns:wne=\"http://schemas.microsoft.com/office/word/2006/wordml\" xmlns:wps=\"http://schemas.microsoft.com/office/word/2010/wordprocessingShape\" mc:Ignorable=\"w14 w15 w16se w16cid w16 w16cex wp14\">" + "<w:body>" + "<w:p w14:paraId=\"7C58A6F1\" w14:textId=\"3E2CAE3F\" w:rsidR=\"00610D78\" w:rsidRDefault=\"007F4427\">" + "<w:r>" + "<w:t>Hello world</w:t></w:r></w:p>" + "<w:p w14:paraId=\"53256C58\" w14:textId=\"13022069\" w:rsidR=\"007F4427\" w:rsidRDefault=\"007F4427\">" + "<w:r>" + "<w:rPr>" + "<w:noProof/></w:rPr>" + "<mc:AlternateContent>" + "<mc:Choice Requires=\"wps\">" + "<w:drawing>" + "<wp:anchor distT=\"0\" distB=\"0\" distL=\"114300\" distR=\"114300\" simplePos=\"0\" relativeHeight=\"251659264\" behindDoc=\"0\" locked=\"0\" layoutInCell=\"1\" allowOverlap=\"1\" wp14:anchorId=\"53A210D1\" wp14:editId=\"2B7E8016\">" + "<wp:simplePos x=\"0\" y=\"0\"/>" + "<wp:positionH relativeFrom=\"column\">" + "<wp:posOffset>904875</wp:posOffset></wp:positionH>" + "<wp:positionV 
relativeFrom=\"paragraph\">" + "<wp:posOffset>619125</wp:posOffset></wp:positionV>" + "<wp:extent cx=\"3228975\" cy=\"2286000\"/>" + "<wp:effectExtent l=\"381000\" t=\"723900\" r=\"371475\" b=\"723900\"/>" + "<wp:wrapNone/>" + "<wp:docPr id=\"1\" name=\"Text Box 1\"/>" + "<wp:cNvGraphicFramePr/>" + "<a:graphic xmlns:a=\"http://schemas.openxmlformats.org/drawingml/2006/main\">" + "<a:graphicData uri=\"http://schemas.microsoft.com/office/word/2010/wordprocessingShape\">" + "<wps:wsp>" + "<wps:cNvSpPr txBox=\"1\"/>" + "<wps:spPr>" + "<a:xfrm rot=\"19547867\">" + "<a:off x=\"0\" y=\"0\"/>" + "<a:ext cx=\"3228975\" cy=\"2286000\"/></a:xfrm>" + "<a:prstGeom prst=\"rect\">" + "<a:avLst/></a:prstGeom>" + "<a:solidFill>" + "<a:schemeClr val=\"lt1\"/></a:solidFill>" + "<a:ln w=\"6350\">" + "<a:solidFill>" + "<a:prstClr val=\"black\"/></a:solidFill></a:ln></wps:spPr>" + "<wps:txbx>" + "<w:txbxContent>" + "<w:p w14:paraId=\"31597E69\" w14:textId=\"2903B1F1\" w:rsidR=\"007F4427\" w:rsidRDefault=\"007F4427\">" + "<w:r>" + "<w:t>Hello. Qwerty. 
World</w:t></w:r></w:p>" + "<w:p w14:paraId=\"0BD8A985\" w14:textId=\"1BFB8248\" w:rsidR=\"007F4427\" w:rsidRDefault=\"007F4427\">" + "<w:proofErr w:type=\"spellStart\"/>" + "<w:r>" + "<w:t>mupdf</w:t></w:r>" + "<w:proofErr w:type=\"spellEnd\"/></w:p></w:txbxContent></wps:txbx>" + "<wps:bodyPr rot=\"0\" spcFirstLastPara=\"0\" vertOverflow=\"overflow\" horzOverflow=\"overflow\" vert=\"horz\" wrap=\"square\" lIns=\"91440\" tIns=\"45720\" rIns=\"91440\" bIns=\"45720\" numCol=\"1\" spcCol=\"0\" rtlCol=\"0\" fromWordArt=\"0\" anchor=\"t\" anchorCtr=\"0\" forceAA=\"0\" compatLnSpc=\"1\">" + "<a:prstTxWarp prst=\"textNoShape\">" + "<a:avLst/></a:prstTxWarp>" + "<a:noAutofit/></wps:bodyPr></wps:wsp></a:graphicData></a:graphic></wp:anchor></w:drawing></mc:Choice>" + "<mc:Fallback>" + "<w:pict>" + "<v:shapetype w14:anchorId=\"53A210D1\" id=\"_x0000_t202\" coordsize=\"21600,21600\" o:spt=\"202\" path=\"m,l,21600r21600,l21600,xe\">" + "<v:stroke joinstyle=\"miter\"/>" + "<v:path gradientshapeok=\"t\" o:connecttype=\"rect\"/></v:shapetype>" + "<v:shape id=\"Text Box 1\" o:spid=\"_x0000_s1026\" type=\"#_x0000_t202\" style=\"position:absolute;margin-left:71.25pt;margin-top:48.75pt;width:254.25pt;height:180pt;rotation:-2241476fd;z-index:251659264;visibility:visible;mso-wrap-style:square;mso-wrap-distance-left:9pt;mso-wrap-distance-top:0;mso-wrap-distance-right:9pt;mso-wrap-distance-bottom:0;mso-position-horizontal:absolute;mso-position-horizontal-relative:text;mso-position-vertical:absolute;mso-position-vertical-relative:text;v-text-anchor:top\" o:gfxdata=\"UEsDBBQABgAIAAAAIQC2gziS/gAAAOEBAAATAAAAW0NvbnRlbnRfVHlwZXNdLnhtbJSRQU7DMBBF
90jcwfIWJU67QAgl6YK0S0CoHGBkTxKLZGx5TGhvj5O2G0SRWNoz/78nu9wcxkFMGNg6quQqL6RA
0s5Y6ir5vt9lD1JwBDIwOMJKHpHlpr69KfdHjyxSmriSfYz+USnWPY7AufNIadK6MEJMx9ApD/oD
OlTrorhX2lFEilmcO2RdNtjC5xDF9pCuTyYBB5bi6bQ4syoJ3g9WQ0ymaiLzg5KdCXlKLjvcW893
SUOqXwnz5DrgnHtJTxOsQfEKIT7DmDSUCaxw7Rqn8787ZsmRM9e2VmPeBN4uqYvTtW7jvijg9N/y
JsXecLq0q+WD6m8AAAD//wMAUEsDBBQABgAIAAAAIQA4/SH/1gAAAJQBAAALAAAAX3JlbHMvLnJl
bHOkkMFqwzAMhu+DvYPRfXGawxijTi+j0GvpHsDYimMaW0Yy2fr2M4PBMnrbUb/Q94l/f/hMi1qR
JVI2sOt6UJgd+ZiDgffL8ekFlFSbvV0oo4EbChzGx4f9GRdb25HMsYhqlCwG5lrLq9biZkxWOiqY
22YiTra2kYMu1l1tQD30/bPm3wwYN0x18gb45AdQl1tp5j/sFB2T0FQ7R0nTNEV3j6o9feQzro1i
OWA14Fm+Q8a1a8+Bvu/d/dMb2JY5uiPbhG/ktn4cqGU/er3pcvwCAAD//wMAUEsDBBQABgAIAAAA
IQDQg5pQVgIAALEEAAAOAAAAZHJzL2Uyb0RvYy54bWysVE1v2zAMvQ/YfxB0X+2k+WiDOEXWosOA
oi3QDj0rstwYk0VNUmJ3v35PipMl3U7DLgJFPj+Rj6TnV12j2VY5X5Mp+OAs50wZSWVtXgv+7fn2
0wVnPghTCk1GFfxNeX61+Phh3tqZGtKadKkcA4nxs9YWfB2CnWWZl2vVCH9GVhkEK3KNCLi616x0
ogV7o7Nhnk+yllxpHUnlPbw3uyBfJP6qUjI8VJVXgemCI7eQTpfOVTyzxVzMXp2w61r2aYh/yKIR
tcGjB6obEQTbuPoPqqaWjjxV4UxSk1FV1VKlGlDNIH9XzdNaWJVqgTjeHmTy/49W3m8fHatL9I4z
Ixq06Fl1gX2mjg2iOq31M4CeLGChgzsie7+HMxbdVa5hjiDu4HI8ml5MpkkLVMcAh+xvB6kjt4Tz
fDi8uJyOOZOIwZ7keWpGtmOLrNb58EVRw6JRcIdeJlqxvfMBGQC6h0S4J12Xt7XW6RLnR11rx7YC
ndch5YwvTlDasLbgk/NxnohPYpH68P1KC/k9Vn3KgJs2cEaNdlpEK3SrrhdoReUbdEvSQAZv5W0N
3jvhw6NwGDQ4sTzhAUelCclQb3G2Jvfzb/6IR/8R5azF4Bbc/9gIpzjTXw0m43IwGsVJT5fReDrE
xR1HVscRs2muCQqh+8gumREf9N6sHDUv2LFlfBUhYSTeLnjYm9dht07YUamWywTCbFsR7syTlZF6
383n7kU42/czYBTuaT/iYvaurTts/NLQchOoqlPPo8A7VXvdsRepLf0Ox8U7vifU7z/N4hcAAAD/
/wMAUEsDBBQABgAIAAAAIQBh17L63wAAAAoBAAAPAAAAZHJzL2Rvd25yZXYueG1sTI9BT4NAEIXv
Jv6HzZh4s0ubgpayNIboSW3Syg9Y2BGI7CyyS0v99Y4nPU3ezMub72W72fbihKPvHClYLiIQSLUz
HTUKyvfnuwcQPmgyuneECi7oYZdfX2U6Ne5MBzwdQyM4hHyqFbQhDKmUvm7Rar9wAxLfPtxodWA5
NtKM+szhtperKEqk1R3xh1YPWLRYfx4nq8APVfz9VQxPb+WUNC+vZbGPDhelbm/mxy2IgHP4M8Mv
PqNDzkyVm8h40bNer2K2Ktjc82RDEi+5XKVgHfNG5pn8XyH/AQAA//8DAFBLAQItABQABgAIAAAA
IQC2gziS/gAAAOEBAAATAAAAAAAAAAAAAAAAAAAAAABbQ29udGVudF9UeXBlc10ueG1sUEsBAi0A
FAAGAAgAAAAhADj9If/WAAAAlAEAAAsAAAAAAAAAAAAAAAAALwEAAF9yZWxzLy5yZWxzUEsBAi0A
FAAGAAgAAAAhANCDmlBWAgAAsQQAAA4AAAAAAAAAAAAAAAAALgIAAGRycy9lMm9Eb2MueG1sUEsB
Ai0AFAAGAAgAAAAhAGHXsvrfAAAACgEAAA8AAAAAAAAAAAAAAAAAsAQAAGRycy9kb3ducmV2Lnht
bFBLBQYAAAAABAAEAPMAAAC8BQAAAAA=
\" fillcolor=\"white [3201]\" strokeweight=\".5pt\">" + "<v:textbox>" + "<w:txbxContent>" + "<w:p w14:paraId=\"31597E69\" w14:textId=\"2903B1F1\" w:rsidR=\"007F4427\" w:rsidRDefault=\"007F4427\">" + "<w:r>" + "<w:t>Hello. Qwerty. World</w:t></w:r></w:p>" + "<w:p w14:paraId=\"0BD8A985\" w14:textId=\"1BFB8248\" w:rsidR=\"007F4427\" w:rsidRDefault=\"007F4427\">" + "<w:proofErr w:type=\"spellStart\"/>" + "<w:r>" + "<w:t>mupdf</w:t></w:r>" + "<w:proofErr w:type=\"spellEnd\"/></w:p></w:txbxContent></v:textbox></v:shape></w:pict></mc:Fallback></mc:AlternateContent></w:r>" + "<w:r>" + "<w:t>qwerty</w:t></w:r></w:p>" + "<w:sectPr w:rsidR=\"007F4427\">" + "<w:pgSz w:w=\"11906\" w:h=\"16838\"/>" + "<w:pgMar w:top=\"1440\" w:right=\"1440\" w:bottom=\"1440\" w:left=\"1440\" w:header=\"708\" w:footer=\"708\" w:gutter=\"0\"/>" + "<w:cols w:space=\"708\"/>" + "<w:docGrid w:linePitch=\"360\"/></w:sectPr></w:body></w:document>" + }, + + { + "word/fontTable.xml", + "" + "<?xml version=\"1.0\" encoding=\"UTF-8\" standalone=\"yes\"?>\r\n" + "" + "<w:fonts xmlns:mc=\"http://schemas.openxmlformats.org/markup-compatibility/2006\" xmlns:r=\"http://schemas.openxmlformats.org/officeDocument/2006/relationships\" xmlns:w=\"http://schemas.openxmlformats.org/wordprocessingml/2006/main\" xmlns:w14=\"http://schemas.microsoft.com/office/word/2010/wordml\" xmlns:w15=\"http://schemas.microsoft.com/office/word/2012/wordml\" xmlns:w16cex=\"http://schemas.microsoft.com/office/word/2018/wordml/cex\" xmlns:w16cid=\"http://schemas.microsoft.com/office/word/2016/wordml/cid\" xmlns:w16=\"http://schemas.microsoft.com/office/word/2018/wordml\" xmlns:w16se=\"http://schemas.microsoft.com/office/word/2015/wordml/symex\" mc:Ignorable=\"w14 w15 w16se w16cid w16 w16cex\">" + "<w:font w:name=\"Calibri\">" + "<w:panose1 w:val=\"020F0502020204030204\"/>" + "<w:charset w:val=\"00\"/>" + "<w:family w:val=\"swiss\"/>" + "<w:pitch w:val=\"variable\"/>" + "<w:sig w:usb0=\"E4002EFF\" w:usb1=\"C000247B\" w:usb2=\"00000009\" 
w:usb3=\"00000000\" w:csb0=\"000001FF\" w:csb1=\"00000000\"/></w:font>" + "<w:font w:name=\"Times New Roman\">" + "<w:panose1 w:val=\"02020603050405020304\"/>" + "<w:charset w:val=\"00\"/>" + "<w:family w:val=\"roman\"/>" + "<w:pitch w:val=\"variable\"/>" + "<w:sig w:usb0=\"E0002EFF\" w:usb1=\"C000785B\" w:usb2=\"00000009\" w:usb3=\"00000000\" w:csb0=\"000001FF\" w:csb1=\"00000000\"/></w:font>" + "<w:font w:name=\"Calibri Light\">" + "<w:panose1 w:val=\"020F0302020204030204\"/>" + "<w:charset w:val=\"00\"/>" + "<w:family w:val=\"swiss\"/>" + "<w:pitch w:val=\"variable\"/>" + "<w:sig w:usb0=\"E4002EFF\" w:usb1=\"C000247B\" w:usb2=\"00000009\" w:usb3=\"00000000\" w:csb0=\"000001FF\" w:csb1=\"00000000\"/></w:font></w:fonts>" + }, + + { + "word/settings.xml", + "" + "<?xml version=\"1.0\" encoding=\"UTF-8\" standalone=\"yes\"?>\r\n" + "" + "<w:settings xmlns:mc=\"http://schemas.openxmlformats.org/markup-compatibility/2006\" xmlns:o=\"urn:schemas-microsoft-com:office:office\" xmlns:r=\"http://schemas.openxmlformats.org/officeDocument/2006/relationships\" xmlns:m=\"http://schemas.openxmlformats.org/officeDocument/2006/math\" xmlns:v=\"urn:schemas-microsoft-com:vml\" xmlns:w10=\"urn:schemas-microsoft-com:office:word\" xmlns:w=\"http://schemas.openxmlformats.org/wordprocessingml/2006/main\" xmlns:w14=\"http://schemas.microsoft.com/office/word/2010/wordml\" xmlns:w15=\"http://schemas.microsoft.com/office/word/2012/wordml\" xmlns:w16cex=\"http://schemas.microsoft.com/office/word/2018/wordml/cex\" xmlns:w16cid=\"http://schemas.microsoft.com/office/word/2016/wordml/cid\" xmlns:w16=\"http://schemas.microsoft.com/office/word/2018/wordml\" xmlns:w16se=\"http://schemas.microsoft.com/office/word/2015/wordml/symex\" xmlns:sl=\"http://schemas.openxmlformats.org/schemaLibrary/2006/main\" mc:Ignorable=\"w14 w15 w16se w16cid w16 w16cex\">" + "<w:zoom w:percent=\"100\"/>" + "<w:proofState w:spelling=\"clean\" w:grammar=\"clean\"/>" + "<w:defaultTabStop w:val=\"720\"/>" + 
"<w:characterSpacingControl w:val=\"doNotCompress\"/>" + "<w:compat>" + "<w:compatSetting w:name=\"compatibilityMode\" w:uri=\"http://schemas.microsoft.com/office/word\" w:val=\"15\"/>" + "<w:compatSetting w:name=\"overrideTableStyleFontSizeAndJustification\" w:uri=\"http://schemas.microsoft.com/office/word\" w:val=\"1\"/>" + "<w:compatSetting w:name=\"enableOpenTypeFeatures\" w:uri=\"http://schemas.microsoft.com/office/word\" w:val=\"1\"/>" + "<w:compatSetting w:name=\"doNotFlipMirrorIndents\" w:uri=\"http://schemas.microsoft.com/office/word\" w:val=\"1\"/>" + "<w:compatSetting w:name=\"differentiateMultirowTableHeaders\" w:uri=\"http://schemas.microsoft.com/office/word\" w:val=\"1\"/>" + "<w:compatSetting w:name=\"useWord2013TrackBottomHyphenation\" w:uri=\"http://schemas.microsoft.com/office/word\" w:val=\"0\"/></w:compat>" + "<w:rsids>" + "<w:rsidRoot w:val=\"007F4427\"/>" + "<w:rsid w:val=\"00255448\"/>" + "<w:rsid w:val=\"007F4427\"/></w:rsids>" + "<m:mathPr>" + "<m:mathFont m:val=\"Cambria Math\"/>" + "<m:brkBin m:val=\"before\"/>" + "<m:brkBinSub m:val=\"--\"/>" + "<m:smallFrac m:val=\"0\"/>" + "<m:dispDef/>" + "<m:lMargin m:val=\"0\"/>" + "<m:rMargin m:val=\"0\"/>" + "<m:defJc m:val=\"centerGroup\"/>" + "<m:wrapIndent m:val=\"1440\"/>" + "<m:intLim m:val=\"subSup\"/>" + "<m:naryLim m:val=\"undOvr\"/></m:mathPr>" + "<w:themeFontLang w:val=\"en-GB\"/>" + "<w:clrSchemeMapping w:bg1=\"light1\" w:t1=\"dark1\" w:bg2=\"light2\" w:t2=\"dark2\" w:accent1=\"accent1\" w:accent2=\"accent2\" w:accent3=\"accent3\" w:accent4=\"accent4\" w:accent5=\"accent5\" w:accent6=\"accent6\" w:hyperlink=\"hyperlink\" w:followedHyperlink=\"followedHyperlink\"/>" + "<w:shapeDefaults>" + "<o:shapedefaults v:ext=\"edit\" spidmax=\"1026\"/>" + "<o:shapelayout v:ext=\"edit\">" + "<o:idmap v:ext=\"edit\" data=\"1\"/></o:shapelayout></w:shapeDefaults>" + "<w:decimalSymbol w:val=\".\"/>" + "<w:listSeparator w:val=\",\"/>" + "<w14:docId w14:val=\"32E52EF8\"/>" + "<w15:chartTrackingRefBased/>" 
+ "<w15:docId w15:val=\"{A10F59F7-497D-44D4-A338-47719734E7A0}\"/></w:settings>" + }, + + { + "word/styles.xml", + "" + "<?xml version=\"1.0\" encoding=\"UTF-8\" standalone=\"yes\"?>\r\n" + "" + "<w:styles xmlns:mc=\"http://schemas.openxmlformats.org/markup-compatibility/2006\" xmlns:r=\"http://schemas.openxmlformats.org/officeDocument/2006/relationships\" xmlns:w=\"http://schemas.openxmlformats.org/wordprocessingml/2006/main\" xmlns:w14=\"http://schemas.microsoft.com/office/word/2010/wordml\" xmlns:w15=\"http://schemas.microsoft.com/office/word/2012/wordml\" xmlns:w16cex=\"http://schemas.microsoft.com/office/word/2018/wordml/cex\" xmlns:w16cid=\"http://schemas.microsoft.com/office/word/2016/wordml/cid\" xmlns:w16=\"http://schemas.microsoft.com/office/word/2018/wordml\" xmlns:w16se=\"http://schemas.microsoft.com/office/word/2015/wordml/symex\" mc:Ignorable=\"w14 w15 w16se w16cid w16 w16cex\">" + "<w:docDefaults>" + "<w:rPrDefault>" + "<w:rPr>" + "<w:rFonts w:asciiTheme=\"minorHAnsi\" w:eastAsiaTheme=\"minorHAnsi\" w:hAnsiTheme=\"minorHAnsi\" w:cstheme=\"minorBidi\"/>" + "<w:sz w:val=\"22\"/>" + "<w:szCs w:val=\"22\"/>" + "<w:lang w:val=\"en-GB\" w:eastAsia=\"en-US\" w:bidi=\"ar-SA\"/></w:rPr></w:rPrDefault>" + "<w:pPrDefault>" + "<w:pPr>" + "<w:spacing w:after=\"160\" w:line=\"259\" w:lineRule=\"auto\"/></w:pPr></w:pPrDefault></w:docDefaults>" + "<w:latentStyles w:defLockedState=\"0\" w:defUIPriority=\"99\" w:defSemiHidden=\"0\" w:defUnhideWhenUsed=\"0\" w:defQFormat=\"0\" w:count=\"376\">" + "<w:lsdException w:name=\"Normal\" w:uiPriority=\"0\" w:qFormat=\"1\"/>" + "<w:lsdException w:name=\"heading 1\" w:uiPriority=\"9\" w:qFormat=\"1\"/>" + "<w:lsdException w:name=\"heading 2\" w:semiHidden=\"1\" w:uiPriority=\"9\" w:unhideWhenUsed=\"1\" w:qFormat=\"1\"/>" + "<w:lsdException w:name=\"heading 3\" w:semiHidden=\"1\" w:uiPriority=\"9\" w:unhideWhenUsed=\"1\" w:qFormat=\"1\"/>" + "<w:lsdException w:name=\"heading 4\" w:semiHidden=\"1\" w:uiPriority=\"9\" 
w:unhideWhenUsed=\"1\" w:qFormat=\"1\"/>" + "<w:lsdException w:name=\"heading 5\" w:semiHidden=\"1\" w:uiPriority=\"9\" w:unhideWhenUsed=\"1\" w:qFormat=\"1\"/>" + "<w:lsdException w:name=\"heading 6\" w:semiHidden=\"1\" w:uiPriority=\"9\" w:unhideWhenUsed=\"1\" w:qFormat=\"1\"/>" + "<w:lsdException w:name=\"heading 7\" w:semiHidden=\"1\" w:uiPriority=\"9\" w:unhideWhenUsed=\"1\" w:qFormat=\"1\"/>" + "<w:lsdException w:name=\"heading 8\" w:semiHidden=\"1\" w:uiPriority=\"9\" w:unhideWhenUsed=\"1\" w:qFormat=\"1\"/>" + "<w:lsdException w:name=\"heading 9\" w:semiHidden=\"1\" w:uiPriority=\"9\" w:unhideWhenUsed=\"1\" w:qFormat=\"1\"/>" + "<w:lsdException w:name=\"index 1\" w:semiHidden=\"1\" w:unhideWhenUsed=\"1\"/>" + "<w:lsdException w:name=\"index 2\" w:semiHidden=\"1\" w:unhideWhenUsed=\"1\"/>" + "<w:lsdException w:name=\"index 3\" w:semiHidden=\"1\" w:unhideWhenUsed=\"1\"/>" + "<w:lsdException w:name=\"index 4\" w:semiHidden=\"1\" w:unhideWhenUsed=\"1\"/>" + "<w:lsdException w:name=\"index 5\" w:semiHidden=\"1\" w:unhideWhenUsed=\"1\"/>" + "<w:lsdException w:name=\"index 6\" w:semiHidden=\"1\" w:unhideWhenUsed=\"1\"/>" + "<w:lsdException w:name=\"index 7\" w:semiHidden=\"1\" w:unhideWhenUsed=\"1\"/>" + "<w:lsdException w:name=\"index 8\" w:semiHidden=\"1\" w:unhideWhenUsed=\"1\"/>" + "<w:lsdException w:name=\"index 9\" w:semiHidden=\"1\" w:unhideWhenUsed=\"1\"/>" + "<w:lsdException w:name=\"toc 1\" w:semiHidden=\"1\" w:uiPriority=\"39\" w:unhideWhenUsed=\"1\"/>" + "<w:lsdException w:name=\"toc 2\" w:semiHidden=\"1\" w:uiPriority=\"39\" w:unhideWhenUsed=\"1\"/>" + "<w:lsdException w:name=\"toc 3\" w:semiHidden=\"1\" w:uiPriority=\"39\" w:unhideWhenUsed=\"1\"/>" + "<w:lsdException w:name=\"toc 4\" w:semiHidden=\"1\" w:uiPriority=\"39\" w:unhideWhenUsed=\"1\"/>" + "<w:lsdException w:name=\"toc 5\" w:semiHidden=\"1\" w:uiPriority=\"39\" w:unhideWhenUsed=\"1\"/>" + "<w:lsdException w:name=\"toc 6\" w:semiHidden=\"1\" w:uiPriority=\"39\" w:unhideWhenUsed=\"1\"/>" + 
"<w:lsdException w:name=\"toc 7\" w:semiHidden=\"1\" w:uiPriority=\"39\" w:unhideWhenUsed=\"1\"/>" + "<w:lsdException w:name=\"toc 8\" w:semiHidden=\"1\" w:uiPriority=\"39\" w:unhideWhenUsed=\"1\"/>" + "<w:lsdException w:name=\"toc 9\" w:semiHidden=\"1\" w:uiPriority=\"39\" w:unhideWhenUsed=\"1\"/>" + "<w:lsdException w:name=\"Normal Indent\" w:semiHidden=\"1\" w:unhideWhenUsed=\"1\"/>" + "<w:lsdException w:name=\"footnote text\" w:semiHidden=\"1\" w:unhideWhenUsed=\"1\"/>" + "<w:lsdException w:name=\"annotation text\" w:semiHidden=\"1\" w:unhideWhenUsed=\"1\"/>" + "<w:lsdException w:name=\"header\" w:semiHidden=\"1\" w:unhideWhenUsed=\"1\"/>" + "<w:lsdException w:name=\"footer\" w:semiHidden=\"1\" w:unhideWhenUsed=\"1\"/>" + "<w:lsdException w:name=\"index heading\" w:semiHidden=\"1\" w:unhideWhenUsed=\"1\"/>" + "<w:lsdException w:name=\"caption\" w:semiHidden=\"1\" w:uiPriority=\"35\" w:unhideWhenUsed=\"1\" w:qFormat=\"1\"/>" + "<w:lsdException w:name=\"table of figures\" w:semiHidden=\"1\" w:unhideWhenUsed=\"1\"/>" + "<w:lsdException w:name=\"envelope address\" w:semiHidden=\"1\" w:unhideWhenUsed=\"1\"/>" + "<w:lsdException w:name=\"envelope return\" w:semiHidden=\"1\" w:unhideWhenUsed=\"1\"/>" + "<w:lsdException w:name=\"footnote reference\" w:semiHidden=\"1\" w:unhideWhenUsed=\"1\"/>" + "<w:lsdException w:name=\"annotation reference\" w:semiHidden=\"1\" w:unhideWhenUsed=\"1\"/>" + "<w:lsdException w:name=\"line number\" w:semiHidden=\"1\" w:unhideWhenUsed=\"1\"/>" + "<w:lsdException w:name=\"page number\" w:semiHidden=\"1\" w:unhideWhenUsed=\"1\"/>" + "<w:lsdException w:name=\"endnote reference\" w:semiHidden=\"1\" w:unhideWhenUsed=\"1\"/>" + "<w:lsdException w:name=\"endnote text\" w:semiHidden=\"1\" w:unhideWhenUsed=\"1\"/>" + "<w:lsdException w:name=\"table of authorities\" w:semiHidden=\"1\" w:unhideWhenUsed=\"1\"/>" + "<w:lsdException w:name=\"macro\" w:semiHidden=\"1\" w:unhideWhenUsed=\"1\"/>" + "<w:lsdException w:name=\"toa heading\" w:semiHidden=\"1\" 
w:unhideWhenUsed=\"1\"/>" + "<w:lsdException w:name=\"List\" w:semiHidden=\"1\" w:unhideWhenUsed=\"1\"/>" + "<w:lsdException w:name=\"List Bullet\" w:semiHidden=\"1\" w:unhideWhenUsed=\"1\"/>" + "<w:lsdException w:name=\"List Number\" w:semiHidden=\"1\" w:unhideWhenUsed=\"1\"/>" + "<w:lsdException w:name=\"List 2\" w:semiHidden=\"1\" w:unhideWhenUsed=\"1\"/>" + "<w:lsdException w:name=\"List 3\" w:semiHidden=\"1\" w:unhideWhenUsed=\"1\"/>" + "<w:lsdException w:name=\"List 4\" w:semiHidden=\"1\" w:unhideWhenUsed=\"1\"/>" + "<w:lsdException w:name=\"List 5\" w:semiHidden=\"1\" w:unhideWhenUsed=\"1\"/>" + "<w:lsdException w:name=\"List Bullet 2\" w:semiHidden=\"1\" w:unhideWhenUsed=\"1\"/>" + "<w:lsdException w:name=\"List Bullet 3\" w:semiHidden=\"1\" w:unhideWhenUsed=\"1\"/>" + "<w:lsdException w:name=\"List Bullet 4\" w:semiHidden=\"1\" w:unhideWhenUsed=\"1\"/>" + "<w:lsdException w:name=\"List Bullet 5\" w:semiHidden=\"1\" w:unhideWhenUsed=\"1\"/>" + "<w:lsdException w:name=\"List Number 2\" w:semiHidden=\"1\" w:unhideWhenUsed=\"1\"/>" + "<w:lsdException w:name=\"List Number 3\" w:semiHidden=\"1\" w:unhideWhenUsed=\"1\"/>" + "<w:lsdException w:name=\"List Number 4\" w:semiHidden=\"1\" w:unhideWhenUsed=\"1\"/>" + "<w:lsdException w:name=\"List Number 5\" w:semiHidden=\"1\" w:unhideWhenUsed=\"1\"/>" + "<w:lsdException w:name=\"Title\" w:uiPriority=\"10\" w:qFormat=\"1\"/>" + "<w:lsdException w:name=\"Closing\" w:semiHidden=\"1\" w:unhideWhenUsed=\"1\"/>" + "<w:lsdException w:name=\"Signature\" w:semiHidden=\"1\" w:unhideWhenUsed=\"1\"/>" + "<w:lsdException w:name=\"Default Paragraph Font\" w:semiHidden=\"1\" w:uiPriority=\"1\" w:unhideWhenUsed=\"1\"/>" + "<w:lsdException w:name=\"Body Text\" w:semiHidden=\"1\" w:unhideWhenUsed=\"1\"/>" + "<w:lsdException w:name=\"Body Text Indent\" w:semiHidden=\"1\" w:unhideWhenUsed=\"1\"/>" + "<w:lsdException w:name=\"List Continue\" w:semiHidden=\"1\" w:unhideWhenUsed=\"1\"/>" + "<w:lsdException w:name=\"List Continue 2\" 
w:semiHidden=\"1\" w:unhideWhenUsed=\"1\"/>" + "<w:lsdException w:name=\"List Continue 3\" w:semiHidden=\"1\" w:unhideWhenUsed=\"1\"/>" + "<w:lsdException w:name=\"List Continue 4\" w:semiHidden=\"1\" w:unhideWhenUsed=\"1\"/>" + "<w:lsdException w:name=\"List Continue 5\" w:semiHidden=\"1\" w:unhideWhenUsed=\"1\"/>" + "<w:lsdException w:name=\"Message Header\" w:semiHidden=\"1\" w:unhideWhenUsed=\"1\"/>" + "<w:lsdException w:name=\"Subtitle\" w:uiPriority=\"11\" w:qFormat=\"1\"/>" + "<w:lsdException w:name=\"Salutation\" w:semiHidden=\"1\" w:unhideWhenUsed=\"1\"/>" + "<w:lsdException w:name=\"Date\" w:semiHidden=\"1\" w:unhideWhenUsed=\"1\"/>" + "<w:lsdException w:name=\"Body Text First Indent\" w:semiHidden=\"1\" w:unhideWhenUsed=\"1\"/>" + "<w:lsdException w:name=\"Body Text First Indent 2\" w:semiHidden=\"1\" w:unhideWhenUsed=\"1\"/>" + "<w:lsdException w:name=\"Note Heading\" w:semiHidden=\"1\" w:unhideWhenUsed=\"1\"/>" + "<w:lsdException w:name=\"Body Text 2\" w:semiHidden=\"1\" w:unhideWhenUsed=\"1\"/>" + "<w:lsdException w:name=\"Body Text 3\" w:semiHidden=\"1\" w:unhideWhenUsed=\"1\"/>" + "<w:lsdException w:name=\"Body Text Indent 2\" w:semiHidden=\"1\" w:unhideWhenUsed=\"1\"/>" + "<w:lsdException w:name=\"Body Text Indent 3\" w:semiHidden=\"1\" w:unhideWhenUsed=\"1\"/>" + "<w:lsdException w:name=\"Block Text\" w:semiHidden=\"1\" w:unhideWhenUsed=\"1\"/>" + "<w:lsdException w:name=\"Hyperlink\" w:semiHidden=\"1\" w:unhideWhenUsed=\"1\"/>" + "<w:lsdException w:name=\"FollowedHyperlink\" w:semiHidden=\"1\" w:unhideWhenUsed=\"1\"/>" + "<w:lsdException w:name=\"Strong\" w:uiPriority=\"22\" w:qFormat=\"1\"/>" + "<w:lsdException w:name=\"Emphasis\" w:uiPriority=\"20\" w:qFormat=\"1\"/>" + "<w:lsdException w:name=\"Document Map\" w:semiHidden=\"1\" w:unhideWhenUsed=\"1\"/>" + "<w:lsdException w:name=\"Plain Text\" w:semiHidden=\"1\" w:unhideWhenUsed=\"1\"/>" + "<w:lsdException w:name=\"E-mail Signature\" w:semiHidden=\"1\" w:unhideWhenUsed=\"1\"/>" + 
"<w:lsdException w:name=\"HTML Top of Form\" w:semiHidden=\"1\" w:unhideWhenUsed=\"1\"/>" + "<w:lsdException w:name=\"HTML Bottom of Form\" w:semiHidden=\"1\" w:unhideWhenUsed=\"1\"/>" + "<w:lsdException w:name=\"Normal (Web)\" w:semiHidden=\"1\" w:unhideWhenUsed=\"1\"/>" + "<w:lsdException w:name=\"HTML Acronym\" w:semiHidden=\"1\" w:unhideWhenUsed=\"1\"/>" + "<w:lsdException w:name=\"HTML Address\" w:semiHidden=\"1\" w:unhideWhenUsed=\"1\"/>" + "<w:lsdException w:name=\"HTML Cite\" w:semiHidden=\"1\" w:unhideWhenUsed=\"1\"/>" + "<w:lsdException w:name=\"HTML Code\" w:semiHidden=\"1\" w:unhideWhenUsed=\"1\"/>" + "<w:lsdException w:name=\"HTML Definition\" w:semiHidden=\"1\" w:unhideWhenUsed=\"1\"/>" + "<w:lsdException w:name=\"HTML Keyboard\" w:semiHidden=\"1\" w:unhideWhenUsed=\"1\"/>" + "<w:lsdException w:name=\"HTML Preformatted\" w:semiHidden=\"1\" w:unhideWhenUsed=\"1\"/>" + "<w:lsdException w:name=\"HTML Sample\" w:semiHidden=\"1\" w:unhideWhenUsed=\"1\"/>" + "<w:lsdException w:name=\"HTML Typewriter\" w:semiHidden=\"1\" w:unhideWhenUsed=\"1\"/>" + "<w:lsdException w:name=\"HTML Variable\" w:semiHidden=\"1\" w:unhideWhenUsed=\"1\"/>" + "<w:lsdException w:name=\"Normal Table\" w:semiHidden=\"1\" w:unhideWhenUsed=\"1\"/>" + "<w:lsdException w:name=\"annotation subject\" w:semiHidden=\"1\" w:unhideWhenUsed=\"1\"/>" + "<w:lsdException w:name=\"No List\" w:semiHidden=\"1\" w:unhideWhenUsed=\"1\"/>" + "<w:lsdException w:name=\"Outline List 1\" w:semiHidden=\"1\" w:unhideWhenUsed=\"1\"/>" + "<w:lsdException w:name=\"Outline List 2\" w:semiHidden=\"1\" w:unhideWhenUsed=\"1\"/>" + "<w:lsdException w:name=\"Outline List 3\" w:semiHidden=\"1\" w:unhideWhenUsed=\"1\"/>" + "<w:lsdException w:name=\"Table Simple 1\" w:semiHidden=\"1\" w:unhideWhenUsed=\"1\"/>" + "<w:lsdException w:name=\"Table Simple 2\" w:semiHidden=\"1\" w:unhideWhenUsed=\"1\"/>" + "<w:lsdException w:name=\"Table Simple 3\" w:semiHidden=\"1\" w:unhideWhenUsed=\"1\"/>" + "<w:lsdException w:name=\"Table 
Classic 1\" w:semiHidden=\"1\" w:unhideWhenUsed=\"1\"/>" + "<w:lsdException w:name=\"Table Classic 2\" w:semiHidden=\"1\" w:unhideWhenUsed=\"1\"/>" + "<w:lsdException w:name=\"Table Classic 3\" w:semiHidden=\"1\" w:unhideWhenUsed=\"1\"/>" + "<w:lsdException w:name=\"Table Classic 4\" w:semiHidden=\"1\" w:unhideWhenUsed=\"1\"/>" + "<w:lsdException w:name=\"Table Colorful 1\" w:semiHidden=\"1\" w:unhideWhenUsed=\"1\"/>" + "<w:lsdException w:name=\"Table Colorful 2\" w:semiHidden=\"1\" w:unhideWhenUsed=\"1\"/>" + "<w:lsdException w:name=\"Table Colorful 3\" w:semiHidden=\"1\" w:unhideWhenUsed=\"1\"/>" + "<w:lsdException w:name=\"Table Columns 1\" w:semiHidden=\"1\" w:unhideWhenUsed=\"1\"/>" + "<w:lsdException w:name=\"Table Columns 2\" w:semiHidden=\"1\" w:unhideWhenUsed=\"1\"/>" + "<w:lsdException w:name=\"Table Columns 3\" w:semiHidden=\"1\" w:unhideWhenUsed=\"1\"/>" + "<w:lsdException w:name=\"Table Columns 4\" w:semiHidden=\"1\" w:unhideWhenUsed=\"1\"/>" + "<w:lsdException w:name=\"Table Columns 5\" w:semiHidden=\"1\" w:unhideWhenUsed=\"1\"/>" + "<w:lsdException w:name=\"Table Grid 1\" w:semiHidden=\"1\" w:unhideWhenUsed=\"1\"/>" + "<w:lsdException w:name=\"Table Grid 2\" w:semiHidden=\"1\" w:unhideWhenUsed=\"1\"/>" + "<w:lsdException w:name=\"Table Grid 3\" w:semiHidden=\"1\" w:unhideWhenUsed=\"1\"/>" + "<w:lsdException w:name=\"Table Grid 4\" w:semiHidden=\"1\" w:unhideWhenUsed=\"1\"/>" + "<w:lsdException w:name=\"Table Grid 5\" w:semiHidden=\"1\" w:unhideWhenUsed=\"1\"/>" + "<w:lsdException w:name=\"Table Grid 6\" w:semiHidden=\"1\" w:unhideWhenUsed=\"1\"/>" + "<w:lsdException w:name=\"Table Grid 7\" w:semiHidden=\"1\" w:unhideWhenUsed=\"1\"/>" + "<w:lsdException w:name=\"Table Grid 8\" w:semiHidden=\"1\" w:unhideWhenUsed=\"1\"/>" + "<w:lsdException w:name=\"Table List 1\" w:semiHidden=\"1\" w:unhideWhenUsed=\"1\"/>" + "<w:lsdException w:name=\"Table List 2\" w:semiHidden=\"1\" w:unhideWhenUsed=\"1\"/>" + "<w:lsdException w:name=\"Table List 3\" 
w:semiHidden=\"1\" w:unhideWhenUsed=\"1\"/>" + "<w:lsdException w:name=\"Table List 4\" w:semiHidden=\"1\" w:unhideWhenUsed=\"1\"/>" + "<w:lsdException w:name=\"Table List 5\" w:semiHidden=\"1\" w:unhideWhenUsed=\"1\"/>" + "<w:lsdException w:name=\"Table List 6\" w:semiHidden=\"1\" w:unhideWhenUsed=\"1\"/>" + "<w:lsdException w:name=\"Table List 7\" w:semiHidden=\"1\" w:unhideWhenUsed=\"1\"/>" + "<w:lsdException w:name=\"Table List 8\" w:semiHidden=\"1\" w:unhideWhenUsed=\"1\"/>" + "<w:lsdException w:name=\"Table 3D effects 1\" w:semiHidden=\"1\" w:unhideWhenUsed=\"1\"/>" + "<w:lsdException w:name=\"Table 3D effects 2\" w:semiHidden=\"1\" w:unhideWhenUsed=\"1\"/>" + "<w:lsdException w:name=\"Table 3D effects 3\" w:semiHidden=\"1\" w:unhideWhenUsed=\"1\"/>" + "<w:lsdException w:name=\"Table Contemporary\" w:semiHidden=\"1\" w:unhideWhenUsed=\"1\"/>" + "<w:lsdException w:name=\"Table Elegant\" w:semiHidden=\"1\" w:unhideWhenUsed=\"1\"/>" + "<w:lsdException w:name=\"Table Professional\" w:semiHidden=\"1\" w:unhideWhenUsed=\"1\"/>" + "<w:lsdException w:name=\"Table Subtle 1\" w:semiHidden=\"1\" w:unhideWhenUsed=\"1\"/>" + "<w:lsdException w:name=\"Table Subtle 2\" w:semiHidden=\"1\" w:unhideWhenUsed=\"1\"/>" + "<w:lsdException w:name=\"Table Web 1\" w:semiHidden=\"1\" w:unhideWhenUsed=\"1\"/>" + "<w:lsdException w:name=\"Table Web 2\" w:semiHidden=\"1\" w:unhideWhenUsed=\"1\"/>" + "<w:lsdException w:name=\"Table Web 3\" w:semiHidden=\"1\" w:unhideWhenUsed=\"1\"/>" + "<w:lsdException w:name=\"Balloon Text\" w:semiHidden=\"1\" w:unhideWhenUsed=\"1\"/>" + "<w:lsdException w:name=\"Table Grid\" w:uiPriority=\"39\"/>" + "<w:lsdException w:name=\"Table Theme\" w:semiHidden=\"1\" w:unhideWhenUsed=\"1\"/>" + "<w:lsdException w:name=\"Placeholder Text\" w:semiHidden=\"1\"/>" + "<w:lsdException w:name=\"No Spacing\" w:uiPriority=\"1\" w:qFormat=\"1\"/>" + "<w:lsdException w:name=\"Light Shading\" w:uiPriority=\"60\"/>" + "<w:lsdException w:name=\"Light List\" 
w:uiPriority=\"61\"/>" + "<w:lsdException w:name=\"Light Grid\" w:uiPriority=\"62\"/>" + "<w:lsdException w:name=\"Medium Shading 1\" w:uiPriority=\"63\"/>" + "<w:lsdException w:name=\"Medium Shading 2\" w:uiPriority=\"64\"/>" + "<w:lsdException w:name=\"Medium List 1\" w:uiPriority=\"65\"/>" + "<w:lsdException w:name=\"Medium List 2\" w:uiPriority=\"66\"/>" + "<w:lsdException w:name=\"Medium Grid 1\" w:uiPriority=\"67\"/>" + "<w:lsdException w:name=\"Medium Grid 2\" w:uiPriority=\"68\"/>" + "<w:lsdException w:name=\"Medium Grid 3\" w:uiPriority=\"69\"/>" + "<w:lsdException w:name=\"Dark List\" w:uiPriority=\"70\"/>" + "<w:lsdException w:name=\"Colorful Shading\" w:uiPriority=\"71\"/>" + "<w:lsdException w:name=\"Colorful List\" w:uiPriority=\"72\"/>" + "<w:lsdException w:name=\"Colorful Grid\" w:uiPriority=\"73\"/>" + "<w:lsdException w:name=\"Light Shading Accent 1\" w:uiPriority=\"60\"/>" + "<w:lsdException w:name=\"Light List Accent 1\" w:uiPriority=\"61\"/>" + "<w:lsdException w:name=\"Light Grid Accent 1\" w:uiPriority=\"62\"/>" + "<w:lsdException w:name=\"Medium Shading 1 Accent 1\" w:uiPriority=\"63\"/>" + "<w:lsdException w:name=\"Medium Shading 2 Accent 1\" w:uiPriority=\"64\"/>" + "<w:lsdException w:name=\"Medium List 1 Accent 1\" w:uiPriority=\"65\"/>" + "<w:lsdException w:name=\"Revision\" w:semiHidden=\"1\"/>" + "<w:lsdException w:name=\"List Paragraph\" w:uiPriority=\"34\" w:qFormat=\"1\"/>" + "<w:lsdException w:name=\"Quote\" w:uiPriority=\"29\" w:qFormat=\"1\"/>" + "<w:lsdException w:name=\"Intense Quote\" w:uiPriority=\"30\" w:qFormat=\"1\"/>" + "<w:lsdException w:name=\"Medium List 2 Accent 1\" w:uiPriority=\"66\"/>" + "<w:lsdException w:name=\"Medium Grid 1 Accent 1\" w:uiPriority=\"67\"/>" + "<w:lsdException w:name=\"Medium Grid 2 Accent 1\" w:uiPriority=\"68\"/>" + "<w:lsdException w:name=\"Medium Grid 3 Accent 1\" w:uiPriority=\"69\"/>" + "<w:lsdException w:name=\"Dark List Accent 1\" w:uiPriority=\"70\"/>" + "<w:lsdException 
w:name=\"Colorful Shading Accent 1\" w:uiPriority=\"71\"/>" + "<w:lsdException w:name=\"Colorful List Accent 1\" w:uiPriority=\"72\"/>" + "<w:lsdException w:name=\"Colorful Grid Accent 1\" w:uiPriority=\"73\"/>" + "<w:lsdException w:name=\"Light Shading Accent 2\" w:uiPriority=\"60\"/>" + "<w:lsdException w:name=\"Light List Accent 2\" w:uiPriority=\"61\"/>" + "<w:lsdException w:name=\"Light Grid Accent 2\" w:uiPriority=\"62\"/>" + "<w:lsdException w:name=\"Medium Shading 1 Accent 2\" w:uiPriority=\"63\"/>" + "<w:lsdException w:name=\"Medium Shading 2 Accent 2\" w:uiPriority=\"64\"/>" + "<w:lsdException w:name=\"Medium List 1 Accent 2\" w:uiPriority=\"65\"/>" + "<w:lsdException w:name=\"Medium List 2 Accent 2\" w:uiPriority=\"66\"/>" + "<w:lsdException w:name=\"Medium Grid 1 Accent 2\" w:uiPriority=\"67\"/>" + "<w:lsdException w:name=\"Medium Grid 2 Accent 2\" w:uiPriority=\"68\"/>" + "<w:lsdException w:name=\"Medium Grid 3 Accent 2\" w:uiPriority=\"69\"/>" + "<w:lsdException w:name=\"Dark List Accent 2\" w:uiPriority=\"70\"/>" + "<w:lsdException w:name=\"Colorful Shading Accent 2\" w:uiPriority=\"71\"/>" + "<w:lsdException w:name=\"Colorful List Accent 2\" w:uiPriority=\"72\"/>" + "<w:lsdException w:name=\"Colorful Grid Accent 2\" w:uiPriority=\"73\"/>" + "<w:lsdException w:name=\"Light Shading Accent 3\" w:uiPriority=\"60\"/>" + "<w:lsdException w:name=\"Light List Accent 3\" w:uiPriority=\"61\"/>" + "<w:lsdException w:name=\"Light Grid Accent 3\" w:uiPriority=\"62\"/>" + "<w:lsdException w:name=\"Medium Shading 1 Accent 3\" w:uiPriority=\"63\"/>" + "<w:lsdException w:name=\"Medium Shading 2 Accent 3\" w:uiPriority=\"64\"/>" + "<w:lsdException w:name=\"Medium List 1 Accent 3\" w:uiPriority=\"65\"/>" + "<w:lsdException w:name=\"Medium List 2 Accent 3\" w:uiPriority=\"66\"/>" + "<w:lsdException w:name=\"Medium Grid 1 Accent 3\" w:uiPriority=\"67\"/>" + "<w:lsdException w:name=\"Medium Grid 2 Accent 3\" w:uiPriority=\"68\"/>" + "<w:lsdException w:name=\"Medium Grid 
3 Accent 3\" w:uiPriority=\"69\"/>" + "<w:lsdException w:name=\"Dark List Accent 3\" w:uiPriority=\"70\"/>" + "<w:lsdException w:name=\"Colorful Shading Accent 3\" w:uiPriority=\"71\"/>" + "<w:lsdException w:name=\"Colorful List Accent 3\" w:uiPriority=\"72\"/>" + "<w:lsdException w:name=\"Colorful Grid Accent 3\" w:uiPriority=\"73\"/>" + "<w:lsdException w:name=\"Light Shading Accent 4\" w:uiPriority=\"60\"/>" + "<w:lsdException w:name=\"Light List Accent 4\" w:uiPriority=\"61\"/>" + "<w:lsdException w:name=\"Light Grid Accent 4\" w:uiPriority=\"62\"/>" + "<w:lsdException w:name=\"Medium Shading 1 Accent 4\" w:uiPriority=\"63\"/>" + "<w:lsdException w:name=\"Medium Shading 2 Accent 4\" w:uiPriority=\"64\"/>" + "<w:lsdException w:name=\"Medium List 1 Accent 4\" w:uiPriority=\"65\"/>" + "<w:lsdException w:name=\"Medium List 2 Accent 4\" w:uiPriority=\"66\"/>" + "<w:lsdException w:name=\"Medium Grid 1 Accent 4\" w:uiPriority=\"67\"/>" + "<w:lsdException w:name=\"Medium Grid 2 Accent 4\" w:uiPriority=\"68\"/>" + "<w:lsdException w:name=\"Medium Grid 3 Accent 4\" w:uiPriority=\"69\"/>" + "<w:lsdException w:name=\"Dark List Accent 4\" w:uiPriority=\"70\"/>" + "<w:lsdException w:name=\"Colorful Shading Accent 4\" w:uiPriority=\"71\"/>" + "<w:lsdException w:name=\"Colorful List Accent 4\" w:uiPriority=\"72\"/>" + "<w:lsdException w:name=\"Colorful Grid Accent 4\" w:uiPriority=\"73\"/>" + "<w:lsdException w:name=\"Light Shading Accent 5\" w:uiPriority=\"60\"/>" + "<w:lsdException w:name=\"Light List Accent 5\" w:uiPriority=\"61\"/>" + "<w:lsdException w:name=\"Light Grid Accent 5\" w:uiPriority=\"62\"/>" + "<w:lsdException w:name=\"Medium Shading 1 Accent 5\" w:uiPriority=\"63\"/>" + "<w:lsdException w:name=\"Medium Shading 2 Accent 5\" w:uiPriority=\"64\"/>" + "<w:lsdException w:name=\"Medium List 1 Accent 5\" w:uiPriority=\"65\"/>" + "<w:lsdException w:name=\"Medium List 2 Accent 5\" w:uiPriority=\"66\"/>" + "<w:lsdException w:name=\"Medium Grid 1 Accent 5\" 
w:uiPriority=\"67\"/>" + "<w:lsdException w:name=\"Medium Grid 2 Accent 5\" w:uiPriority=\"68\"/>" + "<w:lsdException w:name=\"Medium Grid 3 Accent 5\" w:uiPriority=\"69\"/>" + "<w:lsdException w:name=\"Dark List Accent 5\" w:uiPriority=\"70\"/>" + "<w:lsdException w:name=\"Colorful Shading Accent 5\" w:uiPriority=\"71\"/>" + "<w:lsdException w:name=\"Colorful List Accent 5\" w:uiPriority=\"72\"/>" + "<w:lsdException w:name=\"Colorful Grid Accent 5\" w:uiPriority=\"73\"/>" + "<w:lsdException w:name=\"Light Shading Accent 6\" w:uiPriority=\"60\"/>" + "<w:lsdException w:name=\"Light List Accent 6\" w:uiPriority=\"61\"/>" + "<w:lsdException w:name=\"Light Grid Accent 6\" w:uiPriority=\"62\"/>" + "<w:lsdException w:name=\"Medium Shading 1 Accent 6\" w:uiPriority=\"63\"/>" + "<w:lsdException w:name=\"Medium Shading 2 Accent 6\" w:uiPriority=\"64\"/>" + "<w:lsdException w:name=\"Medium List 1 Accent 6\" w:uiPriority=\"65\"/>" + "<w:lsdException w:name=\"Medium List 2 Accent 6\" w:uiPriority=\"66\"/>" + "<w:lsdException w:name=\"Medium Grid 1 Accent 6\" w:uiPriority=\"67\"/>" + "<w:lsdException w:name=\"Medium Grid 2 Accent 6\" w:uiPriority=\"68\"/>" + "<w:lsdException w:name=\"Medium Grid 3 Accent 6\" w:uiPriority=\"69\"/>" + "<w:lsdException w:name=\"Dark List Accent 6\" w:uiPriority=\"70\"/>" + "<w:lsdException w:name=\"Colorful Shading Accent 6\" w:uiPriority=\"71\"/>" + "<w:lsdException w:name=\"Colorful List Accent 6\" w:uiPriority=\"72\"/>" + "<w:lsdException w:name=\"Colorful Grid Accent 6\" w:uiPriority=\"73\"/>" + "<w:lsdException w:name=\"Subtle Emphasis\" w:uiPriority=\"19\" w:qFormat=\"1\"/>" + "<w:lsdException w:name=\"Intense Emphasis\" w:uiPriority=\"21\" w:qFormat=\"1\"/>" + "<w:lsdException w:name=\"Subtle Reference\" w:uiPriority=\"31\" w:qFormat=\"1\"/>" + "<w:lsdException w:name=\"Intense Reference\" w:uiPriority=\"32\" w:qFormat=\"1\"/>" + "<w:lsdException w:name=\"Book Title\" w:uiPriority=\"33\" w:qFormat=\"1\"/>" + "<w:lsdException 
w:name=\"Bibliography\" w:semiHidden=\"1\" w:uiPriority=\"37\" w:unhideWhenUsed=\"1\"/>" + "<w:lsdException w:name=\"TOC Heading\" w:semiHidden=\"1\" w:uiPriority=\"39\" w:unhideWhenUsed=\"1\" w:qFormat=\"1\"/>" + "<w:lsdException w:name=\"Plain Table 1\" w:uiPriority=\"41\"/>" + "<w:lsdException w:name=\"Plain Table 2\" w:uiPriority=\"42\"/>" + "<w:lsdException w:name=\"Plain Table 3\" w:uiPriority=\"43\"/>" + "<w:lsdException w:name=\"Plain Table 4\" w:uiPriority=\"44\"/>" + "<w:lsdException w:name=\"Plain Table 5\" w:uiPriority=\"45\"/>" + "<w:lsdException w:name=\"Grid Table Light\" w:uiPriority=\"40\"/>" + "<w:lsdException w:name=\"Grid Table 1 Light\" w:uiPriority=\"46\"/>" + "<w:lsdException w:name=\"Grid Table 2\" w:uiPriority=\"47\"/>" + "<w:lsdException w:name=\"Grid Table 3\" w:uiPriority=\"48\"/>" + "<w:lsdException w:name=\"Grid Table 4\" w:uiPriority=\"49\"/>" + "<w:lsdException w:name=\"Grid Table 5 Dark\" w:uiPriority=\"50\"/>" + "<w:lsdException w:name=\"Grid Table 6 Colorful\" w:uiPriority=\"51\"/>" + "<w:lsdException w:name=\"Grid Table 7 Colorful\" w:uiPriority=\"52\"/>" + "<w:lsdException w:name=\"Grid Table 1 Light Accent 1\" w:uiPriority=\"46\"/>" + "<w:lsdException w:name=\"Grid Table 2 Accent 1\" w:uiPriority=\"47\"/>" + "<w:lsdException w:name=\"Grid Table 3 Accent 1\" w:uiPriority=\"48\"/>" + "<w:lsdException w:name=\"Grid Table 4 Accent 1\" w:uiPriority=\"49\"/>" + "<w:lsdException w:name=\"Grid Table 5 Dark Accent 1\" w:uiPriority=\"50\"/>" + "<w:lsdException w:name=\"Grid Table 6 Colorful Accent 1\" w:uiPriority=\"51\"/>" + "<w:lsdException w:name=\"Grid Table 7 Colorful Accent 1\" w:uiPriority=\"52\"/>" + "<w:lsdException w:name=\"Grid Table 1 Light Accent 2\" w:uiPriority=\"46\"/>" + "<w:lsdException w:name=\"Grid Table 2 Accent 2\" w:uiPriority=\"47\"/>" + "<w:lsdException w:name=\"Grid Table 3 Accent 2\" w:uiPriority=\"48\"/>" + "<w:lsdException w:name=\"Grid Table 4 Accent 2\" w:uiPriority=\"49\"/>" + "<w:lsdException 
w:name=\"Grid Table 5 Dark Accent 2\" w:uiPriority=\"50\"/>" + "<w:lsdException w:name=\"Grid Table 6 Colorful Accent 2\" w:uiPriority=\"51\"/>" + "<w:lsdException w:name=\"Grid Table 7 Colorful Accent 2\" w:uiPriority=\"52\"/>" + "<w:lsdException w:name=\"Grid Table 1 Light Accent 3\" w:uiPriority=\"46\"/>" + "<w:lsdException w:name=\"Grid Table 2 Accent 3\" w:uiPriority=\"47\"/>" + "<w:lsdException w:name=\"Grid Table 3 Accent 3\" w:uiPriority=\"48\"/>" + "<w:lsdException w:name=\"Grid Table 4 Accent 3\" w:uiPriority=\"49\"/>" + "<w:lsdException w:name=\"Grid Table 5 Dark Accent 3\" w:uiPriority=\"50\"/>" + "<w:lsdException w:name=\"Grid Table 6 Colorful Accent 3\" w:uiPriority=\"51\"/>" + "<w:lsdException w:name=\"Grid Table 7 Colorful Accent 3\" w:uiPriority=\"52\"/>" + "<w:lsdException w:name=\"Grid Table 1 Light Accent 4\" w:uiPriority=\"46\"/>" + "<w:lsdException w:name=\"Grid Table 2 Accent 4\" w:uiPriority=\"47\"/>" + "<w:lsdException w:name=\"Grid Table 3 Accent 4\" w:uiPriority=\"48\"/>" + "<w:lsdException w:name=\"Grid Table 4 Accent 4\" w:uiPriority=\"49\"/>" + "<w:lsdException w:name=\"Grid Table 5 Dark Accent 4\" w:uiPriority=\"50\"/>" + "<w:lsdException w:name=\"Grid Table 6 Colorful Accent 4\" w:uiPriority=\"51\"/>" + "<w:lsdException w:name=\"Grid Table 7 Colorful Accent 4\" w:uiPriority=\"52\"/>" + "<w:lsdException w:name=\"Grid Table 1 Light Accent 5\" w:uiPriority=\"46\"/>" + "<w:lsdException w:name=\"Grid Table 2 Accent 5\" w:uiPriority=\"47\"/>" + "<w:lsdException w:name=\"Grid Table 3 Accent 5\" w:uiPriority=\"48\"/>" + "<w:lsdException w:name=\"Grid Table 4 Accent 5\" w:uiPriority=\"49\"/>" + "<w:lsdException w:name=\"Grid Table 5 Dark Accent 5\" w:uiPriority=\"50\"/>" + "<w:lsdException w:name=\"Grid Table 6 Colorful Accent 5\" w:uiPriority=\"51\"/>" + "<w:lsdException w:name=\"Grid Table 7 Colorful Accent 5\" w:uiPriority=\"52\"/>" + "<w:lsdException w:name=\"Grid Table 1 Light Accent 6\" w:uiPriority=\"46\"/>" + "<w:lsdException 
w:name=\"Grid Table 2 Accent 6\" w:uiPriority=\"47\"/>" + "<w:lsdException w:name=\"Grid Table 3 Accent 6\" w:uiPriority=\"48\"/>" + "<w:lsdException w:name=\"Grid Table 4 Accent 6\" w:uiPriority=\"49\"/>" + "<w:lsdException w:name=\"Grid Table 5 Dark Accent 6\" w:uiPriority=\"50\"/>" + "<w:lsdException w:name=\"Grid Table 6 Colorful Accent 6\" w:uiPriority=\"51\"/>" + "<w:lsdException w:name=\"Grid Table 7 Colorful Accent 6\" w:uiPriority=\"52\"/>" + "<w:lsdException w:name=\"List Table 1 Light\" w:uiPriority=\"46\"/>" + "<w:lsdException w:name=\"List Table 2\" w:uiPriority=\"47\"/>" + "<w:lsdException w:name=\"List Table 3\" w:uiPriority=\"48\"/>" + "<w:lsdException w:name=\"List Table 4\" w:uiPriority=\"49\"/>" + "<w:lsdException w:name=\"List Table 5 Dark\" w:uiPriority=\"50\"/>" + "<w:lsdException w:name=\"List Table 6 Colorful\" w:uiPriority=\"51\"/>" + "<w:lsdException w:name=\"List Table 7 Colorful\" w:uiPriority=\"52\"/>" + "<w:lsdException w:name=\"List Table 1 Light Accent 1\" w:uiPriority=\"46\"/>" + "<w:lsdException w:name=\"List Table 2 Accent 1\" w:uiPriority=\"47\"/>" + "<w:lsdException w:name=\"List Table 3 Accent 1\" w:uiPriority=\"48\"/>" + "<w:lsdException w:name=\"List Table 4 Accent 1\" w:uiPriority=\"49\"/>" + "<w:lsdException w:name=\"List Table 5 Dark Accent 1\" w:uiPriority=\"50\"/>" + "<w:lsdException w:name=\"List Table 6 Colorful Accent 1\" w:uiPriority=\"51\"/>" + "<w:lsdException w:name=\"List Table 7 Colorful Accent 1\" w:uiPriority=\"52\"/>" + "<w:lsdException w:name=\"List Table 1 Light Accent 2\" w:uiPriority=\"46\"/>" + "<w:lsdException w:name=\"List Table 2 Accent 2\" w:uiPriority=\"47\"/>" + "<w:lsdException w:name=\"List Table 3 Accent 2\" w:uiPriority=\"48\"/>" + "<w:lsdException w:name=\"List Table 4 Accent 2\" w:uiPriority=\"49\"/>" + "<w:lsdException w:name=\"List Table 5 Dark Accent 2\" w:uiPriority=\"50\"/>" + "<w:lsdException w:name=\"List Table 6 Colorful Accent 2\" w:uiPriority=\"51\"/>" + "<w:lsdException 
w:name=\"List Table 7 Colorful Accent 2\" w:uiPriority=\"52\"/>" + "<w:lsdException w:name=\"List Table 1 Light Accent 3\" w:uiPriority=\"46\"/>" + "<w:lsdException w:name=\"List Table 2 Accent 3\" w:uiPriority=\"47\"/>" + "<w:lsdException w:name=\"List Table 3 Accent 3\" w:uiPriority=\"48\"/>" + "<w:lsdException w:name=\"List Table 4 Accent 3\" w:uiPriority=\"49\"/>" + "<w:lsdException w:name=\"List Table 5 Dark Accent 3\" w:uiPriority=\"50\"/>" + "<w:lsdException w:name=\"List Table 6 Colorful Accent 3\" w:uiPriority=\"51\"/>" + "<w:lsdException w:name=\"List Table 7 Colorful Accent 3\" w:uiPriority=\"52\"/>" + "<w:lsdException w:name=\"List Table 1 Light Accent 4\" w:uiPriority=\"46\"/>" + "<w:lsdException w:name=\"List Table 2 Accent 4\" w:uiPriority=\"47\"/>" + "<w:lsdException w:name=\"List Table 3 Accent 4\" w:uiPriority=\"48\"/>" + "<w:lsdException w:name=\"List Table 4 Accent 4\" w:uiPriority=\"49\"/>" + "<w:lsdException w:name=\"List Table 5 Dark Accent 4\" w:uiPriority=\"50\"/>" + "<w:lsdException w:name=\"List Table 6 Colorful Accent 4\" w:uiPriority=\"51\"/>" + "<w:lsdException w:name=\"List Table 7 Colorful Accent 4\" w:uiPriority=\"52\"/>" + "<w:lsdException w:name=\"List Table 1 Light Accent 5\" w:uiPriority=\"46\"/>" + "<w:lsdException w:name=\"List Table 2 Accent 5\" w:uiPriority=\"47\"/>" + "<w:lsdException w:name=\"List Table 3 Accent 5\" w:uiPriority=\"48\"/>" + "<w:lsdException w:name=\"List Table 4 Accent 5\" w:uiPriority=\"49\"/>" + "<w:lsdException w:name=\"List Table 5 Dark Accent 5\" w:uiPriority=\"50\"/>" + "<w:lsdException w:name=\"List Table 6 Colorful Accent 5\" w:uiPriority=\"51\"/>" + "<w:lsdException w:name=\"List Table 7 Colorful Accent 5\" w:uiPriority=\"52\"/>" + "<w:lsdException w:name=\"List Table 1 Light Accent 6\" w:uiPriority=\"46\"/>" + "<w:lsdException w:name=\"List Table 2 Accent 6\" w:uiPriority=\"47\"/>" + "<w:lsdException w:name=\"List Table 3 Accent 6\" w:uiPriority=\"48\"/>" + "<w:lsdException w:name=\"List Table 4 
Accent 6\" w:uiPriority=\"49\"/>" + "<w:lsdException w:name=\"List Table 5 Dark Accent 6\" w:uiPriority=\"50\"/>" + "<w:lsdException w:name=\"List Table 6 Colorful Accent 6\" w:uiPriority=\"51\"/>" + "<w:lsdException w:name=\"List Table 7 Colorful Accent 6\" w:uiPriority=\"52\"/>" + "<w:lsdException w:name=\"Mention\" w:semiHidden=\"1\" w:unhideWhenUsed=\"1\"/>" + "<w:lsdException w:name=\"Smart Hyperlink\" w:semiHidden=\"1\" w:unhideWhenUsed=\"1\"/>" + "<w:lsdException w:name=\"Hashtag\" w:semiHidden=\"1\" w:unhideWhenUsed=\"1\"/>" + "<w:lsdException w:name=\"Unresolved Mention\" w:semiHidden=\"1\" w:unhideWhenUsed=\"1\"/>" + "<w:lsdException w:name=\"Smart Link\" w:semiHidden=\"1\" w:unhideWhenUsed=\"1\"/></w:latentStyles>" + "<w:style w:type=\"paragraph\" w:default=\"1\" w:styleId=\"Normal\">" + "<w:name w:val=\"Normal\"/>" + "<w:qFormat/></w:style>" + "<w:style w:type=\"character\" w:default=\"1\" w:styleId=\"DefaultParagraphFont\">" + "<w:name w:val=\"Default Paragraph Font\"/>" + "<w:uiPriority w:val=\"1\"/>" + "<w:semiHidden/>" + "<w:unhideWhenUsed/></w:style>" + "<w:style w:type=\"table\" w:default=\"1\" w:styleId=\"TableNormal\">" + "<w:name w:val=\"Normal Table\"/>" + "<w:uiPriority w:val=\"99\"/>" + "<w:semiHidden/>" + "<w:unhideWhenUsed/>" + "<w:tblPr>" + "<w:tblInd w:w=\"0\" w:type=\"dxa\"/>" + "<w:tblCellMar>" + "<w:top w:w=\"0\" w:type=\"dxa\"/>" + "<w:left w:w=\"108\" w:type=\"dxa\"/>" + "<w:bottom w:w=\"0\" w:type=\"dxa\"/>" + "<w:right w:w=\"108\" w:type=\"dxa\"/></w:tblCellMar></w:tblPr></w:style>" + "<w:style w:type=\"numbering\" w:default=\"1\" w:styleId=\"NoList\">" + "<w:name w:val=\"No List\"/>" + "<w:uiPriority w:val=\"99\"/>" + "<w:semiHidden/>" + "<w:unhideWhenUsed/></w:style></w:styles>" + }, + + { + "word/webSettings.xml", + "" + "<?xml version=\"1.0\" encoding=\"UTF-8\" standalone=\"yes\"?>\r\n" + "" + "<w:webSettings xmlns:mc=\"http://schemas.openxmlformats.org/markup-compatibility/2006\" 
xmlns:r=\"http://schemas.openxmlformats.org/officeDocument/2006/relationships\" xmlns:w=\"http://schemas.openxmlformats.org/wordprocessingml/2006/main\" xmlns:w14=\"http://schemas.microsoft.com/office/word/2010/wordml\" xmlns:w15=\"http://schemas.microsoft.com/office/word/2012/wordml\" xmlns:w16cex=\"http://schemas.microsoft.com/office/word/2018/wordml/cex\" xmlns:w16cid=\"http://schemas.microsoft.com/office/word/2016/wordml/cid\" xmlns:w16=\"http://schemas.microsoft.com/office/word/2018/wordml\" xmlns:w16se=\"http://schemas.microsoft.com/office/word/2015/wordml/symex\" mc:Ignorable=\"w14 w15 w16se w16cid w16 w16cex\">" + "<w:optimizeForBrowser/>" + "<w:allowPNG/></w:webSettings>" + }, + + { + "word/_rels/document.xml.rels", + "" + "<?xml version=\"1.0\" encoding=\"UTF-8\" standalone=\"yes\"?>\r\n" + "" + "<Relationships xmlns=\"http://schemas.openxmlformats.org/package/2006/relationships\">" + "<Relationship Id=\"rId3\" Type=\"http://schemas.openxmlformats.org/officeDocument/2006/relationships/webSettings\" Target=\"webSettings.xml\"/>" + "<Relationship Id=\"rId2\" Type=\"http://schemas.openxmlformats.org/officeDocument/2006/relationships/settings\" Target=\"settings.xml\"/>" + "<Relationship Id=\"rId1\" Type=\"http://schemas.openxmlformats.org/officeDocument/2006/relationships/styles\" Target=\"styles.xml\"/>" + "<Relationship Id=\"rId5\" Type=\"http://schemas.openxmlformats.org/officeDocument/2006/relationships/theme\" Target=\"theme/theme1.xml\"/>" + "<Relationship Id=\"rId4\" Type=\"http://schemas.openxmlformats.org/officeDocument/2006/relationships/fontTable\" Target=\"fontTable.xml\"/></Relationships>" + }, + + { + "word/theme/theme1.xml", + "" + "<?xml version=\"1.0\" encoding=\"UTF-8\" standalone=\"yes\"?>\r\n" + "" + "<a:theme xmlns:a=\"http://schemas.openxmlformats.org/drawingml/2006/main\" name=\"Office Theme\">" + "<a:themeElements>" + "<a:clrScheme name=\"Office\">" + "<a:dk1>" + "<a:sysClr val=\"windowText\" lastClr=\"000000\"/></a:dk1>" + "<a:lt1>" 
+ "<a:sysClr val=\"window\" lastClr=\"FFFFFF\"/></a:lt1>" + "<a:dk2>" + "<a:srgbClr val=\"44546A\"/></a:dk2>" + "<a:lt2>" + "<a:srgbClr val=\"E7E6E6\"/></a:lt2>" + "<a:accent1>" + "<a:srgbClr val=\"4472C4\"/></a:accent1>" + "<a:accent2>" + "<a:srgbClr val=\"ED7D31\"/></a:accent2>" + "<a:accent3>" + "<a:srgbClr val=\"A5A5A5\"/></a:accent3>" + "<a:accent4>" + "<a:srgbClr val=\"FFC000\"/></a:accent4>" + "<a:accent5>" + "<a:srgbClr val=\"5B9BD5\"/></a:accent5>" + "<a:accent6>" + "<a:srgbClr val=\"70AD47\"/></a:accent6>" + "<a:hlink>" + "<a:srgbClr val=\"0563C1\"/></a:hlink>" + "<a:folHlink>" + "<a:srgbClr val=\"954F72\"/></a:folHlink></a:clrScheme>" + "<a:fontScheme name=\"Office\">" + "<a:majorFont>" + "<a:latin typeface=\"Calibri Light\" panose=\"020F0302020204030204\"/>" + "<a:ea typeface=\"\"/>" + "<a:cs typeface=\"\"/>" + "<a:font script=\"Jpan\" typeface=\"游ゴシック Light\"/>" + "<a:font script=\"Hang\" typeface=\"맑은 고딕\"/>" + "<a:font script=\"Hans\" typeface=\"等线 Light\"/>" + "<a:font script=\"Hant\" typeface=\"新細明體\"/>" + "<a:font script=\"Arab\" typeface=\"Times New Roman\"/>" + "<a:font script=\"Hebr\" typeface=\"Times New Roman\"/>" + "<a:font script=\"Thai\" typeface=\"Angsana New\"/>" + "<a:font script=\"Ethi\" typeface=\"Nyala\"/>" + "<a:font script=\"Beng\" typeface=\"Vrinda\"/>" + "<a:font script=\"Gujr\" typeface=\"Shruti\"/>" + "<a:font script=\"Khmr\" typeface=\"MoolBoran\"/>" + "<a:font script=\"Knda\" typeface=\"Tunga\"/>" + "<a:font script=\"Guru\" typeface=\"Raavi\"/>" + "<a:font script=\"Cans\" typeface=\"Euphemia\"/>" + "<a:font script=\"Cher\" typeface=\"Plantagenet Cherokee\"/>" + "<a:font script=\"Yiii\" typeface=\"Microsoft Yi Baiti\"/>" + "<a:font script=\"Tibt\" typeface=\"Microsoft Himalaya\"/>" + "<a:font script=\"Thaa\" typeface=\"MV Boli\"/>" + "<a:font script=\"Deva\" typeface=\"Mangal\"/>" + "<a:font script=\"Telu\" typeface=\"Gautami\"/>" + "<a:font script=\"Taml\" typeface=\"Latha\"/>" + "<a:font script=\"Syrc\" typeface=\"Estrangelo 
Edessa\"/>" + "<a:font script=\"Orya\" typeface=\"Kalinga\"/>" + "<a:font script=\"Mlym\" typeface=\"Kartika\"/>" + "<a:font script=\"Laoo\" typeface=\"DokChampa\"/>" + "<a:font script=\"Sinh\" typeface=\"Iskoola Pota\"/>" + "<a:font script=\"Mong\" typeface=\"Mongolian Baiti\"/>" + "<a:font script=\"Viet\" typeface=\"Times New Roman\"/>" + "<a:font script=\"Uigh\" typeface=\"Microsoft Uighur\"/>" + "<a:font script=\"Geor\" typeface=\"Sylfaen\"/>" + "<a:font script=\"Armn\" typeface=\"Arial\"/>" + "<a:font script=\"Bugi\" typeface=\"Leelawadee UI\"/>" + "<a:font script=\"Bopo\" typeface=\"Microsoft JhengHei\"/>" + "<a:font script=\"Java\" typeface=\"Javanese Text\"/>" + "<a:font script=\"Lisu\" typeface=\"Segoe UI\"/>" + "<a:font script=\"Mymr\" typeface=\"Myanmar Text\"/>" + "<a:font script=\"Nkoo\" typeface=\"Ebrima\"/>" + "<a:font script=\"Olck\" typeface=\"Nirmala UI\"/>" + "<a:font script=\"Osma\" typeface=\"Ebrima\"/>" + "<a:font script=\"Phag\" typeface=\"Phagspa\"/>" + "<a:font script=\"Syrn\" typeface=\"Estrangelo Edessa\"/>" + "<a:font script=\"Syrj\" typeface=\"Estrangelo Edessa\"/>" + "<a:font script=\"Syre\" typeface=\"Estrangelo Edessa\"/>" + "<a:font script=\"Sora\" typeface=\"Nirmala UI\"/>" + "<a:font script=\"Tale\" typeface=\"Microsoft Tai Le\"/>" + "<a:font script=\"Talu\" typeface=\"Microsoft New Tai Lue\"/>" + "<a:font script=\"Tfng\" typeface=\"Ebrima\"/></a:majorFont>" + "<a:minorFont>" + "<a:latin typeface=\"Calibri\" panose=\"020F0502020204030204\"/>" + "<a:ea typeface=\"\"/>" + "<a:cs typeface=\"\"/>" + "<a:font script=\"Jpan\" typeface=\"游明朝\"/>" + "<a:font script=\"Hang\" typeface=\"맑은 고딕\"/>" + "<a:font script=\"Hans\" typeface=\"等线\"/>" + "<a:font script=\"Hant\" typeface=\"新細明體\"/>" + "<a:font script=\"Arab\" typeface=\"Arial\"/>" + "<a:font script=\"Hebr\" typeface=\"Arial\"/>" + "<a:font script=\"Thai\" typeface=\"Cordia New\"/>" + "<a:font script=\"Ethi\" typeface=\"Nyala\"/>" + "<a:font script=\"Beng\" typeface=\"Vrinda\"/>" + 
"<a:font script=\"Gujr\" typeface=\"Shruti\"/>" + "<a:font script=\"Khmr\" typeface=\"DaunPenh\"/>" + "<a:font script=\"Knda\" typeface=\"Tunga\"/>" + "<a:font script=\"Guru\" typeface=\"Raavi\"/>" + "<a:font script=\"Cans\" typeface=\"Euphemia\"/>" + "<a:font script=\"Cher\" typeface=\"Plantagenet Cherokee\"/>" + "<a:font script=\"Yiii\" typeface=\"Microsoft Yi Baiti\"/>" + "<a:font script=\"Tibt\" typeface=\"Microsoft Himalaya\"/>" + "<a:font script=\"Thaa\" typeface=\"MV Boli\"/>" + "<a:font script=\"Deva\" typeface=\"Mangal\"/>" + "<a:font script=\"Telu\" typeface=\"Gautami\"/>" + "<a:font script=\"Taml\" typeface=\"Latha\"/>" + "<a:font script=\"Syrc\" typeface=\"Estrangelo Edessa\"/>" + "<a:font script=\"Orya\" typeface=\"Kalinga\"/>" + "<a:font script=\"Mlym\" typeface=\"Kartika\"/>" + "<a:font script=\"Laoo\" typeface=\"DokChampa\"/>" + "<a:font script=\"Sinh\" typeface=\"Iskoola Pota\"/>" + "<a:font script=\"Mong\" typeface=\"Mongolian Baiti\"/>" + "<a:font script=\"Viet\" typeface=\"Arial\"/>" + "<a:font script=\"Uigh\" typeface=\"Microsoft Uighur\"/>" + "<a:font script=\"Geor\" typeface=\"Sylfaen\"/>" + "<a:font script=\"Armn\" typeface=\"Arial\"/>" + "<a:font script=\"Bugi\" typeface=\"Leelawadee UI\"/>" + "<a:font script=\"Bopo\" typeface=\"Microsoft JhengHei\"/>" + "<a:font script=\"Java\" typeface=\"Javanese Text\"/>" + "<a:font script=\"Lisu\" typeface=\"Segoe UI\"/>" + "<a:font script=\"Mymr\" typeface=\"Myanmar Text\"/>" + "<a:font script=\"Nkoo\" typeface=\"Ebrima\"/>" + "<a:font script=\"Olck\" typeface=\"Nirmala UI\"/>" + "<a:font script=\"Osma\" typeface=\"Ebrima\"/>" + "<a:font script=\"Phag\" typeface=\"Phagspa\"/>" + "<a:font script=\"Syrn\" typeface=\"Estrangelo Edessa\"/>" + "<a:font script=\"Syrj\" typeface=\"Estrangelo Edessa\"/>" + "<a:font script=\"Syre\" typeface=\"Estrangelo Edessa\"/>" + "<a:font script=\"Sora\" typeface=\"Nirmala UI\"/>" + "<a:font script=\"Tale\" typeface=\"Microsoft Tai Le\"/>" + "<a:font script=\"Talu\" 
typeface=\"Microsoft New Tai Lue\"/>" + "<a:font script=\"Tfng\" typeface=\"Ebrima\"/></a:minorFont></a:fontScheme>" + "<a:fmtScheme name=\"Office\">" + "<a:fillStyleLst>" + "<a:solidFill>" + "<a:schemeClr val=\"phClr\"/></a:solidFill>" + "<a:gradFill rotWithShape=\"1\">" + "<a:gsLst>" + "<a:gs pos=\"0\">" + "<a:schemeClr val=\"phClr\">" + "<a:lumMod val=\"110000\"/>" + "<a:satMod val=\"105000\"/>" + "<a:tint val=\"67000\"/></a:schemeClr></a:gs>" + "<a:gs pos=\"50000\">" + "<a:schemeClr val=\"phClr\">" + "<a:lumMod val=\"105000\"/>" + "<a:satMod val=\"103000\"/>" + "<a:tint val=\"73000\"/></a:schemeClr></a:gs>" + "<a:gs pos=\"100000\">" + "<a:schemeClr val=\"phClr\">" + "<a:lumMod val=\"105000\"/>" + "<a:satMod val=\"109000\"/>" + "<a:tint val=\"81000\"/></a:schemeClr></a:gs></a:gsLst>" + "<a:lin ang=\"5400000\" scaled=\"0\"/></a:gradFill>" + "<a:gradFill rotWithShape=\"1\">" + "<a:gsLst>" + "<a:gs pos=\"0\">" + "<a:schemeClr val=\"phClr\">" + "<a:satMod val=\"103000\"/>" + "<a:lumMod val=\"102000\"/>" + "<a:tint val=\"94000\"/></a:schemeClr></a:gs>" + "<a:gs pos=\"50000\">" + "<a:schemeClr val=\"phClr\">" + "<a:satMod val=\"110000\"/>" + "<a:lumMod val=\"100000\"/>" + "<a:shade val=\"100000\"/></a:schemeClr></a:gs>" + "<a:gs pos=\"100000\">" + "<a:schemeClr val=\"phClr\">" + "<a:lumMod val=\"99000\"/>" + "<a:satMod val=\"120000\"/>" + "<a:shade val=\"78000\"/></a:schemeClr></a:gs></a:gsLst>" + "<a:lin ang=\"5400000\" scaled=\"0\"/></a:gradFill></a:fillStyleLst>" + "<a:lnStyleLst>" + "<a:ln w=\"6350\" cap=\"flat\" cmpd=\"sng\" algn=\"ctr\">" + "<a:solidFill>" + "<a:schemeClr val=\"phClr\"/></a:solidFill>" + "<a:prstDash val=\"solid\"/>" + "<a:miter lim=\"800000\"/></a:ln>" + "<a:ln w=\"12700\" cap=\"flat\" cmpd=\"sng\" algn=\"ctr\">" + "<a:solidFill>" + "<a:schemeClr val=\"phClr\"/></a:solidFill>" + "<a:prstDash val=\"solid\"/>" + "<a:miter lim=\"800000\"/></a:ln>" + "<a:ln w=\"19050\" cap=\"flat\" cmpd=\"sng\" algn=\"ctr\">" + "<a:solidFill>" + "<a:schemeClr 
val=\"phClr\"/></a:solidFill>" + "<a:prstDash val=\"solid\"/>" + "<a:miter lim=\"800000\"/></a:ln></a:lnStyleLst>" + "<a:effectStyleLst>" + "<a:effectStyle>" + "<a:effectLst/></a:effectStyle>" + "<a:effectStyle>" + "<a:effectLst/></a:effectStyle>" + "<a:effectStyle>" + "<a:effectLst>" + "<a:outerShdw blurRad=\"57150\" dist=\"19050\" dir=\"5400000\" algn=\"ctr\" rotWithShape=\"0\">" + "<a:srgbClr val=\"000000\">" + "<a:alpha val=\"63000\"/></a:srgbClr></a:outerShdw></a:effectLst></a:effectStyle></a:effectStyleLst>" + "<a:bgFillStyleLst>" + "<a:solidFill>" + "<a:schemeClr val=\"phClr\"/></a:solidFill>" + "<a:solidFill>" + "<a:schemeClr val=\"phClr\">" + "<a:tint val=\"95000\"/>" + "<a:satMod val=\"170000\"/></a:schemeClr></a:solidFill>" + "<a:gradFill rotWithShape=\"1\">" + "<a:gsLst>" + "<a:gs pos=\"0\">" + "<a:schemeClr val=\"phClr\">" + "<a:tint val=\"93000\"/>" + "<a:satMod val=\"150000\"/>" + "<a:shade val=\"98000\"/>" + "<a:lumMod val=\"102000\"/></a:schemeClr></a:gs>" + "<a:gs pos=\"50000\">" + "<a:schemeClr val=\"phClr\">" + "<a:tint val=\"98000\"/>" + "<a:satMod val=\"130000\"/>" + "<a:shade val=\"90000\"/>" + "<a:lumMod val=\"103000\"/></a:schemeClr></a:gs>" + "<a:gs pos=\"100000\">" + "<a:schemeClr val=\"phClr\">" + "<a:shade val=\"63000\"/>" + "<a:satMod val=\"120000\"/></a:schemeClr></a:gs></a:gsLst>" + "<a:lin ang=\"5400000\" scaled=\"0\"/></a:gradFill></a:bgFillStyleLst></a:fmtScheme></a:themeElements>" + "<a:objectDefaults/>" + "<a:extraClrSchemeLst/>" + "<a:extLst>" + "<a:ext uri=\"{05A4C25C-085E-4340-85A3-A5531E510DB2}\">" + "<thm15:themeFamily xmlns:thm15=\"http://schemas.microsoft.com/office/thememl/2012/main\" name=\"Office Theme\" id=\"{62F939B6-93AF-4DB8-9C6B-D6C7DFDC589F}\" vid=\"{4A3C46E8-61CC-4603-A589-7422A47A8E4A}\"/></a:ext></a:extLst></a:theme>" + }, + +}; + +int docx_template_items_num = 11; diff --git a/extract/src/docx_template.h b/extract/src/docx_template.h new file mode 100644 index 00000000..8a73d5b2 --- /dev/null +++ 
b/extract/src/docx_template.h @@ -0,0 +1,17 @@ +#ifndef EXTRACT_DOCX_TEMPLATE_H +#define EXTRACT_DOCX_TEMPLATE_H + +/* THIS IS AUTO-GENERATED CODE, DO NOT EDIT. */ + + +typedef struct +{ + const char* name; /* Name of item in docx archive. */ + const char* text; /* Contents of item in docx archive. */ +} docx_template_item_t; + +extern const docx_template_item_t docx_template_items[]; +extern int docx_template_items_num; + + +#endif diff --git a/extract/src/docx_template_build.py b/extract/src/docx_template_build.py new file mode 100755 index 00000000..b528bcb5 --- /dev/null +++ b/extract/src/docx_template_build.py @@ -0,0 +1,210 @@ +#! /usr/bin/env python3 + +''' +Creates C code for creating docx files using internal template docx content. + +Args: + + -i <docx-path> + Set template docx file to extract from. + + -o <out-path> + Set name of output files. + + We write to <out-path>.c and <out-path>.h. +''' + +import io +import os +import re +import sys +import textwrap + + +def system(command): + ''' + Like os.system() but raises exception if command fails. + ''' + e = os.system(command) + if e: + print(f'command failed: {command}') + assert 0 + +def read(path): + ''' + Returns contents of file. We assume it is utf-8. + ''' + with open(path, 'rb') as f: + raw = f.read() + return raw.decode('utf-8') + +def write(text, path): + ''' + Writes text to file. + ''' + parent = os.path.dirname(path) + if parent: + os.makedirs(parent, exist_ok=True) + with open(path, 'w') as f: + f.write(text) + +def write_if_diff(text, path): + try: + old = read(path) + except Exception: + old = None + if text != old: + write(text, path) + +def check_path_safe(path): + ''' + Raises exception unless path consists only of characters and sequences that + are known to be safe for shell commands. + ''' + if '..' 
in path: + raise Exception(f'Path is unsafe because contains "..": {path!r}') + for c in path: + if not c.isalnum() and c not in '/._-': + #print(f'unsafe character {c} in: {path}') + raise Exception(f'Path is unsafe because contains "{c}": {path!r}') + +def path_safe(path): + ''' + Returns True if path is safe else False. + ''' + try: + check_path_safe(path) + except Exception: + return False + else: + return True + +assert not path_safe('foo;rm -rf *') +assert not path_safe('..') +assert path_safe('foo/bar.x') + + +def main(): + + path_in = None + path_out = None + args = iter(sys.argv[1:]) + while 1: + try: arg = next(args) + except StopIteration: break + if arg == '-h' or arg == '--help': + print(__doc__) + return + elif arg == '--docx-pretty': + d = next(args) + for dirpath, dirnames, filenames in os.walk(d): + for filename in filenames: + if not filename.endswith('.xml'): + continue + path = os.path.join(dirpath, filename) + system(f'xmllint --format {path} > {path}-') + system(f'mv {path}- {path}') + elif arg == '-i': + path_in = next(args) + elif arg == '-o': + path_out = next(args) + else: + assert 0 + + if not path_in: + return + + if not path_in: + raise Exception('Need to specify -i <docx-path>') + if not path_out: + raise Exception('Need to specify -o <out-path>') + + check_path_safe(path_in) + check_path_safe(path_out) + path_temp = f'{path_in}.dir' + os.system(f'rm -r "{path_temp}" 2>/dev/null') + system(f'unzip -q -d {path_temp} {path_in}') + + out_c = io.StringIO() + out_c.write(f'/* THIS IS AUTO-GENERATED CODE, DO NOT EDIT. */\n') + out_c.write(f'\n') + out_c.write(f'#include "{os.path.basename(path_out)}.h"\n') + out_c.write(f'\n') + + + out_c.write('const docx_template_item_t docx_template_items[] =\n') + out_c.write(f'{{\n') + + num_items = 0 + for dirpath, dirnames, filenames in os.walk(path_temp): + dirnames.sort() + + if 0: + # Write code to create directory item in zip. 
This isn't recognised by zipinfo, and doesn't + # make Word like the file. + # + name = dirpath[ len(path_temp)+1: ] + if name: + if not name.endswith('/'): + name += '/' + out_c3.write(f' if (extract_zip_write_file(zip, NULL, 0, "{name}")) goto end;\n') + + for filename in sorted(filenames): + num_items += 1 + path = os.path.join(dirpath, filename) + name = path[ len(path_temp)+1: ] + text = read(os.path.join(dirpath, filename)) + #print(f'first line is: %r' % text.split("\n")[0]) + text = text.replace('"', '\\"') + + # Looks like template files use \r\n when we interpret them as + # utf-8, so we preserve this in the generated strings. + # + text = text.replace('\r\n', '\\r\\n"\n "') + + # Split on '<' to avoid overly-long lines, which break windows + # compiler. + # + text = re.sub('([<][^/])', '"\n "\\1', text) + + # Remove name of document creator. + # + for tag in 'dc:creator', 'cp:lastModifiedBy': + text = re.sub(f'[<]{tag}[>][^<]*[<]/{tag}[>]', f'<{tag}></{tag}>', text) + + out_c.write(f' {{\n') + out_c.write(f' "{name}",\n') + out_c.write(f' "{text}"\n') + out_c.write(f' }},\n') + out_c.write(f' \n') + + out_c.write(f'}};\n') + out_c.write(f'\n') + out_c.write(f'int docx_template_items_num = {num_items};\n') + + out_c = out_c.getvalue() + write_if_diff(out_c, f'{path_out}.c') + + out_h = io.StringIO() + out_h.write(f'#ifndef EXTRACT_DOCX_TEMPLATE_H\n') + out_h.write(f'#define EXTRACT_DOCX_TEMPLATE_H\n') + out_h.write(f'\n') + out_h.write(f'/* THIS IS AUTO-GENERATED CODE, DO NOT EDIT. */\n') + out_h.write(f'\n') + out_h.write(f'\n') + out_h.write(f'typedef struct\n') + out_h.write(f'{{\n') + out_h.write(f' const char* name; /* Name of item in docx archive. */\n') + out_h.write(f' const char* text; /* Contents of item in docx archive. 
*/\n') + out_h.write(f'}} docx_template_item_t;\n') + out_h.write(f'\n') + out_h.write(f'extern const docx_template_item_t docx_template_items[];\n') + out_h.write(f'extern int docx_template_items_num;\n') + out_h.write(f'\n') + out_h.write(f'\n') + out_h.write(f'#endif\n') + write_if_diff(out_h.getvalue(), f'{path_out}.h') + #os.system(f'rm -r "{path_temp}"') + +if __name__ == '__main__': + main() diff --git a/extract/src/extract-exe.c b/extract/src/extract-exe.c new file mode 100644 index 00000000..d3ac81d0 --- /dev/null +++ b/extract/src/extract-exe.c @@ -0,0 +1,244 @@ +/* Command-line programme for extract_ API. */ + +#include "../include/extract.h" +#include "../include/extract_alloc.h" + +#include "memento.h" +#include "outf.h" + +#include <assert.h> +#include <errno.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + + +/* Error-detecting equivalent to *out = argv[++i]. +*/ +static int arg_next_string(char** argv, int argc, int* i, const char** out) +{ + if (*i + 1 >= argc) { + printf("Expected arg after: %s\n", argv[*i]); + errno = EINVAL; + return -1; + } + *i += 1; + *out = argv[*i]; + return 0; +} + +/* Error-detecting equivalent to *out = atoi(argv[++i]). 
+*/ +static int arg_next_int(char** argv, int argc, int* i, int* out) +{ + if (*i + 1 >= argc) { + printf("Expected integer arg after: %s\n", argv[*i]); + errno = EINVAL; + return -1; + } + *i += 1; + *out = atoi(argv[*i]); + return 0; +} + +static void* s_realloc(void* state, void* prev, size_t size) +{ + assert(state == (void*) 123); + return realloc(prev, size); +} + +int main(int argc, char** argv) +{ + int e = -1; + const char* docx_out_path = NULL; + const char* input_path = NULL; + const char* docx_template_path = NULL; + const char* content_path = NULL; + int preserve_dir = 0; + int spacing = 1; + int rotation = 1; + int autosplit = 0; + int images = 1; + int alloc_stats = 0; + int i; + + extract_alloc_t* alloc = NULL; + extract_buffer_t* out_buffer = NULL; + extract_buffer_t* intermediate = NULL; + extract_t* extract = NULL; + + /* Create an allocator so we test the allocation code. */ + if (extract_alloc_create(s_realloc, (void*) 123, &alloc)) + { + assert(0); + } + + for (i=1; i<argc; ++i) { + const char* arg = argv[i]; + if (!strcmp(arg, "-h") || !strcmp(arg, "--help")) { + printf( + "Converts intermediate data from mupdf or gs into a docx file.\n" + "\n" + "We require a file containing XML output from one of these commands:\n" + " mutool draw -F xmltext ...\n" + " gs -sDEVICE=txtwrite -dTextFormat=4 ...\n" + "\n" + "We also requires a template docx file.\n" + "\n" + "Args:\n" + " --alloc-exp-min <bytes>\n" + " Internal: set exponential allocation with minimum alloc size.\n" + " --autosplit 0|1\n" + " If 1, we initially split spans when y coordinate changes. 
This\n" + " stresses our handling of spans when input is from mupdf.\n" + " -i <intermediate-path>\n" + " Path of XML file containing intermediate text spans.\n" + " -o <docx-path>\n" + " If specified, we generate the specified docx file.\n" + " --o-content <path>\n" + " If specified, we write raw docx content to <path>; this is the\n" + " text that we embed inside the template word/document.xml file\n" + " when generating the docx file.\n" + " -p 0|1\n" + " If 1 and -t <docx-template> is specified, we preserve the\n" + " uncompressed <docx-path>.lib/ directory.\n" + " -r 0|1\n" + " If 1, we output rotated text inside a rotated drawing. Otherwise\n" + " output text is always horizontal.\n" + " -s 0|1\n" + " If 1, we insert extra vertical space between paragraphs and extra\n" + " vertical space between paragraphs that had different ctm matrices\n" + " in the original document.\n" + " -t <docx-template>\n" + " If specified we use <docx-template> as template. Otherwise we use\n" + " an internal template.\n" + " -v <verbose>\n" + " Set verbose level.\n" + " --v-alloc 0|1\n" + " Show alloc stats.\n" + ); + if (i + 1 == argc) { + e = 0; + goto end; + } + } + else if (!strcmp(arg, "--alloc-exp-min")) { + int size; + if (arg_next_int(argv, argc, &i, &size)) goto end; + outf("Calling alloc_set_min_alloc_size(%i)", size); + /* <extract> is still NULL here (extract_begin() runs after argument + parsing), so configure the allocator directly instead of going via + extract_exp_min(extract, ...). */ + extract_alloc_exp_min(alloc, (size_t) size); + } + else if (!strcmp(arg, "--autosplit")) { + if (arg_next_int(argv, argc, &i, &autosplit)) goto end; + } + else if (!strcmp(arg, "-i")) { + if (arg_next_string(argv, argc, &i, &input_path)) goto end; + } + else if (!strcmp(arg, "-o")) { + if (arg_next_string(argv, argc, &i, &docx_out_path)) goto end; + } + else if (!strcmp(arg, "--o-content")) { + if (arg_next_string(argv, argc, &i, &content_path)) goto end; + } + else if (!strcmp(arg, "-p")) { + if (arg_next_int(argv, argc, &i, &preserve_dir)) goto end; + } + else if (!strcmp(arg, "-r")) { + if (arg_next_int(argv, argc, &i, &rotation)) goto end; + } + else if (!strcmp(arg, "-s")) { + if (arg_next_int(argv, argc, &i, &spacing)) goto end; + } + else if (!strcmp(arg, 
"-s")) { + if (arg_next_int(argv, argc, &i, &spacing)) goto end; + } + else if (!strcmp(arg, "-t")) { + if (arg_next_string(argv, argc, &i, &docx_template_path)) goto end; + } + else if (!strcmp(arg, "-v")) { + int verbose; + if (arg_next_int(argv, argc, &i, &verbose)) goto end; + outf_verbose_set(verbose); + outf("Have changed verbose to %i", verbose); + } + else if (!strcmp(arg, "--v-alloc")) { + if (arg_next_int(argv, argc, &i, &alloc_stats)) goto end; + } + else { + printf("Unrecognised arg: '%s'\n", arg); + errno = EINVAL; + goto end; + } + + assert(i < argc); + } + + if (!input_path) { + printf("-i <input-path> not specified.\n"); + errno = EINVAL; + goto end; + } + + if (extract_buffer_open_file(alloc, input_path, 0 /*writable*/, &intermediate)) { + printf("Failed to open intermediate file: %s\n", input_path); + goto end; + } + + if (extract_begin(alloc, &extract)) goto end; + if (extract_read_intermediate(extract, intermediate, autosplit)) goto end; + if (extract_process(extract, spacing, rotation, images)) goto end; + + if (content_path) { + if (extract_buffer_open_file(alloc, content_path, 1 /*writable*/, &out_buffer)) goto end; + if (extract_write_content(extract, out_buffer)) goto end; + if (extract_buffer_close(&out_buffer)) goto end; + } + if (docx_out_path) { + if (docx_template_path) { + if (extract_write_template( + extract, + docx_template_path, + docx_out_path, + preserve_dir + )) { + printf("Failed to create docx file: %s\n", docx_out_path); + goto end; + } + } + else { + if (extract_buffer_open_file(alloc, docx_out_path, 1 /*writable*/, &out_buffer)) goto end; + if (extract_write(extract, out_buffer)) { + printf("Failed to create docx file: %s\n", docx_out_path); + goto end; + } + if (extract_buffer_close(&out_buffer)) goto end; + } + } + + e = 0; + end: + + extract_buffer_close(&intermediate); + extract_buffer_close(&out_buffer); + extract_end(&extract); + + if (e) { + printf("Failed (errno=%i): %s\n", errno, strerror(errno)); + return 1; + } 
+ + extract_internal_end(); + + if (alloc_stats) { + extract_alloc_stats_t* stats = extract_alloc_stats(alloc); + printf("Alloc stats: num_malloc=%i num_realloc=%i num_free=%i num_libc_realloc=%i\n", + stats->num_malloc, + stats->num_realloc, + stats->num_free, + stats->num_libc_realloc + ); + } + + extract_alloc_destroy(&alloc); + assert(alloc == NULL); + + printf("Finished.\n"); + return 0; +} diff --git a/extract/src/extract.c b/extract/src/extract.c new file mode 100644 index 00000000..adb3565e --- /dev/null +++ b/extract/src/extract.c @@ -0,0 +1,1226 @@ +#include "../include/extract.h" +#include "../include/extract_alloc.h" + +#include "astring.h" +#include "document.h" +#include "docx.h" +#include "docx_template.h" +#include "mem.h" +#include "memento.h" +#include "outf.h" +#include "xml.h" +#include "zip.h" + + +#include <assert.h> +#include <errno.h> +#include <math.h> +#include <stdarg.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + + + + +double matrix_expansion(matrix_t m) +{ + return sqrt(fabs(m.a * m.d - m.b * m.c)); +} + + +static void char_init(char_t* item) +{ + item->pre_x = 0; + item->pre_y = 0; + item->x = 0; + item->y = 0; + item->ucs = 0; + item->adv = 0; +} + + +const char* span_string(extract_alloc_t* alloc, span_t* span) +{ + static extract_astring_t ret = {0}; + double x0 = 0; + double y0 = 0; + double x1 = 0; + double y1 = 0; + int c0 = 0; + int c1 = 0; + int i; + extract_astring_free(alloc, &ret); + if (!span) { + /* This frees our internal data, and is used by extract_internal_end(). 
+ */ + return NULL; + } + if (span->chars_num) { + c0 = span->chars[0].ucs; + x0 = span->chars[0].x; + y0 = span->chars[0].y; + c1 = span->chars[span->chars_num-1].ucs; + x1 = span->chars[span->chars_num-1].x; + y1 = span->chars[span->chars_num-1].y; + } + { + char buffer[200]; + snprintf(buffer, sizeof(buffer), + "span chars_num=%i (%c:%f,%f)..(%c:%f,%f) font=%s:(%f,%f) wmode=%i chars_num=%i: ", + span->chars_num, + c0, x0, y0, + c1, x1, y1, + span->font_name, + span->trm.a, + span->trm.d, + span->wmode, + span->chars_num + ); + extract_astring_cat(alloc, &ret, buffer); + for (i=0; i<span->chars_num; ++i) { + snprintf( + buffer, + sizeof(buffer), + " i=%i {x=%f adv=%f}", + i, + span->chars[i].x, + span->chars[i].adv + ); + extract_astring_cat(alloc, &ret, buffer); + } + } + extract_astring_cat(alloc, &ret, ": "); + extract_astring_catc(alloc, &ret, '"'); + for (i=0; i<span->chars_num; ++i) { + extract_astring_catc(alloc, &ret, (char) span->chars[i].ucs); + } + extract_astring_catc(alloc, &ret, '"'); + return ret.chars; +} + +int span_append_c(extract_alloc_t* alloc, span_t* span, int c) +{ + char_t* item; + if (extract_realloc2( + alloc, + &span->chars, + sizeof(*span->chars) * span->chars_num, + sizeof(*span->chars) * (span->chars_num + 1) + )) { + return -1; + } + item = &span->chars[span->chars_num]; + span->chars_num += 1; + char_init(item); + item->ucs = c; + return 0; +} + +char_t* span_char_last(span_t* span) +{ + assert(span->chars_num > 0); + return &span->chars[span->chars_num-1]; +} + +/* Unused but useful to keep code here. */ +#if 0 +/* Returns static string containing info about line_t. 
*/ +static const char* line_string(line_t* line) +{ + static extract_astring_t ret = {0}; + char buffer[32]; + extract_astring_free(&ret); + snprintf(buffer, sizeof(buffer), "line spans_num=%i:", line->spans_num); + extract_astring_cat(&ret, buffer); + int i; + for (i=0; i<line->spans_num; ++i) { + extract_astring_cat(&ret, " "); + extract_astring_cat(&ret, span_string(line->spans[i])); + } + return ret.chars; +} +#endif + +/* Returns first span in a line. */ +span_t* line_span_last(line_t* line) +{ + assert(line->spans_num > 0); + return line->spans[line->spans_num - 1]; +} + +span_t* line_span_first(line_t* line) +{ + assert(line->spans_num > 0); + return line->spans[0]; +} + +static void page_free(extract_alloc_t* alloc, page_t* page) +{ + int s; + if (!page) return; + + for (s=0; s<page->spans_num; ++s) { + span_t* span = page->spans[s]; + if (span) { + extract_free(alloc, &span->chars); + extract_free(alloc, &span->font_name); + } + extract_free(alloc, &span); + } + extract_free(alloc, &page->spans); + + { + int l; + for (l=0; l<page->lines_num; ++l) { + line_t* line = page->lines[l]; + extract_free(alloc, &line->spans); + extract_free(alloc, &line); + /* We don't free line->spans->chars[] because already freed via + page->spans. */ + } + } + extract_free(alloc, &page->lines); + + { + int p; + for (p=0; p<page->paragraphs_num; ++p) { + paragraph_t* paragraph = page->paragraphs[p]; + if (paragraph) extract_free(alloc, ¶graph->lines); + extract_free(alloc, ¶graph); + } + } + extract_free(alloc, &page->paragraphs); + + { + int i; + for (i=0; i<page->images_num; ++i) { + extract_free(alloc, &page->images[i].data); + extract_free(alloc, &page->images[i].type); + extract_free(alloc, &page->images[i].id); + extract_free(alloc, &page->images[i].name); + } + } + extract_free(alloc, &page->images); +} + +static span_t* page_span_append(extract_alloc_t* alloc, page_t* page) +/* Appends new empty span_ to an page_t; returns NULL with errno set on error. 
+*/ +{ + span_t* span; + if (extract_malloc(alloc, &span, sizeof(*span))) return NULL; + span->font_name = NULL; + span->chars = NULL; + span->chars_num = 0; + if (extract_realloc2( + alloc, + &page->spans, + sizeof(*page->spans) * page->spans_num, + sizeof(*page->spans) * (page->spans_num + 1) + )) { + extract_free(alloc, &span); + return NULL; + } + page->spans[page->spans_num] = span; + page->spans_num += 1; + return span; +} + + +static void extract_images_free(extract_alloc_t* alloc, images_t* images) +{ + int i; + for (i=0; i<images->images_num; ++i) { + image_t* image = &images->images[i]; + extract_free(alloc, &image->type); + extract_free(alloc, &image->name); + extract_free(alloc, &image->id); + if (image->data_free) { + image->data_free(image->data_free_handle, image->data); + } + extract_free(alloc, &images->images[i]); + } + extract_free(alloc, &images->images); + extract_free(alloc, &images->imagetypes); + images->images_num = 0; + images->imagetypes_num = 0; +} + + +static int extract_document_images(extract_alloc_t* alloc, document_t* document, images_t* o_images) +/* Moves image_t's from document->page[] to *o_images. + +On return document->page[].images* will be NULL etc. +*/ +{ + int e = -1; + int p; + images_t images = {0}; + outf("images.images_num=%i", images.images_num); + for (p=0; p<document->pages_num; ++p) { + page_t* page = document->pages[p]; + int i; + for (i=0; i<page->images_num; ++i) { + image_t* image; + if (extract_realloc2( + alloc, + &images.images, + sizeof(image_t) * images.images_num, + sizeof(image_t) * (images.images_num + 1) + )) goto end; + image = &page->images[i]; + outf("p=%i i=%i image->name=%s image->id=%s", p, i, image->name, image->id); + assert(image->name); + images.images[images.images_num] = *image; + images.images_num += 1; + + /* Add image type if we haven't seen it before. 
*/ + { + int it; + for (it=0; it<images.imagetypes_num; ++it) { + outf("it=%i images.imagetypes[it]=%s image->type=%s", + it, images.imagetypes[it], image->type); + if (!strcmp(images.imagetypes[it], image->type)) { + break; + } + } + if (it == images.imagetypes_num) { + if (extract_realloc2( + alloc, + &images.imagetypes, + sizeof(char*) * images.imagetypes_num, + sizeof(char*) * (images.imagetypes_num + 1) + )) goto end; + assert(image->type); + images.imagetypes[images.imagetypes_num] = image->type; + images.imagetypes_num += 1; + outf("have added images.imagetypes_num=%i", images.imagetypes_num); + } + } + + /* We've taken ownership of image->* so NULL the original values + here to ensure we can't use things after free. */ + image->type = NULL; + image->name = NULL; + image->id = NULL; + image->data = NULL; + image->data_size = 0; + } + extract_free(alloc, &page->images); + page->images_num = 0; + } + e = 0; + end: + if (e) { + } + else { + *o_images = images; + } + return e; +} + +static void extract_document_free(extract_alloc_t* alloc, document_t* document) +{ + int p; + if (!document) { + return; + } + for (p=0; p<document->pages_num; ++p) { + page_t* page = document->pages[p]; + page_free(alloc, page); + extract_free(alloc, &page); + } + extract_free(alloc, &document->pages); + document->pages = NULL; + document->pages_num = 0; +} + + +/* Returns +1, 0 or -1 depending on sign of x. 
*/ +static int s_sign(double x) +{ + if (x < 0) return -1; + if (x > 0) return +1; + return 0; +} + +int matrix_cmp4(const matrix_t* lhs, const matrix_t* rhs) +{ + int ret; + ret = s_sign(lhs->a - rhs->a); if (ret) return ret; + ret = s_sign(lhs->b - rhs->b); if (ret) return ret; + ret = s_sign(lhs->c - rhs->c); if (ret) return ret; + ret = s_sign(lhs->d - rhs->d); if (ret) return ret; + return 0; +} + + +static point_t multiply_matrix_point(matrix_t m, point_t p) +{ + double x = p.x; + p.x = m.a * x + m.c * p.y; + p.y = m.b * x + m.d * p.y; + return p; +} + +static int s_matrix_read(const char* text, matrix_t* matrix) +{ + int n; + if (!text) { + outf("text is NULL in s_matrix_read()"); + errno = EINVAL; + return -1; + } + n = sscanf(text, + "%lf %lf %lf %lf %lf %lf", + &matrix->a, + &matrix->b, + &matrix->c, + &matrix->d, + &matrix->e, + &matrix->f + ); + if (n != 6) { + errno = EINVAL; + return -1; + } + return 0; +} + + +static void s_document_init(document_t* document) +{ + document->pages = NULL; + document->pages_num = 0; +} + + +static int page_span_end_clean(extract_alloc_t* alloc, page_t* page) +/* Does preliminary processing of the end of the last span in a page; intended +to be called as we load span information. + +Looks at last two char_t's in last span_t of <page>, and either +leaves unchanged, or removes space in last-but-one position, or moves last +char_t into a new span_t. */ +{ + int ret = -1; + span_t* span; + char_t* char_; + double font_size; + double x; + double y; + double err_x; + double err_y; + point_t dir; + + assert(page->spans_num); + span = page->spans[page->spans_num-1]; + assert(span->chars_num); + + /* Last two char_t's are char_[-2] and char_[-1]. 
*/ + char_ = &span->chars[span->chars_num]; + + if (span->chars_num == 1) { + return 0; + } + + font_size = matrix_expansion(span->trm) + * matrix_expansion(span->ctm); + + if (span->wmode) { + dir.x = 0; + dir.y = 1; + } + else { + dir.x = 1; + dir.y = 0; + } + dir = multiply_matrix_point(span->trm, dir); + + x = char_[-2].pre_x + char_[-2].adv * dir.x; + y = char_[-2].pre_y + char_[-2].adv * dir.y; + + err_x = (char_[-1].pre_x - x) / font_size; + err_y = (char_[-1].pre_y - y) / font_size; + + if (span->chars_num >= 2 && span->chars[span->chars_num-2].ucs == ' ') { + int remove_penultimate_space = 0; + if (err_x < -span->chars[span->chars_num-2].adv / 2 + && err_x > -span->chars[span->chars_num-2].adv + ) { + remove_penultimate_space = 1; + } + if ((char_[-1].pre_x - char_[-2].pre_x) / font_size < char_[-1].adv / 10) { + outfx( + "removing penultimate space because space very narrow:" + "char_[-1].pre_x-char_[-2].pre_x=%f font_size=%f" + " char_[-1].adv=%f", + char_[-1].pre_x - char_[-2].pre_x, + font_size, + char_[-1].adv + ); + remove_penultimate_space = 1; + } + if (remove_penultimate_space) { + /* This character overlaps with previous space + character. We discard previous space character - these + sometimes seem to appear in the middle of words for some + reason. */ + outfx("removing space before final char in: %s", + span_string(span)); + span->chars[span->chars_num-2] = span->chars[span->chars_num-1]; + span->chars_num -= 1; + outfx("span is now: %s", span_string(span)); + return 0; + } + } + else if (fabs(err_x) > 0.01 || fabs(err_y) > 0.01) { + /* This character doesn't seem to be a continuation of + previous characters, so split into two spans. This often + splits text incorrectly, but this is corrected later when + we join spans into lines. */ + outfx( + "Splitting last char into new span. 
font_size=%f dir.x=%f" + " char[-1].pre=(%f, %f) err=(%f, %f): %s", + font_size, + dir.x, + char_[-1].pre_x, + char_[-1].pre_y, + err_x, + err_y, + span_string2(span) + ); + { + span_t* span2 = page_span_append(alloc, page); + if (!span2) goto end; + *span2 = *span; + if (extract_strdup(alloc, span->font_name, &span2->font_name)) goto end; + span2->chars_num = 1; + if (extract_malloc(alloc, &span2->chars, sizeof(char_t) * span2->chars_num)) goto end; + span2->chars[0] = char_[-1]; + span->chars_num -= 1; + } + return 0; + } + ret = 0; + end: + return ret; +} + + +struct extract_t +{ + extract_alloc_t* alloc; + + document_t document; + + int num_spans_split; + /* Number of extra spans from page_span_end_clean(). */ + + int num_spans_autosplit; + /* Number of extra spans from autosplit=1. */ + + double span_offset_x; + double span_offset_y; + /* Only used if autosplit is non-zero. */ + + int image_n; + /* Used to generate unique ids for images. */ + + /* List of strings that are the generated docx content for each page. When + zip_* can handle appending of data, we will be able to remove this list. */ + extract_astring_t* contentss; + int contentss_num; + + images_t images; +}; + + + +int extract_begin( + extract_alloc_t* alloc, + extract_t** pextract + ) +{ + int e = -1; + extract_t* extract; + + /* Use a temporary extract_alloc_t to allocate space for the extract_t. */ + if (extract_malloc(alloc, &extract, sizeof(*extract))) goto end; + + extract_bzero(extract, sizeof(*extract)); + extract->alloc = alloc; + s_document_init(&extract->document); + + /* Start at 10 because template document might use some low-numbered IDs. + */ + extract->image_n = 10; + + e = 0; + + end: + *pextract = (e) ? 
NULL : extract; + return e; +} + +static void image_free_fn(void* handle, void* image_data) +{ + (void) handle; + free(image_data); +} + +int extract_read_intermediate(extract_t* extract, extract_buffer_t* buffer, int autosplit) +{ + int ret = -1; + + document_t* document = &extract->document; + char* image_data = NULL; + int num_spans = 0; + + extract_xml_tag_t tag; + extract_xml_tag_init(&tag); + + if (extract_xml_pparse_init(extract->alloc, buffer, NULL /*first_line*/)) { + outf("Failed to read start of intermediate data: %s", strerror(errno)); + goto end; + } + /* Data read from <path> is expected to be XML looking like: + + <page> + <span> + <char ...> + <char ...> + ... + </span> + <span> + ... + </span> + ... + </page> + <page> + ... + </page> + ... + + We convert this into a list of page_t's, each containing a list of + span_t's, each containing a list of char_t's. + + While doing this, we do some within-span processing by calling + page_span_end_clean(): + Remove spurious spaces. + Split spans in two where there seem to be large gaps between glyphs. + */ + for(;;) { + page_t* page; + int e = extract_xml_pparse_next(buffer, &tag); + if (e == 1) break; /* EOF. */ + if (e) goto end; + if (!strcmp(tag.name, "?xml")) { + /* We simply skip this if we find it. As of 2020-07-31, mutool adds + this header to mupdf raw output, but gs txtwrite does not include + it. 
*/ + continue; + } + if (strcmp(tag.name, "page")) { + outf("Expected <page> but tag.name='%s'", tag.name); + errno = ESRCH; + goto end; + } + outfx("loading spans for page %i...", document->pages_num); + if (extract_page_begin(extract)) goto end; + page = extract->document.pages[extract->document.pages_num-1]; + if (!page) goto end; + + for(;;) { + if (extract_xml_pparse_next(buffer, &tag)) goto end; + if (!strcmp(tag.name, "/page")) { + num_spans += page->spans_num; + break; + } + if (!strcmp(tag.name, "image")) { + const char* type = extract_xml_tag_attributes_find(&tag, "type"); + if (!type) { + errno = EINVAL; + goto end; + } + outf("image type=%s", type); + if (!strcmp(type, "pixmap")) { + int w; + int h; + int y; + if (extract_xml_tag_attributes_find_int(&tag, "w", &w)) goto end; + if (extract_xml_tag_attributes_find_int(&tag, "h", &h)) goto end; + for (y=0; y<h; ++y) { + int yy; + if (extract_xml_pparse_next(buffer, &tag)) goto end; + if (strcmp(tag.name, "line")) { + outf("Expected <line> but tag.name='%s'", tag.name); + errno = ESRCH; + goto end; + } + if (extract_xml_tag_attributes_find_int(&tag, "y", &yy)) goto end; + if (yy != y) { + outf("Expected <line y=%i> but found <line y=%i>", y, yy); + errno = ESRCH; + goto end; + } + if (extract_xml_pparse_next(buffer, &tag)) goto end; + if (strcmp(tag.name, "/line")) { + outf("Expected </line> but tag.name='%s'", tag.name); + errno = ESRCH; + goto end; + } + } + } + else { + /* Compressed. 
*/ + size_t image_data_size; + const char* c; + size_t i; + if (extract_xml_tag_attributes_find_size(&tag, "datasize", &image_data_size)) goto end; + if (extract_malloc(extract->alloc, &image_data, image_data_size)) goto end; + c = tag.text.chars; + for(i=0;;) { + int byte = 0; + int cc; + cc = *c; + c += 1; + if (cc == ' ' || cc == '\n') continue; + if (cc >= '0' && cc <= '9') byte += cc-'0'; + else if (cc >= 'a' && cc <= 'f') byte += 10 + cc - 'a'; + else goto compressed_error; + byte *= 16; + + cc = *c; + c += 1; + if (cc >= '0' && cc <= '9') byte += cc-'0'; + else if (cc >= 'a' && cc <= 'f') byte += 10 + cc - 'a'; + else goto compressed_error; + + image_data[i] = (char) byte; + i += 1; + if (i == image_data_size) { + break; + } + continue; + + compressed_error: + outf("Unrecognised hex character '%x' at offset %lli in image data", cc, (long long) (c-tag.text.chars)); + errno = EINVAL; + goto end; + } + if (extract_add_image( + extract, + type, + 0 /*x*/, + 0 /*y*/, + 0 /*w*/, + 0 /*h*/, + image_data, + image_data_size, + image_free_fn, + NULL + )) + { + goto end; + } + image_data = NULL; + } + if (extract_xml_pparse_next(buffer, &tag)) goto end; + if (strcmp(tag.name, "/image")) { + outf("Expected </image> but tag.name='%s'", tag.name); + errno = ESRCH; + goto end; + } + continue; + } + if (strcmp(tag.name, "span")) { + outf("Expected <span> but tag.name='%s'", tag.name); + errno = ESRCH; + goto end; + } + + { + matrix_t ctm; + matrix_t trm; + char* font_name; + char* font_name2; + int font_bold; + int font_italic; + int wmode; + if (s_matrix_read(extract_xml_tag_attributes_find(&tag, "ctm"), &ctm)) goto end; + if (s_matrix_read(extract_xml_tag_attributes_find(&tag, "trm"), &trm)) goto end; + font_name = extract_xml_tag_attributes_find(&tag, "font_name"); + if (!font_name) { + outf("Failed to find attribute 'font_name'"); + goto end; + } + font_name2 = strchr(font_name, '+'); + if (font_name2) font_name = font_name2 + 1; + font_bold = strstr(font_name, "-Bold") 
? 1 : 0; + font_italic = strstr(font_name, "-Oblique") ? 1 : 0; + if (extract_xml_tag_attributes_find_int(&tag, "wmode", &wmode)) goto end; + if (extract_span_begin( + extract, + font_name, + font_bold, + font_italic, + wmode, + ctm.a, + ctm.b, + ctm.c, + ctm.d, + ctm.e, + ctm.f, + trm.a, + trm.b, + trm.c, + trm.d, + trm.e, + trm.f + )) goto end; + + for(;;) { + double x; + double y; + double adv; + unsigned ucs; + + if (extract_xml_pparse_next(buffer, &tag)) { + outf("Failed to find <char or </span"); + goto end; + } + if (!strcmp(tag.name, "/span")) { + break; + } + if (strcmp(tag.name, "char")) { + errno = ESRCH; + outf("Expected <char> but tag.name='%s'", tag.name); + goto end; + } + + if (extract_xml_tag_attributes_find_double(&tag, "x", &x)) goto end; + if (extract_xml_tag_attributes_find_double(&tag, "y", &y)) goto end; + if (extract_xml_tag_attributes_find_double(&tag, "adv", &adv)) goto end; + if (extract_xml_tag_attributes_find_uint(&tag, "ucs", &ucs)) goto end; + + if (extract_add_char(extract, x, y, ucs, adv, autosplit)) goto end; + } + + extract_xml_tag_free(extract->alloc, &tag); + } + } + if (extract_page_end(extract)) goto end; + outf("page=%i page->num_spans=%i", + document->pages_num, page->spans_num); + } + + outf("num_spans=%i num_spans_split=%i num_spans_autosplit=%i", + num_spans, + extract->num_spans_split, + extract->num_spans_autosplit + ); + + ret = 0; + + end: + extract_xml_tag_free(extract->alloc, &tag); + extract_free(extract->alloc, &image_data); + + return ret; +} + + +int extract_span_begin( + extract_t* extract, + const char* font_name, + int font_bold, + int font_italic, + int wmode, + double ctm_a, + double ctm_b, + double ctm_c, + double ctm_d, + double ctm_e, + double ctm_f, + double trm_a, + double trm_b, + double trm_c, + double trm_d, + double trm_e, + double trm_f + ) +{ + int e = -1; + page_t* page; + span_t* span; + assert(extract->document.pages_num > 0); + page = extract->document.pages[extract->document.pages_num-1]; + 
span = page_span_append(extract->alloc, page); + if (!span) goto end; + span->ctm.a = ctm_a; + span->ctm.b = ctm_b; + span->ctm.c = ctm_c; + span->ctm.d = ctm_d; + span->ctm.e = ctm_e; + span->ctm.f = ctm_f; + span->trm.a = trm_a; + span->trm.b = trm_b; + span->trm.c = trm_c; + span->trm.d = trm_d; + span->trm.e = trm_e; + span->trm.f = trm_f; + { + const char* ff = strchr(font_name, '+'); + const char* f = (ff) ? ff+1 : font_name; + if (extract_strdup(extract->alloc, f, &span->font_name)) goto end; + span->font_bold = font_bold ? 1 : 0; + span->font_italic = font_italic ? 1 : 0; + span->wmode = wmode ? 1 : 0; + extract->span_offset_x = 0; + extract->span_offset_y = 0; + } + e = 0; + end: + return e; +} + + +int extract_add_char( + extract_t* extract, + double x, + double y, + unsigned ucs, + double adv, + int autosplit + ) +{ + int e = -1; + char_t* char_; + page_t* page = extract->document.pages[extract->document.pages_num-1]; + span_t* span = page->spans[page->spans_num - 1]; + + if (autosplit && y - extract->span_offset_y != 0) { + + double e = span->ctm.e + span->ctm.a * (x - extract->span_offset_x) + + span->ctm.b * (y - extract->span_offset_y); + double f = span->ctm.f + span->ctm.c * (x - extract->span_offset_x) + + span->ctm.d * (y - extract->span_offset_y); + extract->span_offset_x = x; + extract->span_offset_y = y; + outfx("autosplit: char_pre_y=%f offset_y=%f", + char_pre_y, offset_y); + outfx( + "autosplit: changing ctm.{e,f} from (%f, %f) to (%f, %f)", + span->ctm.e, + span->ctm.f, + e, f + ); + if (span->chars_num > 0) { + /* Create new span. 
*/ + span_t* span0 = span; + extract->num_spans_autosplit += 1; + span = page_span_append(extract->alloc, page); + if (!span) goto end; + *span = *span0; + span->chars = NULL; + span->chars_num = 0; + if (extract_strdup(extract->alloc, span0->font_name, &span->font_name)) goto end; + } + span->ctm.e = e; + span->ctm.f = f; + outfx("autosplit: char_pre_y=%f offset_y=%f", + char_pre_y, offset_y); + } + + if (span_append_c(extract->alloc, span, 0 /*c*/)) goto end; + char_ = &span->chars[ span->chars_num-1]; + + char_->pre_x = x - extract->span_offset_x; + char_->pre_y = y - extract->span_offset_y; + + char_->x = span->ctm.a * char_->pre_x + span->ctm.b * char_->pre_y; + char_->y = span->ctm.c * char_->pre_x + span->ctm.d * char_->pre_y; + + char_->adv = adv; + char_->ucs = ucs; + + char_->x += span->ctm.e; + char_->y += span->ctm.f; + + { + int page_spans_num_old = page->spans_num; + if (page_span_end_clean(extract->alloc, page)) goto end; + span = page->spans[page->spans_num-1]; /* fixme: unnecessary. */ + if (page->spans_num != page_spans_num_old) { + extract->num_spans_split += 1; + } + } + e = 0; + + end: + return e; +} + + +int extract_span_end(extract_t* extract) +{ + page_t* page = extract->document.pages[extract->document.pages_num-1]; + span_t* span = page->spans[page->spans_num - 1]; + if (span->chars_num == 0) { + /* Calling code called extract_span_begin() then extract_span_end() + without any call to extract_add_char(). Our joining code assumes that + all spans are non-empty, so we need to delete this span. 
*/ + extract_free(extract->alloc, &page->spans[page->spans_num - 1]); + page->spans_num -= 1; + } + return 0; +} + + +int extract_add_image( + extract_t* extract, + const char* type, + double x, + double y, + double w, + double h, + char* data, + size_t data_size, + extract_image_data_free data_free, + void* data_free_handle + ) +{ + int e = -1; + page_t* page = extract->document.pages[extract->document.pages_num-1]; + image_t image_temp = {0}; + + (void) x; + (void) y; + (void) w; + (void) h; + + extract->image_n += 1; + image_temp.data = data; + image_temp.data_size = data_size; + image_temp.data_free = data_free; + image_temp.data_free_handle = data_free_handle; + if (extract_strdup(extract->alloc, type, &image_temp.type)) goto end; + if (extract_asprintf(extract->alloc, &image_temp.id, "rId%i", extract->image_n) < 0) goto end; + if (extract_asprintf(extract->alloc, &image_temp.name, "image%i.%s", extract->image_n, image_temp.type) < 0) goto end; + + if (extract_realloc2( + extract->alloc, + &page->images, + sizeof(image_t) * page->images_num, + sizeof(image_t) * (page->images_num + 1) + )) goto end; + + page->images[page->images_num] = image_temp; + page->images_num += 1; + outf("page->images_num=%i", page->images_num); + + e = 0; + + end: + + if (e) { + extract_free(extract->alloc, &image_temp.type); + extract_free(extract->alloc, &image_temp.data); + extract_free(extract->alloc, &image_temp.id); + extract_free(extract->alloc, &image_temp.name); + } + + return e; +} + +int extract_page_begin(extract_t* extract) +{ + /* Appends new empty page_t to an extract->document. 
*/ + page_t* page; + if (extract_malloc(extract->alloc, &page, sizeof(page_t))) return -1; + page->spans = NULL; + page->spans_num = 0; + page->lines = NULL; + page->lines_num = 0; + page->paragraphs = NULL; + page->paragraphs_num = 0; + page->images = NULL; + page->images_num = 0; + if (extract_realloc2( + extract->alloc, + &extract->document.pages, + sizeof(page_t*) * extract->document.pages_num + 1, + sizeof(page_t*) * (extract->document.pages_num + 1) + )) { + extract_free(extract->alloc, &page); + return -1; + } + extract->document.pages[extract->document.pages_num] = page; + extract->document.pages_num += 1; + return 0; +} + + +int extract_page_end(extract_t* extract) +{ + (void) extract; + return 0; +} + +int extract_process( + extract_t* extract, + int spacing, + int rotation, + int images + ) +{ + int e = -1; + + if (extract_realloc2( + extract->alloc, + &extract->contentss, + sizeof(*extract->contentss) * extract->contentss_num, + sizeof(*extract->contentss) * (extract->contentss_num + 1) + )) goto end; + extract_astring_init(&extract->contentss[extract->contentss_num]); + extract->contentss_num += 1; + + if (extract_document_join(extract->alloc, &extract->document)) goto end; + + if (extract_document_to_docx_content( + extract->alloc, + &extract->document, + spacing, + rotation, + images, + &extract->contentss[extract->contentss_num - 1] + )) goto end; + + if (extract_document_images(extract->alloc, &extract->document, &extract->images)) goto end; + + { + int i; + for (i=0; i<extract->document.pages_num; ++i) { + page_free(extract->alloc, extract->document.pages[i]); + extract_free(extract->alloc, &extract->document.pages[i]); + } + extract_free(extract->alloc, &extract->document.pages); + extract->document.pages_num = 0; + } + + e = 0; + + end: + return e; +} + +int extract_write(extract_t* extract, extract_buffer_t* buffer) +{ + int e = -1; + extract_zip_t* zip = NULL; + char* text2 = NULL; + int i; + + if (extract_zip_open(buffer, &zip)) goto end; + 
for (i=0; i<docx_template_items_num; ++i) { + const docx_template_item_t* item = &docx_template_items[i]; + extract_free(extract->alloc, &text2); + outf("i=%i item->name=%s", i, item->name); + if (extract_docx_content_item( + extract->alloc, + extract->contentss, + extract->contentss_num, + &extract->images, + item->name, + item->text, + &text2 + )) { + goto end; + } + + { + const char* text3 = (text2) ? text2 : item->text; + if (extract_zip_write_file(zip, text3, strlen(text3), item->name)) goto end; + } + } + + for (i=0; i<extract->images.images_num; ++i) { + image_t* image = &extract->images.images[i]; + extract_free(extract->alloc, &text2); + if (extract_asprintf(extract->alloc, &text2, "word/media/%s", image->name) < 0) goto end; + if (extract_zip_write_file(zip, image->data, image->data_size, text2)) goto end; + } + + if (extract_zip_close(&zip)) goto end; + assert(!zip); + + e = 0; + + end: + if (e) outf("failed: %s", strerror(errno)); + extract_free(extract->alloc, &text2); + extract_zip_close(&zip); + + return e; +} + +int extract_write_content(extract_t* extract, extract_buffer_t* buffer) +{ + int i; + for (i=0; i<extract->contentss_num; ++i) { + if (extract_buffer_write( + buffer, + extract->contentss[i].chars, + extract->contentss[i].chars_num, + NULL /*o_actual*/ + )) return -1; + } + return 0; +} + +int extract_write_template( + extract_t* extract, + const char* path_template, + const char* path_out, + int preserve_dir + ) +{ + return extract_docx_write_template( + extract->alloc, + extract->contentss, + extract->contentss_num, + &extract->images, + path_template, + path_out, + preserve_dir + ); +} + +void extract_end(extract_t** pextract) +{ + extract_t* extract = *pextract; + if (!extract) return; + extract_document_free(extract->alloc, &extract->document); + + { + int i; + for (i=0; i<extract->contentss_num; ++i) { + extract_astring_free(extract->alloc, &extract->contentss[i]); + } + extract_free(extract->alloc, &extract->contentss); + } + 
extract_images_free(extract->alloc, &extract->images); + extract_free(extract->alloc, pextract); +} + +void extract_internal_end(void) +{ + span_string(NULL, NULL); +} + +void extract_exp_min(extract_t* extract, size_t size) +{ + extract_alloc_exp_min(extract->alloc, size); +} diff --git a/extract/src/join.c b/extract/src/join.c new file mode 100644 index 00000000..bc02ea21 --- /dev/null +++ b/extract/src/join.c @@ -0,0 +1,951 @@ +#include "../include/extract.h" +#include "../include/extract_alloc.h" + +#include "astring.h" +#include "document.h" +#include "mem.h" +#include "outf.h" + +#include <assert.h> +#include <math.h> +#include <stdio.h> + + +static char_t* span_char_first(span_t* span) +{ + assert(span->chars_num > 0); + return &span->chars[0]; +} + +/* Returns first char_t in a line. */ +static char_t* line_item_first(line_t* line) +{ + span_t* span = line_span_first(line); + return span_char_first(span); +} + +/* Returns last char_t in a line. */ +static char_t* line_item_last(line_t* line) +{ + span_t* span = line_span_last(line); + return span_char_last(span); +} + +static const char* matrix_string(const matrix_t* matrix) +{ + static char ret[64]; + snprintf(ret, sizeof(ret), "{%f %f %f %f %f %f}", + matrix->a, + matrix->b, + matrix->c, + matrix->d, + matrix->e, + matrix->f + ); + return ret; +} + +/* Returns total width of span. */ +static double span_adv_total(span_t* span) +{ + double dx = span_char_last(span)->x - span_char_first(span)->x; + double dy = span_char_last(span)->y - span_char_first(span)->y; + /* We add on the advance of the last item; this avoids us returning zero if + there's only one item. */ + double adv = span_char_last(span)->adv * matrix_expansion(span->trm); + return sqrt(dx*dx + dy*dy) + adv; +} + +/* Returns distance between end of <a> and beginning of <b>. 
*/ +static double spans_adv( + span_t* a_span, + char_t* a, + char_t* b + ) +{ + double delta_x = b->x - a->x; + double delta_y = b->y - a->y; + double s = sqrt( delta_x*delta_x + delta_y*delta_y); + double a_size = a->adv * matrix_expansion(a_span->trm); + s -= a_size; + return s; +} + +static double span_angle(span_t* span) +{ + /* Assume ctm is a rotation matix. */ + double ret = atan2(-span->ctm.c, span->ctm.a); + outfx("ctm.a=%f ctm.b=%f ret=%f", span->ctm.a, span->ctm.b, ret); + return ret; + /* Not sure whether this is right. Inclined text seems to be done by + setting the ctm matrix, so not really sure what trm matrix does. This code + assumes that it also inclines text, but maybe it only rotates individual + glyphs? */ + /*if (span->wmode == 0) { + return atan2(span->trm.b, span->trm.a); + } + else { + return atan2(span->trm.d, span->trm.c); + }*/ +} + +/* Returns static string containing brief info about span_t. */ +static const char* span_string2(extract_alloc_t* alloc, span_t* span) +{ + static extract_astring_t ret = {0}; + int i; + extract_astring_free(alloc, &ret); + extract_astring_catc(alloc, &ret, '"'); + for (i=0; i<span->chars_num; ++i) { + extract_astring_catc(alloc, &ret, (char) span->chars[i].ucs); + } + extract_astring_catc(alloc, &ret, '"'); + return ret.chars; +} + +/* Returns angle of <line>. */ +static double line_angle(line_t* line) +{ + /* All spans in a line must have same angle, so just use the first span. */ + assert(line->spans_num > 0); + return span_angle(line->spans[0]); +} + +/* Returns static string containing brief info about line_t. 
*/ +static const char* line_string2(extract_alloc_t* alloc, line_t* line) +{ + static extract_astring_t ret = {0}; + char buffer[256]; + int i; + extract_astring_free(alloc, &ret); + snprintf(buffer, sizeof(buffer), "line x=%f y=%f spans_num=%i:", + line->spans[0]->chars[0].x, + line->spans[0]->chars[0].y, + line->spans_num + ); + extract_astring_cat(alloc, &ret, buffer); + for (i=0; i<line->spans_num; ++i) { + extract_astring_cat(alloc, &ret, " "); + extract_astring_cat(alloc, &ret, span_string2(alloc, line->spans[i])); + } + return ret.chars; +} + +/* Array of pointers to lines that are aligned and adjacent to each other so as +to form a paragraph. */ +static const char* paragraph_string(extract_alloc_t* alloc, paragraph_t* paragraph) +{ + static extract_astring_t ret = {0}; + extract_astring_free(alloc, &ret); + extract_astring_cat(alloc, &ret, "paragraph: "); + if (paragraph->lines_num) { + extract_astring_cat(alloc, &ret, line_string2(alloc, paragraph->lines[0])); + if (paragraph->lines_num > 1) { + extract_astring_cat(alloc, &ret, ".."); + extract_astring_cat( + alloc, + &ret, + line_string2(alloc, paragraph->lines[paragraph->lines_num-1]) + ); + } + } + return ret.chars; +} + +/* Returns first line in paragraph. */ +static line_t* paragraph_line_first(const paragraph_t* paragraph) +{ + assert(paragraph->lines_num); + return paragraph->lines[0]; +} + +/* Returns last line in paragraph. */ +static line_t* paragraph_line_last(const paragraph_t* paragraph) +{ + assert(paragraph->lines_num); + return paragraph->lines[ paragraph->lines_num-1]; +} + + + +/* Things for direct conversion of text spans into lines and paragraphs. */ + +/* Returns 1 if lines have same wmode and are at the same angle, else 0. + +todo: allow small epsilon? 
*/ +static int lines_are_compatible( + line_t* a, + line_t* b, + double angle_a, + int verbose + ) +{ + if (a == b) return 0; + if (!a->spans || !b->spans) return 0; + if (line_span_first(a)->wmode != line_span_first(b)->wmode) { + return 0; + } + if (matrix_cmp4( + &line_span_first(a)->ctm, + &line_span_first(b)->ctm + )) { + if (verbose) { + outf("ctm's differ:"); + outf(" %f %f %f %f %f %f", + line_span_first(a)->ctm.a, + line_span_first(a)->ctm.b, + line_span_first(a)->ctm.c, + line_span_first(a)->ctm.d, + line_span_first(a)->ctm.e, + line_span_first(a)->ctm.f + ); + outf(" %f %f %f %f %f %f", + line_span_first(b)->ctm.a, + line_span_first(b)->ctm.b, + line_span_first(b)->ctm.c, + line_span_first(b)->ctm.d, + line_span_first(b)->ctm.e, + line_span_first(b)->ctm.f + ); + } + return 0; + } + { + double angle_b = span_angle(line_span_first(b)); + if (angle_b != angle_a) { + outfx("%s:%i: angles differ"); + return 0; + } + } + return 1; +} + + +/* Creates representation of span_t's that consists of a list of line_t's, with +each line_t contains pointers to a list of span_t's. + +We only join spans that are at the same angle and are aligned. + +On entry: + Original value of *o_lines and *o_lines_num are ignored. + + <spans> points to array of <spans_num> span_t*'s, each pointing to + an span_t. + +On exit: + If we succeed, we return 0, with *o_lines pointing to array of *o_lines_num + line_t*'s, each pointing to an line_t. + + Otherwise we return -1 with errno set. *o_lines and *o_lines_num are + undefined. +*/ +static int make_lines( + extract_alloc_t* alloc, + span_t** spans, + int spans_num, + line_t*** o_lines, + int* o_lines_num + ) +{ + int ret = -1; + + /* Make an line_t for each span. Then we will join some of these + line_t's together before returning. 
*/ + int lines_num = spans_num; + line_t** lines = NULL; + int a; + int num_compatible; + int num_joins; + if (extract_malloc(alloc, &lines, sizeof(*lines) * lines_num)) goto end; + + /* Ensure we can clean up after error. */ + for (a=0; a<lines_num; ++a) { + lines[a] = NULL; + } + for (a=0; a<lines_num; ++a) { + if (extract_malloc(alloc, &lines[a], sizeof(line_t))) goto end; + lines[a]->spans_num = 0; + if (extract_malloc(alloc, &lines[a]->spans, sizeof(span_t*) * 1)) goto end; + lines[a]->spans_num = 1; + lines[a]->spans[0] = spans[a]; + outfx("initial line a=%i: %s", a, line_string(lines[a])); + } + + num_compatible = 0; + + /* For each line, look for nearest aligned line, and append if found. */ + num_joins = 0; + for (a=0; a<lines_num; ++a) { + int b; + int verbose = 0; + int nearest_line_b = -1; + double nearest_adv = 0; + line_t* nearest_line = NULL; + span_t* span_a; + double angle_a; + + line_t* line_a = lines[a]; + if (!line_a) { + continue; + } + + if (0 && a < 1) verbose = 1; + outfx("looking at line_a=%s", line_string2(line_a)); + + span_a = line_span_last(line_a); + angle_a = span_angle(span_a); + if (verbose) outf("a=%i angle_a=%f ctm=%s: %s", + a, + angle_a * 180/pi, + matrix_string(&span_a->ctm), + line_string2(alloc, line_a) + ); + + for (b=0; b<lines_num; ++b) { + line_t* line_b = lines[b]; + if (!line_b) { + continue; + } + if (b == a) { + continue; + } + if (verbose) { + outf(""); + outf("a=%i b=%i: nearest_line_b=%i nearest_adv=%f", + a, + b, + nearest_line_b, + nearest_adv + ); + outf(" line_a=%s", line_string2(alloc, line_a)); + outf(" line_b=%s", line_string2(alloc, line_b)); + } + if (!lines_are_compatible(line_a, line_b, angle_a, 0*verbose)) { + if (verbose) outf("not compatible"); + continue; + } + + num_compatible += 1; + { + /* Find angle between last glyph of span_a and first glyph of + span_b. This detects whether the lines are lined up with each other + (as opposed to being at the same angle but in different lines). 
*/ + span_t* span_b = line_span_first(line_b); + double dx = span_char_first(span_b)->x - span_char_last(span_a)->x; + double dy = span_char_first(span_b)->y - span_char_last(span_a)->y; + double angle_a_b = atan2(-dy, dx); + const double angle_tolerance_deg = 1; + if (verbose) { + outf("delta=(%f %f) alast=(%f %f) bfirst=(%f %f): angle_a=%f angle_a_b=%f", + dx, + dy, + span_char_last(span_a)->x, + span_char_last(span_a)->y, + span_char_first(span_b)->x, + span_char_first(span_b)->y, + angle_a * 180 / pi, + angle_a_b * 180 / pi + ); + } + /* Might want to relax this when we test on non-horizontal lines. + */ + if (fabs(angle_a_b - angle_a) * 180 / pi <= angle_tolerance_deg) { + /* Find distance between end of line_a and beginning of line_b. */ + double adv = spans_adv( + span_a, + span_char_last(span_a), + span_char_first(span_b) + ); + if (verbose) outf("nearest_adv=%f. angle_a_b=%f adv=%f", + nearest_adv, + angle_a_b, + adv + ); + if (!nearest_line || adv < nearest_adv) { + nearest_line = line_b; + nearest_adv = adv; + nearest_line_b = b; + } + } + else { + if (verbose) outf( + "angle beyond tolerance: span_a last=(%f,%f) span_b first=(%f,%f) angle_a_b=%g angle_a=%g span_a.trm{a=%f b=%f}", + span_char_last(span_a)->x, + span_char_last(span_a)->y, + span_char_first(span_b)->x, + span_char_first(span_b)->y, + angle_a_b * 180 / pi, + angle_a * 180 / pi, + span_a->trm.a, + span_a->trm.b + ); + } + } + } + + if (nearest_line) { + /* line_a and nearest_line are aligned so we can move line_b's + spans on to the end of line_a. */ + span_t* span_b = line_span_first(nearest_line); + b = nearest_line_b; + if (verbose) outf("found nearest line. a=%i b=%i", a, b); + + if (1 + && span_char_last(span_a)->ucs != ' ' + && span_char_first(span_b)->ucs != ' ' + ) { + /* Find average advance of the two adjacent spans in the two + lines we are considering joining, so that we can decide whether + the distance between them is large enough to merit joining with + a space character). 
*/ + double average_adv = ( + (span_adv_total(span_a) + span_adv_total(span_b)) + / + (double) (span_a->chars_num + span_b->chars_num) + ); + + int insert_space = (nearest_adv > 0.25 * average_adv); + if (insert_space) { + /* Append space to span_a before concatenation. */ + char_t* item; + if (verbose) { + outf("(inserted space) nearest_adv=%f average_adv=%f", + nearest_adv, + average_adv + ); + outf(" a: %s", span_string(alloc, span_a)); + outf(" b: %s", span_string(alloc, span_b)); + } + if (extract_realloc2( + alloc, + &span_a->chars, + sizeof(char_t) * span_a->chars_num, + sizeof(char_t) * (span_a->chars_num + 1) + )) goto end; + item = &span_a->chars[span_a->chars_num]; + span_a->chars_num += 1; + extract_bzero(item, sizeof(*item)); + item->ucs = ' '; + item->adv = nearest_adv; + } + + if (verbose) { + outf("Joining spans a=%i b=%i:", a, b); + outf(" %s", span_string2(alloc, span_a)); + outf(" %s", span_string2(alloc, span_b)); + } + if (0) { + /* Show details about what we're joining. */ + outf( + "joining line insert_space=%i a=%i (y=%f) to line b=%i (y=%f). nearest_adv=%f average_adv=%f", + insert_space, + a, + span_char_last(span_a)->y, + b, + span_char_first(span_b)->y, + nearest_adv, + average_adv + ); + outf("a: %s", span_string(alloc, span_a)); + outf("b: %s", span_string(alloc, span_b)); + } + } + + /* We might end up with two adjacent spaces here. But removing a + space could result in an empty line_t, which could break various + assumptions elsewhere. 
*/ + + if (verbose) { + outf("Joining spans a=%i b=%i:", a, b); + outf(" %s", span_string2(alloc, span_a)); + outf(" %s", span_string2(alloc, span_b)); + } + if (extract_realloc2( + alloc, + &line_a->spans, + sizeof(span_t*) * line_a->spans_num, + sizeof(span_t*) * (line_a->spans_num + nearest_line->spans_num) + )) goto end; + { + int k; + for (k=0; k<nearest_line->spans_num; ++k) { + line_a->spans[ line_a->spans_num + k] = nearest_line->spans[k]; + } + } + line_a->spans_num += nearest_line->spans_num; + + /* Ensure that we ignore nearest_line from now on. */ + extract_free(alloc, &nearest_line->spans); + extract_free(alloc, &nearest_line); + outfx("setting line[b=%i] to NULL", b); + lines[b] = NULL; + + num_joins += 1; + + if (b > a) { + /* We haven't yet tried appending any spans to nearest_line, so + the new extended line_a needs checking again. */ + a -= 1; + } + outfx("new line is:\n %s", line_string2(line_a)); + } + } + + { + /* Remove empty lines left behind after we appended pairs of lines. */ + int from; + int to; + int lines_num_old; + for (from=0, to=0; from<lines_num; ++from) { + if (lines[from]) { + outfx("final line from=%i: %s", + from, + lines[from] ? line_string(lines[from]) : "NULL" + ); + lines[to] = lines[from]; + to += 1; + } + } + lines_num_old = lines_num; + lines_num = to; + if (extract_realloc2( + alloc, + &lines, + sizeof(line_t*) * lines_num_old, + sizeof(line_t*) * lines_num + )) { + /* Should always succeed because we're not increasing allocation size. */ + goto end; + } + } + + *o_lines = lines; + *o_lines_num = lines_num; + ret = 0; + + outf("Turned %i spans into %i lines. num_compatible=%i", + spans_num, + lines_num, + num_compatible + ); + + end: + if (ret) { + /* Free everything. 
*/ + if (lines) { + for (a=0; a<lines_num; ++a) { + if (lines[a]) extract_free(alloc, &lines[a]->spans); + extract_free(alloc, &lines[a]); + } + } + extract_free(alloc, &lines); + } + return ret; +} + + +/* Returns max font size of all span_t's in an line_t. */ +static double line_font_size_max(line_t* line) +{ + double size_max = 0; + int i; + for (i=0; i<line->spans_num; ++i) { + span_t* span = line->spans[i]; + /* fixme: <size> should be double, which changes some output. */ + double size = matrix_expansion(span->trm); + if (size > size_max) { + size_max = size; + } + } + return size_max; +} + + + +/* Find distance between parallel lines line_a and line_b, both at <angle>. + + _-R + _- + A------------_P + \ _- + \ _B + \_- + Q + +A is (ax, ay) +B is (bx, by) +APB and PAR are both <angle>. + +AR and QBP are parallel, and are the lines of text a and b +respectively. + +AQB is a right angle. We need to find AQ. +*/ +static double line_distance( + double ax, + double ay, + double bx, + double by, + double angle + ) +{ + double dx = bx - ax; + double dy = by - ay; + + + return dx * sin(angle) + dy * cos(angle); +} + + +/* A comparison function for use with qsort(), for sorting paragraphs within a +page. */ +static int paragraphs_cmp(const void* a, const void* b) +{ + const paragraph_t* const* a_paragraph = a; + const paragraph_t* const* b_paragraph = b; + line_t* a_line = paragraph_line_first(*a_paragraph); + line_t* b_line = paragraph_line_first(*b_paragraph); + + span_t* a_span = line_span_first(a_line); + span_t* b_span = line_span_first(b_line); + + /* If ctm matrices differ, always return this diff first. Note that we + ignore .e and .f because if data is from ghostscript then .e and .f vary + for each span, and we don't care about these differences. 
*/ + int d = matrix_cmp4(&a_span->ctm, &b_span->ctm); + if (d) return d; + + { + double a_angle = line_angle(a_line); + double b_angle = line_angle(b_line); + if (fabs(a_angle - b_angle) > 3.14/2) { + /* Give up if more than 90 deg. */ + return 0; + } + { + double angle = (a_angle + b_angle) / 2; + double ax = line_item_first(a_line)->x; + double ay = line_item_first(a_line)->y; + double bx = line_item_first(b_line)->x; + double by = line_item_first(b_line)->y; + double distance = line_distance(ax, ay, bx, by, angle); + if (distance > 0) return -1; + if (distance < 0) return +1; + } + } + return 0; +} + + +/* Creates a representation of line_t's that consists of a list of +paragraph_t's. + +We only join lines that are at the same angle and are adjacent. + +On entry: + Original value of *o_paragraphs and *o_paragraphs_num are ignored. + + <lines> points to array of <lines_num> line_t*'s, each pointing to + a line_t. + +On exit: + On sucess, returns zero, *o_paragraphs points to array of *o_paragraphs_num + paragraph_t*'s, each pointing to an paragraph_t. In the + array, paragraph_t's with same angle are sorted. + + On failure, returns -1 with errno set. *o_paragraphs and *o_paragraphs_num + are undefined. +*/ +static int make_paragraphs( + extract_alloc_t* alloc, + line_t** lines, + int lines_num, + paragraph_t*** o_paragraphs, + int* o_paragraphs_num + ) +{ + int ret = -1; + int a; + int num_joins; + paragraph_t** paragraphs = NULL; + + /* Start off with an paragraph_t for each line_t. */ + int paragraphs_num = lines_num; + if (extract_malloc(alloc, ¶graphs, sizeof(*paragraphs) * paragraphs_num)) goto end; + /* Ensure we can clean up after error when setting up. */ + for (a=0; a<paragraphs_num; ++a) { + paragraphs[a] = NULL; + } + /* Set up initial paragraphs. 
*/ + for (a=0; a<paragraphs_num; ++a) { + if (extract_malloc(alloc, &paragraphs[a], sizeof(paragraph_t))) goto end; + paragraphs[a]->lines_num = 0; + if (extract_malloc(alloc, &paragraphs[a]->lines, sizeof(line_t*) * 1)) goto end; + paragraphs[a]->lines_num = 1; + paragraphs[a]->lines[0] = lines[a]; + } + + num_joins = 0; + for (a=0; a<paragraphs_num; ++a) { + paragraph_t* nearest_paragraph; + int nearest_paragraph_b; + double nearest_paragraph_distance; + line_t* line_a; + double angle_a; + int verbose; + int b; + + paragraph_t* paragraph_a = paragraphs[a]; + if (!paragraph_a) { + /* This paragraph is empty - already been appended to a different + paragraph. */ + continue; + } + + nearest_paragraph = NULL; + nearest_paragraph_b = -1; + nearest_paragraph_distance = -1; + assert(paragraph_a->lines_num > 0); + + line_a = paragraph_line_last(paragraph_a); + angle_a = line_angle(line_a); + + verbose = 0; + + /* Look for nearest paragraph_t that could be appended to + paragraph_a. */ + for (b=0; b<paragraphs_num; ++b) { + paragraph_t* paragraph_b = paragraphs[b]; + line_t* line_b; + if (!paragraph_b) { + /* This paragraph is empty - already been appended to a different + paragraph. */ + continue; + } + line_b = paragraph_line_first(paragraph_b); + if (!lines_are_compatible(line_a, line_b, angle_a, 0)) { + continue; + } + + { + double ax = line_item_last(line_a)->x; + double ay = line_item_last(line_a)->y; + double bx = line_item_first(line_b)->x; + double by = line_item_first(line_b)->y; + double distance = line_distance(ax, ay, bx, by, angle_a); + if (verbose) { + outf( + "angle_a=%f a=(%f %f) b=(%f %f) delta=(%f %f) distance=%f:", + angle_a * 180 / pi, + ax, ay, + bx, by, + bx - ax, + by - ay, + distance + ); + outf(" line_a=%s", line_string2(alloc, line_a)); + outf(" line_b=%s", line_string2(alloc, line_b)); + } + if (distance > 0) { + if (nearest_paragraph_distance == -1 + || distance < nearest_paragraph_distance) { + if (verbose) { + outf("updating nearest. 
distance=%f:", distance); + outf(" line_a=%s", line_string2(alloc, line_a)); + outf(" line_b=%s", line_string2(alloc, line_b)); + } + nearest_paragraph_distance = distance; + nearest_paragraph_b = b; + nearest_paragraph = paragraph_b; + } + } + } + } + + if (nearest_paragraph) { + double line_b_size = line_font_size_max( + paragraph_line_first(nearest_paragraph) + ); + line_t* line_b = paragraph_line_first(nearest_paragraph); + (void) line_b; /* Only used in outfx(). */ + if (nearest_paragraph_distance < 1.4 * line_b_size) { + /* Paragraphs are close together vertically compared to maximum + font size of first line in second paragraph, so we'll join them + into a single paragraph. */ + span_t* a_span; + int a_lines_num_new; + if (verbose) { + outf( + "joing paragraphs. a=(%f,%f) b=(%f,%f) nearest_paragraph_distance=%f line_b_size=%f", + line_item_last(line_a)->x, + line_item_last(line_a)->y, + line_item_first(line_b)->x, + line_item_first(line_b)->y, + nearest_paragraph_distance, + line_b_size + ); + outf(" %s", paragraph_string(alloc, paragraph_a)); + outf(" %s", paragraph_string(alloc, nearest_paragraph)); + outf("paragraph_a ctm=%s", + matrix_string(&paragraph_a->lines[0]->spans[0]->ctm) + ); + outf("paragraph_a trm=%s", + matrix_string(&paragraph_a->lines[0]->spans[0]->trm) + ); + } + /* Join these two paragraph_t's. */ + a_span = line_span_last(line_a); + if (span_char_last(a_span)->ucs == '-') { + /* remove trailing '-' at end of prev line. char_t doesn't + contain any malloc-heap pointers so this doesn't leak. */ + a_span->chars_num -= 1; + } + else { + /* Insert space before joining adjacent lines. 
*/ + char_t* c_prev; + char_t* c; + if (span_append_c(alloc, line_span_last(line_a), ' ')) goto end; + c_prev = &a_span->chars[ a_span->chars_num-2]; + c = &a_span->chars[ a_span->chars_num-1]; + c->x = c_prev->x + c_prev->adv * a_span->ctm.a; + c->y = c_prev->y + c_prev->adv * a_span->ctm.c; + } + + a_lines_num_new = paragraph_a->lines_num + nearest_paragraph->lines_num; + if (extract_realloc2( + alloc, + &paragraph_a->lines, + sizeof(line_t*) * paragraph_a->lines_num, + sizeof(line_t*) * a_lines_num_new + )) goto end; + { + int i; + for (i=0; i<nearest_paragraph->lines_num; ++i) { + paragraph_a->lines[paragraph_a->lines_num + i] + = nearest_paragraph->lines[i]; + } + } + paragraph_a->lines_num = a_lines_num_new; + + /* Ensure that we skip nearest_paragraph in future. */ + extract_free(alloc, &nearest_paragraph->lines); + extract_free(alloc, &nearest_paragraph); + paragraphs[nearest_paragraph_b] = NULL; + + num_joins += 1; + outfx( + "have joined paragraph a=%i to snearest_paragraph_b=%i", + a, + nearest_paragraph_b + ); + + if (nearest_paragraph_b > a) { + /* We haven't yet tried appending any paragraphs to + nearest_paragraph_b, so the new extended paragraph_a needs + checking again. */ + a -= 1; + } + } + else { + outfx( + "Not joining paragraphs. nearest_paragraph_distance=%f line_b_size=%f", + nearest_paragraph_distance, + line_b_size + ); + } + } + } + + { + /* Remove empty paragraphs. */ + int from; + int to; + int paragraphs_num_old; + for (from=0, to=0; from<paragraphs_num; ++from) { + if (paragraphs[from]) { + paragraphs[to] = paragraphs[from]; + to += 1; + } + } + outfx("paragraphs_num=%i => %i", paragraphs_num, to); + paragraphs_num_old = paragraphs_num; + paragraphs_num = to; + if (extract_realloc2( + alloc, + &paragraphs, + sizeof(paragraph_t*) * paragraphs_num_old, + sizeof(paragraph_t*) * paragraphs_num + )) { + /* Should always succeed because we're not increasing allocation size, but + can fail with memento squeeze. 
*/ + goto end; + } + } + + /* Sort paragraphs so they appear in correct order, using paragraphs_cmp(). + */ + qsort( + paragraphs, + paragraphs_num, + sizeof(paragraph_t*), paragraphs_cmp + ); + + *o_paragraphs = paragraphs; + *o_paragraphs_num = paragraphs_num; + ret = 0; + outf("Turned %i lines into %i paragraphs", + lines_num, + paragraphs_num + ); + + + end: + + if (ret) { + if (paragraphs) { + for (a=0; a<paragraphs_num; ++a) { + if (paragraphs[a]) extract_free(alloc, &paragraphs[a]->lines); + extract_free(alloc, &paragraphs[a]); + } + } + extract_free(alloc, &paragraphs); + } + return ret; +} + +int extract_document_join(extract_alloc_t* alloc, document_t* document) +{ + int ret = -1; + + /* For each page in <document> we join spans into lines and paragraphs. A + line is a list of spans that are at the same angle and on the same line. A + paragraph is a list of lines that are at the same angle and close together. + */ + int p; + for (p=0; p<document->pages_num; ++p) { + page_t* page = document->pages[p]; + outf("processing page %i: num_spans=%i", p, page->spans_num); + + if (make_lines( + alloc, + page->spans, + page->spans_num, + &page->lines, + &page->lines_num + )) goto end; + + if (make_paragraphs( + alloc, + page->lines, + page->lines_num, + &page->paragraphs, + &page->paragraphs_num + )) goto end; + } + + ret = 0; + + end: + + return ret; +} diff --git a/extract/src/mem.c b/extract/src/mem.c new file mode 100644 index 00000000..83b5032c --- /dev/null +++ b/extract/src/mem.c @@ -0,0 +1,51 @@ +#include "../include/extract_alloc.h" + +#include "mem.h" + +#include <assert.h> +#include <stdio.h> +#include <string.h> + +#ifdef _MSC_VER + #include "compat_va_copy.h" +#endif + + +void extract_bzero(void *b, size_t len) +{ + memset(b, 0, len); +} + +int extract_vasprintf(extract_alloc_t* alloc, char** out, const char* format, va_list va) +{ + int n; + int n2; + va_list va2; + va_copy(va2, va); + n = vsnprintf(NULL, 0, format, va); + if (n < 0) return n; + if 
(extract_malloc(alloc, out, n + 1)) return -1; + n2 = vsnprintf(*out, n + 1, format, va2); + va_end(va2); + assert(n2 == n); + return n2; +} + + +int extract_asprintf(extract_alloc_t* alloc, char** out, const char* format, ...) +{ + va_list va; + int ret; + va_start(va, format); + ret = extract_vasprintf(alloc, out, format, va); + va_end(va); + return ret; +} + +int extract_strdup(extract_alloc_t* alloc, const char* s, char** o_out) +{ + size_t l = strlen(s) + 1; + if (extract_malloc(alloc, o_out, l)) return -1; + memcpy(*o_out, s, l); + return 0; +} diff --git a/extract/src/mem.h b/extract/src/mem.h new file mode 100644 index 00000000..59729b1a --- /dev/null +++ b/extract/src/mem.h @@ -0,0 +1,14 @@ +#ifndef EXTRACT_MEM_H +#define EXTRACT_MEM_H + +#include <stdarg.h> +#include <string.h> + +void extract_bzero(void *b, size_t len); + +int extract_vasprintf(extract_alloc_t* alloc, char** out, const char* format, va_list va); +int extract_asprintf(extract_alloc_t* alloc, char** out, const char* format, ...); + +int extract_strdup(extract_alloc_t* alloc, const char* s, char** o_out); + +#endif diff --git a/extract/src/memento.c b/extract/src/memento.c new file mode 100644 index 00000000..e62744be --- /dev/null +++ b/extract/src/memento.c @@ -0,0 +1,3574 @@ +/* Copyright (C) 2009-2020 Artifex Software, Inc. + All Rights Reserved. + + This software is provided AS-IS with no warranty, either express or + implied. + + This software is distributed under license and may not be copied, modified + or distributed except as expressly authorized under the terms of that + license. Refer to licensing information at http://www.artifex.com + or contact Artifex Software, Inc., 1305 Grant Avenue - Suite 200, + Novato, CA 94945, U.S.A., +1(415)492-9861, for further information. +*/ + +/* Inspired by Fortify by Simon P Bullen. 
*/ + +/* Set the following if you're only looking for leaks, not memory overwrites + * to speed the operation */ +/* #define MEMENTO_LEAKONLY */ + +/* Set the following to keep extra details about the history of blocks */ +#define MEMENTO_DETAILS + +/* Don't keep blocks around if they'd mean losing more than a quarter of + * the freelist. */ +#define MEMENTO_FREELIST_MAX_SINGLE_BLOCK (MEMENTO_FREELIST_MAX/4) + +#define COMPILING_MEMENTO_C + +/* SHUT UP, MSVC. I KNOW WHAT I AM DOING. */ +#define _CRT_SECURE_NO_WARNINGS + +/* We have some GS specific tweaks; more for the GS build environment than + * anything else. */ +/* #define MEMENTO_GS_HACKS */ + +#ifdef MEMENTO_GS_HACKS +/* For GS we include malloc_.h. Anyone else would just include memento.h */ +#include "malloc_.h" +#include "memory_.h" +int atexit(void (*)(void)); +#else +#ifdef MEMENTO_MUPDF_HACKS +#include "mupdf/memento.h" +#else +#include "memento.h" +#endif +#include <stdio.h> +#endif +#ifndef _MSC_VER +#include <stdint.h> +#include <limits.h> +#include <unistd.h> +#endif + +#include <errno.h> +#include <stdlib.h> +#include <stdarg.h> +#include <string.h> + +#ifdef __ANDROID__ +#define MEMENTO_ANDROID +#include <stdio.h> +#endif + +/* Hacks to portably print large sizes */ +#ifdef _MSC_VER +#define FMTZ "%llu" +#define FMTZ_CAST _int64 +#define FMTP "0x%p" +#else +#define FMTZ "%zu" +#define FMTZ_CAST size_t +#define FMTP "%p" +#endif + +#define UB(x) ((intptr_t)((x) & 0xFF)) +#define B2I(x) (UB(x) | (UB(x)<<8) | (UB(x)<<16) | (UB(x)<<24)) +#define B2P(x) ((void *)(B2I(x) | ((B2I(x)<<16)<<16))) +#define MEMENTO_PREFILL_UBYTE ((unsigned char)(MEMENTO_PREFILL)) +#define MEMENTO_PREFILL_USHORT (((unsigned short)MEMENTO_PREFILL_UBYTE) | (((unsigned short)MEMENTO_PREFILL_UBYTE)<<8)) +#define MEMENTO_PREFILL_UINT (((unsigned int)MEMENTO_PREFILL_USHORT) | (((unsigned int)MEMENTO_PREFILL_USHORT)<<16)) +#define MEMENTO_PREFILL_PTR (void *)(((uintptr_t)MEMENTO_PREFILL_UINT) | 
((((uintptr_t)MEMENTO_PREFILL_UINT)<<16)<<16)) +#define MEMENTO_POSTFILL_UBYTE ((unsigned char)(MEMENTO_POSTFILL)) +#define MEMENTO_POSTFILL_USHORT (((unsigned short)MEMENTO_POSTFILL_UBYTE) | (((unsigned short)MEMENTO_POSTFILL_UBYTE)<<8)) +#define MEMENTO_POSTFILL_UINT (((unsigned int)MEMENTO_POSTFILL_USHORT) | (((unsigned int)MEMENTO_POSTFILL_USHORT)<<16)) +#define MEMENTO_POSTFILL_PTR (void *)(((uintptr_t)MEMENTO_POSTFILL_UINT) | ((((uintptr_t)MEMENTO_POSTFILL_UINT)<<16)<<16)) +#define MEMENTO_ALLOCFILL_UBYTE ((unsigned char)(MEMENTO_ALLOCFILL)) +#define MEMENTO_ALLOCFILL_USHORT (((unsigned short)MEMENTO_ALLOCFILL_UBYTE) | (((unsigned short)MEMENTO_ALLOCFILL_UBYTE)<<8)) +#define MEMENTO_ALLOCFILL_UINT (((unsigned int)MEMENTO_ALLOCFILL_USHORT) | (((unsigned int)MEMENTO_ALLOCFILL_USHORT)<<16)) +#define MEMENTO_ALLOCFILL_PTR (void *)(((uintptr_t)MEMENTO_ALLOCFILL_UINT) | ((((uintptr_t)MEMENTO_ALLOCFILL_UINT)<<16)<<16)) +#define MEMENTO_FREEFILL_UBYTE ((unsigned char)(MEMENTO_FREEFILL)) +#define MEMENTO_FREEFILL_USHORT (((unsigned short)MEMENTO_FREEFILL_UBYTE) | (((unsigned short)MEMENTO_FREEFILL_UBYTE)<<8)) +#define MEMENTO_FREEFILL_UINT (((unsigned int)MEMENTO_FREEFILL_USHORT) | (((unsigned int)MEMENTO_FREEFILL_USHORT)<<16)) +#define MEMENTO_FREEFILL_PTR (void *)(((uintptr_t)MEMENTO_FREEFILL_UINT) | ((((uintptr_t)MEMENTO_FREEFILL_UINT)<<16)<<16)) + +#ifdef MEMENTO + +#ifndef MEMENTO_CPP_EXTRAS_ONLY + +#ifdef MEMENTO_ANDROID +#include <android/log.h> + +static char log_buffer[4096]; +static int log_fill = 0; + +static char log_buffer2[4096]; + +static int +android_fprintf(FILE *file, const char *fmt, ...) 
+{ + va_list args; + char *p, *q; + + va_start(args, fmt); + vsnprintf(log_buffer2, sizeof(log_buffer2)-1, fmt, args); + va_end(args); + + /* Ensure we are always null terminated */ + log_buffer2[sizeof(log_buffer2)-1] = 0; + + p = log_buffer2; + q = p; + do + { + /* Find the end of the string, or the next \n */ + while (*p && *p != '\n') + p++; + + /* We need to output from q to p. Limit ourselves to what + * will fit in the existing */ + if (p - q >= sizeof(log_buffer)-1 - log_fill) + p = q + sizeof(log_buffer)-1 - log_fill; + + memcpy(&log_buffer[log_fill], q, p-q); + log_fill += p-q; + if (*p == '\n') + { + log_buffer[log_fill] = 0; + __android_log_print(ANDROID_LOG_ERROR, "memento", "%s", log_buffer); + usleep(1); + log_fill = 0; + p++; /* Skip over the \n */ + } + else if (log_fill >= sizeof(log_buffer)-1) + { + log_buffer[sizeof(log_buffer2)-1] = 0; + __android_log_print(ANDROID_LOG_ERROR, "memento", "%s", log_buffer); + usleep(1); + log_fill = 0; + } + q = p; + } + while (*p); + + return 0; +} + +#define fprintf android_fprintf +#define MEMENTO_STACKTRACE_METHOD 3 +#endif + +/* _WIN64 defined implies _WIN32 will be */ +#ifdef _WIN32 +#include <windows.h> + +static int +windows_fprintf(FILE *file, const char *fmt, ...) 
+{ + va_list args; + char text[4096]; + int ret; + + va_start(args, fmt); + ret = vfprintf(file, fmt, args); + va_end(args); + + va_start(args, fmt); + vsnprintf(text, 4096, fmt, args); + OutputDebugStringA(text); + va_end(args); + + return ret; +} + +#define fprintf windows_fprintf +#endif + +#ifndef MEMENTO_STACKTRACE_METHOD +#ifdef __GNUC__ +#define MEMENTO_STACKTRACE_METHOD 1 +#endif +#ifdef _WIN32 +#define MEMENTO_STACKTRACE_METHOD 2 +#endif +#endif + +#if defined(__linux__) || defined(__OpenBSD__) +#define MEMENTO_HAS_FORK +#elif defined(__APPLE__) && defined(__MACH__) +#define MEMENTO_HAS_FORK +#endif + +/* Define the underlying allocators, just in case */ +void *MEMENTO_UNDERLYING_MALLOC(size_t); +void MEMENTO_UNDERLYING_FREE(void *); +void *MEMENTO_UNDERLYING_REALLOC(void *,size_t); +void *MEMENTO_UNDERLYING_CALLOC(size_t,size_t); + +/* And some other standard functions we use. We don't include the header + * files, just in case they pull in unexpected others. */ +int atoi(const char *); +char *getenv(const char *); + +/* How far to search for pointers in each block when calculating nestings */ +/* mupdf needs at least 34000ish (sizeof(fz_shade))/ */ +#define MEMENTO_PTRSEARCH 65536 + +#ifndef MEMENTO_MAXPATTERN +#define MEMENTO_MAXPATTERN 0 +#endif + +#ifdef MEMENTO_GS_HACKS +#include "valgrind.h" +#else +#ifdef HAVE_VALGRIND +#include "valgrind/memcheck.h" +#else +#define VALGRIND_MAKE_MEM_NOACCESS(p,s) do { } while (0==1) +#define VALGRIND_MAKE_MEM_UNDEFINED(p,s) do { } while (0==1) +#define VALGRIND_MAKE_MEM_DEFINED(p,s) do { } while (0==1) +#endif +#endif + +enum { + Memento_PreSize = 16, + Memento_PostSize = 16 +}; + +/* Some compile time checks */ +typedef struct +{ + char MEMENTO_PRESIZE_MUST_BE_A_MULTIPLE_OF_4[Memento_PreSize & 3 ? -1 : 1]; + char MEMENTO_POSTSIZE_MUST_BE_A_MULTIPLE_OF_4[Memento_PostSize & 3 ? -1 : 1]; + char MEMENTO_POSTSIZE_MUST_BE_AT_LEAST_4[Memento_PostSize >= 4 ? 
1 : -1]; + char MEMENTO_PRESIZE_MUST_BE_AT_LEAST_4[Memento_PreSize >= 4 ? 1 : -1]; +} MEMENTO_SANITY_CHECK_STRUCT; + +#define MEMENTO_UINT32 unsigned int +#define MEMENTO_UINT16 unsigned short + +#define MEMENTO_PREFILL_UINT32 ((MEMENTO_UINT32)(MEMENTO_PREFILL | (MEMENTO_PREFILL <<8) | (MEMENTO_PREFILL <<16) |(MEMENTO_PREFILL <<24))) +#define MEMENTO_POSTFILL_UINT16 ((MEMENTO_UINT16)(MEMENTO_POSTFILL | (MEMENTO_POSTFILL<<8))) +#define MEMENTO_POSTFILL_UINT32 ((MEMENTO_UINT32)(MEMENTO_POSTFILL | (MEMENTO_POSTFILL<<8) | (MEMENTO_POSTFILL<<16) |(MEMENTO_POSTFILL<<24))) +#define MEMENTO_FREEFILL_UINT16 ((MEMENTO_UINT16)(MEMENTO_FREEFILL | (MEMENTO_FREEFILL<<8))) +#define MEMENTO_FREEFILL_UINT32 ((MEMENTO_UINT32)(MEMENTO_FREEFILL | (MEMENTO_FREEFILL<<8) | (MEMENTO_FREEFILL<<16) |(MEMENTO_FREEFILL<<24))) + +enum { + Memento_Flag_OldBlock = 1, + Memento_Flag_HasParent = 2, + Memento_Flag_BreakOnFree = 4, + Memento_Flag_BreakOnRealloc = 8, + Memento_Flag_Freed = 16, + Memento_Flag_KnownLeak = 32, + Memento_Flag_Reported = 64 +}; + +enum { + Memento_EventType_malloc = 0, + Memento_EventType_calloc = 1, + Memento_EventType_realloc = 2, + Memento_EventType_free = 3, + Memento_EventType_new = 4, + Memento_EventType_delete = 5, + Memento_EventType_newArray = 6, + Memento_EventType_deleteArray = 7, + Memento_EventType_takeRef = 8, + Memento_EventType_dropRef = 9, + Memento_EventType_reference = 10, + Memento_EventType_strdup = 11, + Memento_EventType_asprintf = 12, + Memento_EventType_vasprintf = 13 +}; + +static const char *eventType[] = +{ + "malloc", + "calloc", + "realloc", + "free", + "new", + "delete", + "new[]", + "delete[]", + "takeRef", + "dropRef", + "reference", + "strdup", + "asprintf", + "vasprintf" +}; + +/* When we list leaked blocks at the end of execution, we search for pointers + * between blocks in order to be able to give a nice nested view. 
+ * Unfortunately, if you are running your own allocator (such as + * postscript's chunk allocator) you can often find that the header of the + * block always contains pointers to next or previous blocks. This tends to + * mean the nesting displayed is "uninteresting" at best :) + * + * As a hack to get around this, we have a define MEMENTO_SEARCH_SKIP that + * indicates how many bytes to skip over at the start of the chunk. + * This may cause us to miss true nestings, but such is life... + */ +#ifndef MEMENTO_SEARCH_SKIP +#ifdef MEMENTO_GS_HACKS +#define MEMENTO_SEARCH_SKIP (2*sizeof(void *)) +#else +#define MEMENTO_SEARCH_SKIP 0 +#endif +#endif + +#define MEMENTO_CHILD_MAGIC ((Memento_BlkHeader *)('M' | ('3' << 8) | ('m' << 16) | ('3' << 24))) +#define MEMENTO_SIBLING_MAGIC ((Memento_BlkHeader *)('n' | ('t' << 8) | ('0' << 16) | ('!' << 24))) + +#ifdef MEMENTO_DETAILS +typedef struct Memento_BlkDetails Memento_BlkDetails; + +struct Memento_BlkDetails +{ + Memento_BlkDetails *next; + char type; + char count; + int sequence; + void *stack[1]; +}; +#endif /* MEMENTO_DETAILS */ + +typedef struct Memento_BlkHeader Memento_BlkHeader; + +struct Memento_BlkHeader +{ + size_t rawsize; + int sequence; + int lastCheckedOK; + int flags; + Memento_BlkHeader *next; + Memento_BlkHeader *prev; /* Reused as 'parent' when printing nested list */ + + const char *label; + + /* Entries for nesting display calculations. Set to magic + * values at all other time. */ + Memento_BlkHeader *child; + Memento_BlkHeader *sibling; + +#ifdef MEMENTO_DETAILS + Memento_BlkDetails *details; + Memento_BlkDetails **details_tail; +#endif + + char preblk[Memento_PreSize]; +}; + +/* In future this could (should) be a smarter data structure, like, say, + * splay trees. For now, we use a list. + */ +typedef struct Memento_Blocks +{ + Memento_BlkHeader *head; + Memento_BlkHeader *tail; +} Memento_Blocks; + +/* What sort of Mutex should we use? 
*/ +#ifdef MEMENTO_LOCKLESS +typedef int Memento_mutex; + +static void Memento_initMutex(Memento_mutex *m) +{ + (void)m; +} + +#define MEMENTO_DO_LOCK() do { } while (0) +#define MEMENTO_DO_UNLOCK() do { } while (0) + +#else +#if defined(_WIN32) || defined(_WIN64) +/* Windows */ +typedef CRITICAL_SECTION Memento_mutex; + +static void Memento_initMutex(Memento_mutex *m) +{ + InitializeCriticalSection(m); +} + +#define MEMENTO_DO_LOCK() \ + EnterCriticalSection(&memento.mutex) +#define MEMENTO_DO_UNLOCK() \ + LeaveCriticalSection(&memento.mutex) + +#else +#include <pthread.h> +typedef pthread_mutex_t Memento_mutex; + +static void Memento_initMutex(Memento_mutex *m) +{ + pthread_mutex_init(m, NULL); +} + +#define MEMENTO_DO_LOCK() \ + pthread_mutex_lock(&memento.mutex) +#define MEMENTO_DO_UNLOCK() \ + pthread_mutex_unlock(&memento.mutex) + +#endif +#endif + +typedef struct { + int begin; + int end; +} Memento_range; + +/* And our global structure */ +static struct { + int inited; + Memento_Blocks used; + Memento_Blocks free; + size_t freeListSize; + int sequence; + int paranoia; + int paranoidAt; + int countdown; + int lastChecked; + int breakAt; + int failAt; + int failing; + int nextFailAt; + int squeezeAt; + int squeezing; + int segv; + int pattern; + int nextPattern; + int patternBit; + int leaking; + int hideMultipleReallocs; + int abortOnLeak; + int abortOnCorruption; + size_t maxMemory; + size_t alloc; + size_t peakAlloc; + size_t totalAlloc; + size_t numMallocs; + size_t numFrees; + size_t numReallocs; + Memento_mutex mutex; + Memento_range *squeezes; + int squeezes_num; + int squeezes_pos; +} memento; + +#define MEMENTO_EXTRASIZE (sizeof(Memento_BlkHeader) + Memento_PostSize) + +/* Round up size S to the next multiple of N (where N is a power of 2) */ +#define MEMENTO_ROUNDUP(S,N) ((S + N-1)&~(N-1)) + +#define MEMBLK_SIZE(s) MEMENTO_ROUNDUP(s + MEMENTO_EXTRASIZE, MEMENTO_MAXALIGN) + +#define MEMBLK_FROMBLK(B) (&((Memento_BlkHeader*)(void *)(B))[-1]) +#define 
MEMBLK_TOBLK(B) ((void*)(&((Memento_BlkHeader*)(void*)(B))[1])) +#define MEMBLK_POSTPTR(B) \ + (&((unsigned char *)(void *)(B))[(B)->rawsize + sizeof(Memento_BlkHeader)]) + +enum +{ + SkipStackBackTraceLevels = 4 +}; + +#if defined(MEMENTO_STACKTRACE_METHOD) && MEMENTO_STACKTRACE_METHOD == 1 +extern size_t backtrace(void **, int); +extern void backtrace_symbols_fd(void **, size_t, int); +extern char **backtrace_symbols(void **, size_t); + +#define MEMENTO_BACKTRACE_MAX 256 +static void (*print_stack_value)(void *address); + +/* Libbacktrace gubbins - relies on us having libdl to load the .so */ +#ifdef HAVE_LIBDL +#include <dlfcn.h> + +typedef void (*backtrace_error_callback) (void *data, const char *msg, int errnum); + +typedef struct backtrace_state *(*backtrace_create_state_type)( + const char *filename, int threaded, + backtrace_error_callback error_callback, void *data); + +typedef int (*backtrace_full_callback) (void *data, uintptr_t pc, + const char *filename, int lineno, + const char *function); + +typedef int (*backtrace_pcinfo_type)(struct backtrace_state *state, + uintptr_t pc, + backtrace_full_callback callback, + backtrace_error_callback error_callback, + void *data); + +typedef void (*backtrace_syminfo_callback) (void *data, uintptr_t pc, + const char *symname, + uintptr_t symval, + uintptr_t symsize); + +typedef int (*backtrace_syminfo_type)(struct backtrace_state *state, + uintptr_t addr, + backtrace_syminfo_callback callback, + backtrace_error_callback error_callback, + void *data); + +static backtrace_syminfo_type backtrace_syminfo; +static backtrace_create_state_type backtrace_create_state; +static backtrace_pcinfo_type backtrace_pcinfo; +static struct backtrace_state *my_backtrace_state; +static void *libbt; +static char backtrace_exe[4096]; +static void *current_addr; + +static void error2_cb(void *data, const char *msg, int errnum) +{ + (void)data; + (void)msg; + (void)errnum; +} + +static void syminfo_cb(void *data, uintptr_t pc, const char 
*symname, uintptr_t symval, uintptr_t symsize) +{ + (void)data; + (void)symval; + (void)symsize; + if (sizeof(void *) == 4) + fprintf(stderr, " 0x%08lx %s\n", pc, symname?symname:"?"); + else + fprintf(stderr, " 0x%016lx %s\n", pc, symname?symname:"?"); +} + +static void error_cb(void *data, const char *msg, int errnum) +{ + (void)data; + (void)msg; + (void)errnum; + backtrace_syminfo(my_backtrace_state, + (uintptr_t)current_addr, + syminfo_cb, + error2_cb, + NULL); +} + +static int full_cb(void *data, uintptr_t pc, const char *fname, int line, const char *fn) +{ + (void)data; + if (sizeof(void *) == 4) + fprintf(stderr, " 0x%08lx %s(%s:%d)\n", pc, fn?fn:"?", fname?fname:"?", line); + else + fprintf(stderr, " 0x%016lx %s(%s:%d)\n", pc, fn?fn:"?", fname?fname:"?", line); + return 0; +} + +static void print_stack_libbt(void *addr) +{ + current_addr = addr; + backtrace_pcinfo(my_backtrace_state, + (uintptr_t)addr, + full_cb, + error_cb, + NULL); +} + +static void print_stack_libbt_failed(void *addr) +{ + char **strings; +#if 0 + /* Let's use a hack from Julian Smith to call gdb to extract the information */ + /* Disabled for now, as I can't make this work. */ + static char command[1024]; + int e; + static int gdb_invocation_failed = 0; + + if (gdb_invocation_failed == 0) + { + snprintf(command, sizeof(command), + //"gdb -q --batch -p=%i -ex 'info line *%p' -ex quit 2>/dev/null", + "gdb -q --batch -p=%i -ex 'info line *%p' -ex quit 2>/dev/null| egrep -v '(Thread debugging using)|(Using host libthread_db library)|(A debugging session is active)|(will be detached)|(Quit anyway)|(No such file or directory)|(^0x)|(^$)'", + getpid(), addr); + printf("%s\n", command); + e = system(command); + if (e == 0) + return; /* That'll do! */ + gdb_invocation_failed = 1; /* If it's failed once, it'll probably keep failing. */ + } +#endif + + /* We couldn't even get gdb! Make do. 
*/ + strings = backtrace_symbols(&addr, 1); + + if (strings == NULL || strings[0] == NULL) + { + if (sizeof(void *) == 4) + fprintf(stderr, " [0x%08lx]\n", (uintptr_t)addr); + else + fprintf(stderr, " [0x%016lx]\n", (uintptr_t)addr); + } + else + { + fprintf(stderr, " %s\n", strings[0]); + } + (free)(strings); +} + +static int init_libbt(void) +{ + static int libbt_inited = 0; + + if (libbt_inited) + return 0; + libbt_inited = 1; + + libbt = dlopen("libbacktrace.so", RTLD_LAZY); + if (libbt == NULL) + libbt = dlopen("/opt/lib/libbacktrace.so", RTLD_LAZY); + if (libbt == NULL) + libbt = dlopen("/lib/libbacktrace.so", RTLD_LAZY); + if (libbt == NULL) + libbt = dlopen("/usr/lib/libbacktrace.so", RTLD_LAZY); + if (libbt == NULL) + libbt = dlopen("/usr/local/lib/libbacktrace.so", RTLD_LAZY); + if (libbt == NULL) + goto fail; + + backtrace_create_state = dlsym(libbt, "backtrace_create_state"); + backtrace_syminfo = dlsym(libbt, "backtrace_syminfo"); + backtrace_pcinfo = dlsym(libbt, "backtrace_pcinfo"); + + if (backtrace_create_state == NULL || + backtrace_syminfo == NULL || + backtrace_pcinfo == NULL) + { + goto fail; + } + + my_backtrace_state = backtrace_create_state(backtrace_exe, + 1 /*BACKTRACE_SUPPORTS_THREADS*/, + error_cb, + NULL); + if (my_backtrace_state == NULL) + goto fail; + + print_stack_value = print_stack_libbt; + + return 1; + + fail: + fprintf(stderr, + "MEMENTO: libbacktrace.so failed to load; backtraces will be sparse.\n" + "MEMENTO: See memento.h for how to rectify this.\n"); + libbt = NULL; + backtrace_create_state = NULL; + backtrace_syminfo = NULL; + print_stack_value = print_stack_libbt_failed; + return 0; +} +#endif + +static void print_stack_default(void *addr) +{ + char **strings = backtrace_symbols(&addr, 1); + + if (strings == NULL || strings[0] == NULL) + { + fprintf(stderr, " ["FMTP"]\n", addr); + } +#ifdef HAVE_LIBDL + else if (strchr(strings[0], ':') == NULL) + { + /* Probably a "path [address]" format string */ + char *s = 
strchr(strings[0], ' '); + + if (s != strings[0]) + { + memcpy(backtrace_exe, strings[0], s - strings[0]); + backtrace_exe[s-strings[0]] = 0; + init_libbt(); + print_stack_value(addr); + } + } +#endif + else + { + fprintf(stderr, " %s\n", strings[0]); + } + free(strings); +} + +static void Memento_initStacktracer(void) +{ + print_stack_value = print_stack_default; +} + +static int Memento_getStacktrace(void **stack, int *skip) +{ + size_t num; + + num = backtrace(&stack[0], MEMENTO_BACKTRACE_MAX); + + *skip = SkipStackBackTraceLevels; + if (num <= SkipStackBackTraceLevels) + return 0; + return (int)(num-SkipStackBackTraceLevels); +} + +static void Memento_showStacktrace(void **stack, int numberOfFrames) +{ + int i; + + for (i = 0; i < numberOfFrames; i++) + { + print_stack_value(stack[i]); + } +} +#elif defined(MEMENTO_STACKTRACE_METHOD) && MEMENTO_STACKTRACE_METHOD == 2 +#include <Windows.h> + +/* We use DbgHelp.dll rather than DbgHelp.lib. This avoids us needing + * extra link time complications, and enables us to fall back gracefully + * if the DLL cannot be found. + * + * To achieve this we have our own potted versions of the required types + * inline here. 
+ */ +#ifdef _WIN64 +typedef DWORD64 DWORD_NATIVESIZED; +#else +typedef DWORD DWORD_NATIVESIZED; +#endif + +#define MEMENTO_BACKTRACE_MAX 64 + +typedef USHORT (__stdcall *My_CaptureStackBackTraceType)(__in ULONG, __in ULONG, __out PVOID*, __out_opt PULONG); + +typedef struct MY_IMAGEHLP_LINE { + DWORD SizeOfStruct; + PVOID Key; + DWORD LineNumber; + PCHAR FileName; + DWORD_NATIVESIZED Address; +} MY_IMAGEHLP_LINE, *MY_PIMAGEHLP_LINE; + +typedef BOOL (__stdcall *My_SymGetLineFromAddrType)(HANDLE hProcess, DWORD_NATIVESIZED dwAddr, PDWORD pdwDisplacement, MY_PIMAGEHLP_LINE Line); + +typedef struct MY_SYMBOL_INFO { + ULONG SizeOfStruct; + ULONG TypeIndex; // Type Index of symbol + ULONG64 Reserved[2]; + ULONG info; + ULONG Size; + ULONG64 ModBase; // Base Address of module containing this symbol + ULONG Flags; + ULONG64 Value; // Value of symbol, ValuePresent should be 1 + ULONG64 Address; // Address of symbol including base address of module + ULONG Register; // register holding value or pointer to value + ULONG Scope; // scope of the symbol + ULONG Tag; // pdb classification + ULONG NameLen; // Actual length of name + ULONG MaxNameLen; + CHAR Name[1]; // Name of symbol +} MY_SYMBOL_INFO, *MY_PSYMBOL_INFO; + +typedef BOOL (__stdcall *My_SymFromAddrType)(HANDLE hProcess, DWORD64 Address, PDWORD64 Displacement, MY_PSYMBOL_INFO Symbol); +typedef BOOL (__stdcall *My_SymInitializeType)(HANDLE hProcess, PSTR UserSearchPath, BOOL fInvadeProcess); + +static My_CaptureStackBackTraceType Memento_CaptureStackBackTrace; +static My_SymGetLineFromAddrType Memento_SymGetLineFromAddr; +static My_SymFromAddrType Memento_SymFromAddr; +static My_SymInitializeType Memento_SymInitialize; +static HANDLE Memento_process; + +static void Memento_initStacktracer(void) +{ + HMODULE mod = LoadLibrary("kernel32.dll"); + + if (mod == NULL) + return; + Memento_CaptureStackBackTrace = (My_CaptureStackBackTraceType)(GetProcAddress(mod, "RtlCaptureStackBackTrace")); + if 
(Memento_CaptureStackBackTrace == NULL) + return; + mod = LoadLibrary("Dbghelp.dll"); + if (mod == NULL) { + Memento_CaptureStackBackTrace = NULL; + return; + } + Memento_SymGetLineFromAddr = + (My_SymGetLineFromAddrType)(GetProcAddress(mod, +#ifdef _WIN64 + "SymGetLineFromAddr64" +#else + "SymGetLineFromAddr" +#endif + )); + if (Memento_SymGetLineFromAddr == NULL) { + Memento_CaptureStackBackTrace = NULL; + return; + } + Memento_SymFromAddr = (My_SymFromAddrType)(GetProcAddress(mod, "SymFromAddr")); + if (Memento_SymFromAddr == NULL) { + Memento_CaptureStackBackTrace = NULL; + return; + } + Memento_SymInitialize = (My_SymInitializeType)(GetProcAddress(mod, "SymInitialize")); + if (Memento_SymInitialize == NULL) { + Memento_CaptureStackBackTrace = NULL; + return; + } + Memento_process = GetCurrentProcess(); + Memento_SymInitialize(Memento_process, NULL, TRUE); +} + +static int Memento_getStacktrace(void **stack, int *skip) +{ + if (Memento_CaptureStackBackTrace == NULL) + return 0; + + *skip = 0; + /* Limit us to 63 levels due to windows bug */ + return Memento_CaptureStackBackTrace(SkipStackBackTraceLevels, 63-SkipStackBackTraceLevels, stack, NULL); +} + +static void Memento_showStacktrace(void **stack, int numberOfFrames) +{ + MY_IMAGEHLP_LINE line; + int i; + char symbol_buffer[sizeof(MY_SYMBOL_INFO) + 1024 + 1]; + MY_SYMBOL_INFO *symbol = (MY_SYMBOL_INFO *)symbol_buffer; + + symbol->MaxNameLen = 1024; + symbol->SizeOfStruct = sizeof(MY_SYMBOL_INFO); + line.SizeOfStruct = sizeof(MY_IMAGEHLP_LINE); + for (i = 0; i < numberOfFrames; i++) + { + DWORD64 dwDisplacement64; + DWORD dwDisplacement; + Memento_SymFromAddr(Memento_process, (DWORD64)(stack[i]), &dwDisplacement64, symbol); + Memento_SymGetLineFromAddr(Memento_process, (DWORD_NATIVESIZED)(stack[i]), &dwDisplacement, &line); + fprintf(stderr, " %s in %s:%d\n", symbol->Name, line.FileName, line.LineNumber); + } +} +#elif defined(MEMENTO_STACKTRACE_METHOD) && MEMENTO_STACKTRACE_METHOD == 3 + +#include <unwind.h> 
+#include <dlfcn.h> + +/* From cxxabi.h */ +extern char* __cxa_demangle(const char* mangled_name, + char* output_buffer, + size_t* length, + int* status); + +static void Memento_initStacktracer(void) +{ +} + +#define MEMENTO_BACKTRACE_MAX 256 + +typedef struct +{ + int count; + void **addr; +} my_unwind_details; + +static _Unwind_Reason_Code unwind_populate_callback(struct _Unwind_Context *context, + void *arg) +{ + my_unwind_details *uw = (my_unwind_details *)arg; + int count = uw->count; + + if (count >= MEMENTO_BACKTRACE_MAX) + return _URC_END_OF_STACK; + + uw->addr[count] = (void *)_Unwind_GetIP(context); + uw->count++; + + return _URC_NO_REASON; +} + +static int Memento_getStacktrace(void **stack, int *skip) +{ + my_unwind_details uw = { 0, stack }; + + *skip = 0; + + /* Collect the backtrace. Deliberately only unwind once, + * and avoid using malloc etc until this completes just + * in case. */ + _Unwind_Backtrace(unwind_populate_callback, &uw); + if (uw.count <= SkipStackBackTraceLevels) + return 0; + + *skip = SkipStackBackTraceLevels; + return uw.count-SkipStackBackTraceLevels; +} + +static void Memento_showStacktrace(void **stack, int numberOfFrames) +{ + int i; + + for (i = 0; i < numberOfFrames; i++) + { + Dl_info info; + if (dladdr(stack[i], &info)) + { + int status = 0; + const char *sym = info.dli_sname ? info.dli_sname : "<unknown>"; + char *demangled = __cxa_demangle(sym, NULL, 0, &status); + int offset = stack[i] - info.dli_saddr; + fprintf(stderr, " ["FMTP"]%s(+0x%x)\n", stack[i], demangled && status == 0 ? 
demangled : sym, offset); + free(demangled); + } + else + { + fprintf(stderr, " ["FMTP"]\n", stack[i]); + } + } +} + +#else +static void Memento_initStacktracer(void) +{ +} + +static int Memento_getStacktrace(void **stack, int *skip) +{ + *skip = 0; + return 0; +} + +static void Memento_showStacktrace(void **stack, int numberOfFrames) +{ +} +#endif /* MEMENTO_STACKTRACE_METHOD */ + +#ifdef MEMENTO_DETAILS +static void Memento_storeDetails(Memento_BlkHeader *head, int type) +{ + void *stack[MEMENTO_BACKTRACE_MAX]; + Memento_BlkDetails *details; + int count; + int skip; + + if (head == NULL) + return; + +#ifdef MEMENTO_STACKTRACE_METHOD + count = Memento_getStacktrace(stack, &skip); +#else + skip = 0; + count = 0; +#endif + + details = MEMENTO_UNDERLYING_MALLOC(sizeof(*details) + (count-1) * sizeof(void *)); + if (details == NULL) + return; + + if (count) + memcpy(&details->stack, &stack[skip], count * sizeof(void *)); + + details->type = (char)type; + details->count = (char)count; + details->sequence = memento.sequence; + details->next = NULL; + VALGRIND_MAKE_MEM_DEFINED(&head->details_tail, sizeof(head->details_tail)); + *head->details_tail = details; + head->details_tail = &details->next; + VALGRIND_MAKE_MEM_NOACCESS(&head->details_tail, sizeof(head->details_tail)); +} +#endif + +void (Memento_bt)(void) +{ +#ifdef MEMENTO_STACKTRACE_METHOD + void *stack[MEMENTO_BACKTRACE_MAX]; + int count; + int skip; + + count = Memento_getStacktrace(stack, &skip); + Memento_showStacktrace(&stack[skip-2], count-skip+2); +#endif +} + +static void Memento_bt_internal(int skip2) +{ +#ifdef MEMENTO_STACKTRACE_METHOD + void *stack[MEMENTO_BACKTRACE_MAX]; + int count; + int skip; + + count = Memento_getStacktrace(stack, &skip); + Memento_showStacktrace(&stack[skip+skip2], count-skip-skip2); +#endif +} + +static int Memento_checkAllMemoryLocked(void); + +void Memento_breakpoint(void) +{ + /* A handy externally visible function for breakpointing */ +#if 0 /* Enable this to force 
automatic breakpointing */ +#ifndef NDEBUG +#ifdef _MSC_VER + __asm int 3; +#endif +#endif +#endif +} + +static void Memento_init(void); + +#define MEMENTO_LOCK() \ +do { if (!memento.inited) Memento_init(); MEMENTO_DO_LOCK(); } while (0) + +#define MEMENTO_UNLOCK() \ +do { MEMENTO_DO_UNLOCK(); } while (0) + +/* Do this as a macro to prevent another level in the callstack, + * which is annoying while stepping. */ +#define Memento_breakpointLocked() \ +do { MEMENTO_UNLOCK(); Memento_breakpoint(); MEMENTO_LOCK(); } while (0) + +static void Memento_addBlockHead(Memento_Blocks *blks, + Memento_BlkHeader *b, + int type) +{ + if (blks->tail == NULL) + blks->tail = b; + b->next = blks->head; + b->prev = NULL; + if (blks->head) + { + VALGRIND_MAKE_MEM_DEFINED(&blks->head->prev, sizeof(blks->head->prev)); + blks->head->prev = b; + VALGRIND_MAKE_MEM_NOACCESS(&blks->head->prev, sizeof(blks->head->prev)); + } + blks->head = b; +#ifndef MEMENTO_LEAKONLY + memset(b->preblk, MEMENTO_PREFILL, Memento_PreSize); + memset(MEMBLK_POSTPTR(b), MEMENTO_POSTFILL, Memento_PostSize); +#endif + VALGRIND_MAKE_MEM_NOACCESS(MEMBLK_POSTPTR(b), Memento_PostSize); + if (type == 0) { /* malloc */ + VALGRIND_MAKE_MEM_UNDEFINED(MEMBLK_TOBLK(b), b->rawsize); + } else if (type == 1) { /* free */ + VALGRIND_MAKE_MEM_NOACCESS(MEMBLK_TOBLK(b), b->rawsize); + } + VALGRIND_MAKE_MEM_NOACCESS(b, sizeof(Memento_BlkHeader)); +} + +static void Memento_addBlockTail(Memento_Blocks *blks, + Memento_BlkHeader *b, + int type) +{ + VALGRIND_MAKE_MEM_DEFINED(&blks->tail, sizeof(Memento_BlkHeader *)); + if (blks->head == NULL) + blks->head = b; + b->prev = blks->tail; + b->next = NULL; + if (blks->tail) { + VALGRIND_MAKE_MEM_DEFINED(&blks->tail->next, sizeof(blks->tail->next)); + blks->tail->next = b; + VALGRIND_MAKE_MEM_NOACCESS(&blks->tail->next, sizeof(blks->tail->next)); + } + blks->tail = b; +#ifndef MEMENTO_LEAKONLY + memset(b->preblk, MEMENTO_PREFILL, Memento_PreSize); + memset(MEMBLK_POSTPTR(b), 
MEMENTO_POSTFILL, Memento_PostSize); +#endif + VALGRIND_MAKE_MEM_NOACCESS(MEMBLK_POSTPTR(b), Memento_PostSize); + if (type == 0) { /* malloc */ + VALGRIND_MAKE_MEM_UNDEFINED(MEMBLK_TOBLK(b), b->rawsize); + } else if (type == 1) { /* free */ + VALGRIND_MAKE_MEM_NOACCESS(MEMBLK_TOBLK(b), b->rawsize); + } + VALGRIND_MAKE_MEM_NOACCESS(b, sizeof(Memento_BlkHeader)); + VALGRIND_MAKE_MEM_NOACCESS(&blks->tail, sizeof(Memento_BlkHeader *)); +} + +typedef struct BlkCheckData { + int found; + int preCorrupt; + int postCorrupt; + int freeCorrupt; + size_t index; +} BlkCheckData; + +#ifndef MEMENTO_LEAKONLY +static int Memento_Internal_checkAllocedBlock(Memento_BlkHeader *b, void *arg) +{ + int i; + MEMENTO_UINT32 *ip; + unsigned char *p; + BlkCheckData *data = (BlkCheckData *)arg; + + ip = (MEMENTO_UINT32 *)(void *)(b->preblk); + i = Memento_PreSize>>2; + do { + if (*ip++ != MEMENTO_PREFILL_UINT32) + goto pre_corrupt; + } while (--i); + if (0) { +pre_corrupt: + data->preCorrupt = 1; + } + /* Postfill may not be aligned, so have to be slower */ + p = MEMBLK_POSTPTR(b); + i = Memento_PostSize-4; + if ((intptr_t)p & 1) + { + if (*p++ != MEMENTO_POSTFILL) + goto post_corrupt; + i--; + } + if ((intptr_t)p & 2) + { + if (*(MEMENTO_UINT16 *)p != MEMENTO_POSTFILL_UINT16) + goto post_corrupt; + p += 2; + i -= 2; + } + do { + if (*(MEMENTO_UINT32 *)p != MEMENTO_POSTFILL_UINT32) + goto post_corrupt; + p += 4; + i -= 4; + } while (i >= 0); + if (i & 2) + { + if (*(MEMENTO_UINT16 *)p != MEMENTO_POSTFILL_UINT16) + goto post_corrupt; + p += 2; + } + if (i & 1) + { + if (*p != MEMENTO_POSTFILL) + goto post_corrupt; + } + if (0) { +post_corrupt: + data->postCorrupt = 1; + } + if ((data->freeCorrupt | data->preCorrupt | data->postCorrupt) == 0) { + b->lastCheckedOK = memento.sequence; + } + data->found |= 1; + return 0; +} + +static int Memento_Internal_checkFreedBlock(Memento_BlkHeader *b, void *arg) +{ + size_t i; + unsigned char *p; + BlkCheckData *data = (BlkCheckData *)arg; + + p = 
MEMBLK_TOBLK(b); /* p will always be aligned */ + i = b->rawsize; + /* Attempt to speed this up by checking an (aligned) int at a time */ + if (i >= 4) { + i -= 4; + do { + if (*(MEMENTO_UINT32 *)p != MEMENTO_FREEFILL_UINT32) + goto mismatch4; + p += 4; + i -= 4; + } while (i > 0); + i += 4; + } + if (i & 2) { + if (*(MEMENTO_UINT16 *)p != MEMENTO_FREEFILL_UINT16) + goto mismatch; + p += 2; + i -= 2; + } + if (0) { +mismatch4: + i += 4; + } +mismatch: + while (i) { + if (*p++ != (unsigned char)MEMENTO_FREEFILL) + break; + i--; + } + if (i) { + data->freeCorrupt = 1; + data->index = b->rawsize-i; + } + return Memento_Internal_checkAllocedBlock(b, arg); +} +#endif /* MEMENTO_LEAKONLY */ + +static void Memento_removeBlock(Memento_Blocks *blks, + Memento_BlkHeader *b) +{ + VALGRIND_MAKE_MEM_DEFINED(b, sizeof(*b)); + if (b->next) { + VALGRIND_MAKE_MEM_DEFINED(&b->next->prev, sizeof(b->next->prev)); + b->next->prev = b->prev; + VALGRIND_MAKE_MEM_NOACCESS(&b->next->prev, sizeof(b->next->prev)); + } + if (b->prev) { + VALGRIND_MAKE_MEM_DEFINED(&b->prev->next, sizeof(b->prev->next)); + b->prev->next = b->next; + VALGRIND_MAKE_MEM_NOACCESS(&b->prev->next, sizeof(b->prev->next)); + } + if (blks->tail == b) + blks->tail = b->prev; + if (blks->head == b) + blks->head = b->next; +} + +static void free_block(Memento_BlkHeader *head) +{ +#ifdef MEMENTO_DETAILS + Memento_BlkDetails *details = head->details; + + while (details) + { + Memento_BlkDetails *next = details->next; + MEMENTO_UNDERLYING_FREE(details); + details = next; + } +#endif + MEMENTO_UNDERLYING_FREE(head); +} + +static int Memento_Internal_makeSpace(size_t space) +{ + /* If too big, it can never go on the freelist */ + if (space > MEMENTO_FREELIST_MAX_SINGLE_BLOCK) + return 0; + /* Pretend we added it on. 
*/ + memento.freeListSize += space; + /* Ditch blocks until it fits within our limit */ + while (memento.freeListSize > MEMENTO_FREELIST_MAX) { + Memento_BlkHeader *head = memento.free.head; + VALGRIND_MAKE_MEM_DEFINED(head, sizeof(*head)); + memento.free.head = head->next; + memento.freeListSize -= MEMBLK_SIZE(head->rawsize); + free_block(head); + } + /* Make sure we haven't just completely emptied the free list */ + /* (This should never happen, but belt and braces... */ + if (memento.free.head == NULL) + memento.free.tail = NULL; + return 1; +} + +static int Memento_appBlocks(Memento_Blocks *blks, + int (*app)(Memento_BlkHeader *, + void *), + void *arg) +{ + Memento_BlkHeader *head = blks->head; + Memento_BlkHeader *next; + int result; + while (head) { + VALGRIND_MAKE_MEM_DEFINED(head, sizeof(Memento_BlkHeader)); + VALGRIND_MAKE_MEM_DEFINED(MEMBLK_TOBLK(head), + head->rawsize + Memento_PostSize); + result = app(head, arg); + next = head->next; + VALGRIND_MAKE_MEM_NOACCESS(MEMBLK_POSTPTR(head), Memento_PostSize); + VALGRIND_MAKE_MEM_NOACCESS(head, sizeof(Memento_BlkHeader)); + if (result) + return result; + head = next; + } + return 0; +} + +#ifndef MEMENTO_LEAKONLY +/* Distrustful - check the block is a real one */ +static int Memento_appBlockUser(Memento_Blocks *blks, + int (*app)(Memento_BlkHeader *, + void *), + void *arg, + Memento_BlkHeader *b) +{ + Memento_BlkHeader *head = blks->head; + Memento_BlkHeader *next; + int result; + while (head && head != b) { + VALGRIND_MAKE_MEM_DEFINED(head, sizeof(Memento_BlkHeader)); + next = head->next; + VALGRIND_MAKE_MEM_NOACCESS(MEMBLK_POSTPTR(head), Memento_PostSize); + head = next; + } + if (head == b) { + VALGRIND_MAKE_MEM_DEFINED(head, sizeof(Memento_BlkHeader)); + VALGRIND_MAKE_MEM_DEFINED(MEMBLK_TOBLK(head), + head->rawsize + Memento_PostSize); + result = app(head, arg); + VALGRIND_MAKE_MEM_NOACCESS(MEMBLK_POSTPTR(head), Memento_PostSize); + VALGRIND_MAKE_MEM_NOACCESS(head, sizeof(Memento_BlkHeader)); + return 
result; + } + return 0; +} + +static int Memento_appBlock(Memento_Blocks *blks, + int (*app)(Memento_BlkHeader *, + void *), + void *arg, + Memento_BlkHeader *b) +{ + int result; + (void)blks; + VALGRIND_MAKE_MEM_DEFINED(b, sizeof(Memento_BlkHeader)); + VALGRIND_MAKE_MEM_DEFINED(MEMBLK_TOBLK(b), + b->rawsize + Memento_PostSize); + result = app(b, arg); + VALGRIND_MAKE_MEM_NOACCESS(MEMBLK_POSTPTR(b), Memento_PostSize); + VALGRIND_MAKE_MEM_NOACCESS(b, sizeof(Memento_BlkHeader)); + return result; +} +#endif /* MEMENTO_LEAKONLY */ + +static int showBlock(Memento_BlkHeader *b, int space) +{ + int seq; + VALGRIND_MAKE_MEM_DEFINED(b, sizeof(Memento_BlkHeader)); + fprintf(stderr, FMTP":(size=" FMTZ ",num=%d)", + MEMBLK_TOBLK(b), (FMTZ_CAST)b->rawsize, b->sequence); + if (b->label) + fprintf(stderr, "%c(%s)", space, b->label); + if (b->flags & Memento_Flag_KnownLeak) + fprintf(stderr, "(Known Leak)"); + seq = b->sequence; + VALGRIND_MAKE_MEM_NOACCESS(b, sizeof(Memento_BlkHeader)); + return seq; +} + +static void blockDisplay(Memento_BlkHeader *b, int n) +{ + n++; + while (n > 40) + { + fprintf(stderr, "*"); + n -= 40; + } + while(n > 0) + { + int i = n; + if (i > 32) + i = 32; + n -= i; + fprintf(stderr, "%s", &" "[32-i]); + } + showBlock(b, '\t'); + fprintf(stderr, "\n"); +} + +static int Memento_listBlock(Memento_BlkHeader *b, + void *arg) +{ + size_t *counts = (size_t *)arg; + blockDisplay(b, 0); + counts[0]++; + VALGRIND_MAKE_MEM_DEFINED(b, sizeof(Memento_BlkHeader)); + counts[1]+= b->rawsize; + VALGRIND_MAKE_MEM_NOACCESS(b, sizeof(Memento_BlkHeader)); + return 0; +} + +static void doNestedDisplay(Memento_BlkHeader *b, + int depth) +{ + /* Try and avoid recursion if we can help it */ + do { + Memento_BlkHeader *c = NULL; + blockDisplay(b, depth); + VALGRIND_MAKE_MEM_DEFINED(b, sizeof(Memento_BlkHeader)); + if (b->sibling) { + c = b->child; + b = b->sibling; + } else { + b = b->child; + depth++; + } + VALGRIND_MAKE_MEM_NOACCESS(b, sizeof(Memento_BlkHeader)); + if (c) + 
doNestedDisplay(c, depth+1); + } while (b); +} + +static int ptrcmp(const void *a_, const void *b_) +{ + const char **a = (const char **)a_; + const char **b = (const char **)b_; + return (int)(*a-*b); +} + +static +int Memento_listBlocksNested(void) +{ + int count, i; + size_t size; + Memento_BlkHeader *b, *prev; + void **blocks, *minptr, *maxptr; + intptr_t mask; + + /* Count the blocks */ + count = 0; + size = 0; + for (b = memento.used.head; b; b = b->next) { + VALGRIND_MAKE_MEM_DEFINED(b, sizeof(*b)); + size += b->rawsize; + count++; + } + + /* Make our block list */ + blocks = MEMENTO_UNDERLYING_MALLOC(sizeof(void *) * count); + if (blocks == NULL) + return 1; + + /* Populate our block list */ + b = memento.used.head; + minptr = maxptr = MEMBLK_TOBLK(b); + mask = (intptr_t)minptr; + for (i = 0; b; b = b->next, i++) { + void *p = MEMBLK_TOBLK(b); + mask &= (intptr_t)p; + if (p < minptr) + minptr = p; + if (p > maxptr) + maxptr = p; + blocks[i] = p; + b->flags &= ~Memento_Flag_HasParent; + b->child = NULL; + b->sibling = NULL; + b->prev = NULL; /* parent */ + } + qsort(blocks, count, sizeof(void *), ptrcmp); + + /* Now, calculate tree */ + for (b = memento.used.head; b; b = b->next) { + char *p = MEMBLK_TOBLK(b); + size_t end = (b->rawsize < MEMENTO_PTRSEARCH ? b->rawsize : MEMENTO_PTRSEARCH); + size_t z; + VALGRIND_MAKE_MEM_DEFINED(p, end); + end -= sizeof(void *)-1; + for (z = MEMENTO_SEARCH_SKIP; z < end; z += sizeof(void *)) { + void *q = *(void **)(&p[z]); + void **r; + + /* Do trivial checks on pointer */ + if ((mask & (intptr_t)q) != mask || q < minptr || q > maxptr) + continue; + + /* Search for pointer */ + r = bsearch(&q, blocks, count, sizeof(void *), ptrcmp); + if (r) { + /* Found child */ + Memento_BlkHeader *child = MEMBLK_FROMBLK(*r); + Memento_BlkHeader *parent; + + /* We're assuming tree structure, not graph - ignore second + * and subsequent pointers. 
*/ + if (child->prev != NULL) /* parent */ + continue; + if (child->flags & Memento_Flag_HasParent) + continue; + + /* Not interested in pointers to ourself! */ + if (child == b) + continue; + + /* We're also assuming acyclicness here. If this is one of + * our parents, ignore it. */ + parent = b->prev; /* parent */ + while (parent != NULL && parent != child) + parent = parent->prev; /* parent */ + if (parent == child) + continue; + + child->sibling = b->child; + b->child = child; + child->prev = b; /* parent */ + child->flags |= Memento_Flag_HasParent; + } + } + } + + /* Now display with nesting */ + for (b = memento.used.head; b; b = b->next) { + if ((b->flags & Memento_Flag_HasParent) == 0) + doNestedDisplay(b, 0); + } + fprintf(stderr, " Total number of blocks = %d\n", count); + fprintf(stderr, " Total size of blocks = "FMTZ"\n", (FMTZ_CAST)size); + + MEMENTO_UNDERLYING_FREE(blocks); + + /* Now put the blocks back for valgrind, and restore the prev + * and magic values. */ + prev = NULL; + for (b = memento.used.head; b;) { + Memento_BlkHeader *next = b->next; + b->prev = prev; + b->child = MEMENTO_CHILD_MAGIC; + b->sibling = MEMENTO_SIBLING_MAGIC; + prev = b; + VALGRIND_MAKE_MEM_NOACCESS(b, sizeof(*b)); + b = next; + } + + return 0; +} + +void Memento_listBlocks(void) +{ + MEMENTO_LOCK(); + fprintf(stderr, "Allocated blocks:\n"); + if (Memento_listBlocksNested()) + { + size_t counts[2]; + counts[0] = 0; + counts[1] = 0; + Memento_appBlocks(&memento.used, Memento_listBlock, &counts[0]); + fprintf(stderr, " Total number of blocks = "FMTZ"\n", (FMTZ_CAST)counts[0]); + fprintf(stderr, " Total size of blocks = "FMTZ"\n", (FMTZ_CAST)counts[1]); + } + MEMENTO_UNLOCK(); +} + +static int Memento_listNewBlock(Memento_BlkHeader *b, + void *arg) +{ + if (b->flags & Memento_Flag_OldBlock) + return 0; + b->flags |= Memento_Flag_OldBlock; + return Memento_listBlock(b, arg); +} + +void Memento_listNewBlocks(void) +{ + size_t counts[2]; + MEMENTO_LOCK(); + counts[0] = 0; + 
counts[1] = 0; + fprintf(stderr, "Blocks allocated and still extant since last list:\n"); + Memento_appBlocks(&memento.used, Memento_listNewBlock, &counts[0]); + fprintf(stderr, " Total number of blocks = "FMTZ"\n", (FMTZ_CAST)counts[0]); + fprintf(stderr, " Total size of blocks = "FMTZ"\n", (FMTZ_CAST)counts[1]); + MEMENTO_UNLOCK(); +} + +static void Memento_endStats(void) +{ + fprintf(stderr, "Total memory malloced = "FMTZ" bytes\n", (FMTZ_CAST)memento.totalAlloc); + fprintf(stderr, "Peak memory malloced = "FMTZ" bytes\n", (FMTZ_CAST)memento.peakAlloc); + fprintf(stderr, FMTZ" mallocs, "FMTZ" frees, "FMTZ" reallocs\n", (FMTZ_CAST)memento.numMallocs, + (FMTZ_CAST)memento.numFrees, (FMTZ_CAST)memento.numReallocs); + fprintf(stderr, "Average allocation size "FMTZ" bytes\n", (FMTZ_CAST) + (memento.numMallocs != 0 ? memento.totalAlloc/memento.numMallocs: 0)); +} + +void Memento_stats(void) +{ + MEMENTO_LOCK(); + fprintf(stderr, "Current memory malloced = "FMTZ" bytes\n", (FMTZ_CAST)memento.alloc); + Memento_endStats(); + MEMENTO_UNLOCK(); +} + +#ifdef MEMENTO_DETAILS +static int showInfo(Memento_BlkHeader *b, void *arg) +{ + Memento_BlkDetails *details; + + (void)arg; + + fprintf(stderr, FMTP":(size="FMTZ",num=%d)", + MEMBLK_TOBLK(b), (FMTZ_CAST)b->rawsize, b->sequence); + if (b->label) + fprintf(stderr, " (%s)", b->label); + fprintf(stderr, "\nEvents:\n"); + + for (details = b->details; details; details = details->next) + { + if (memento.hideMultipleReallocs && + details->type == Memento_EventType_realloc && + details->next && + details->next->type == Memento_EventType_realloc) { + continue; + } + fprintf(stderr, " Event %d (%s)\n", details->sequence, eventType[(int)details->type]); + Memento_showStacktrace(details->stack, details->count); + } + return 0; +} +#endif + +void Memento_listBlockInfo(void) +{ +#ifdef MEMENTO_DETAILS + MEMENTO_LOCK(); + fprintf(stderr, "Details of allocated blocks:\n"); + Memento_appBlocks(&memento.used, showInfo, NULL); + 
MEMENTO_UNLOCK(); +#endif +} + +static int Memento_nonLeakBlocksLeaked(void) +{ + Memento_BlkHeader *blk = memento.used.head; + while (blk) + { + Memento_BlkHeader *next; + int leaked; + VALGRIND_MAKE_MEM_DEFINED(blk, sizeof(*blk)); + leaked = ((blk->flags & Memento_Flag_KnownLeak) == 0); + next = blk->next; + VALGRIND_MAKE_MEM_DEFINED(blk, sizeof(*blk)); + if (leaked) + return 1; + blk = next; + } + return 0; +} + +void Memento_fin(void) +{ + Memento_checkAllMemory(); + if (!memento.segv) + { + Memento_endStats(); + if (Memento_nonLeakBlocksLeaked()) { + Memento_listBlocks(); +#ifdef MEMENTO_DETAILS + fprintf(stderr, "\n"); + Memento_listBlockInfo(); +#endif + Memento_breakpoint(); + } + } + if (memento.squeezing) { + if (memento.pattern == 0) + fprintf(stderr, "Memory squeezing @ %d complete%s\n", memento.squeezeAt, memento.segv ? " (with SEGV)" : ""); + else + fprintf(stderr, "Memory squeezing @ %d (%d) complete%s\n", memento.squeezeAt, memento.pattern, memento.segv ? " (with SEGV)" : ""); + } else if (memento.segv) { + fprintf(stderr, "Memento completed (with SEGV)\n"); + } + if (memento.failing) + { + fprintf(stderr, "MEMENTO_FAILAT=%d\n", memento.failAt); + fprintf(stderr, "MEMENTO_PATTERN=%d\n", memento.pattern); + } + if (memento.nextFailAt != 0) + { + fprintf(stderr, "MEMENTO_NEXTFAILAT=%d\n", memento.nextFailAt); + fprintf(stderr, "MEMENTO_NEXTPATTERN=%d\n", memento.nextPattern); + } + if (Memento_nonLeakBlocksLeaked() && memento.abortOnLeak) { + fprintf(stderr, "Calling abort() because blocks were leaked and MEMENTO_ABORT_ON_LEAK is set.\n"); + abort(); + } +} + +/* Reads number from <text> using strtol(). + * + * Params: + * text: + * text to read. + * out: + * pointer to output value. + * relative: + * *relative set to 1 if <text> starts with '+' or '-', else set to 0. + * end: + * *end is set to point to next unread character after number. + * + * Returns 0 on success, else -1. 
+ */ +static int read_number(const char *text, int *out, int *relative, char **end) +{ + if (text[0] == '+' || text[0] == '-') + *relative = 1; + else + *relative = 0; + errno = 0; + *out = (int)strtol(text, end, 0 /*base*/); + if (errno || *end == text) + { + fprintf(stderr, "Failed to parse number at start of '%s'.\n", text); + return -1; + } + if (0) + fprintf(stderr, "text='%s': *out=%i *relative=%i\n", + text, *out, *relative); + return 0; +} + +/* Reads number plus optional delta value from <text>. + * + * Evaluates <number> or <number>[+|-<delta>]. E.g. text='1234+2' sets *out=1236, + * text='1234-1' sets *out=1233. + * + * Params: + * text: + * text to read. + * out: + * pointer to output value. + * end: + * *end is set to point to next unread character after number. + * + * Returns 0 on success, else -1. + */ +static int read_number_delta(const char *text, int *out, char **end) +{ + int e; + int relative; + + e = read_number(text, out, &relative, end); + if (e) + return e; + if (relative) { + fprintf(stderr, "Base number should not start with '+' or '-' at start of '%s'.\n", + text); + return -1; + } + if (*end) { + if (**end == '-' || **end == '+') { + int delta; + e = read_number(*end, &delta, &relative, end); + if (e) + return e; + *out += delta; + } + } + if (0) fprintf(stderr, "text='%s': *out=%i\n", text, *out); + + return 0; +} + +/* Reads range. + * + * E.g.: + * text='115867-2' sets *begin=115865 *end=115866. + * text='115867-1..+3' sets *begin=115866 *end=115869. + * + * Supported patterns for text: + * <range> + * <value> - returns *begin=value *end=*begin+1. + * <value1>..<value2> - returns *begin=value1 *end=value2. + * <value>..+<number> - returns *begin=value *end=*begin+number. + * <value> + * <number> + * <number>+<number> + * <number>-<number> + * + * <number>: [0-9]+ + * + * If not specified, *end defaults to *begin+1. + * + * Returns 0 on success, else -1, with *string_end pointing to first unused + * character. 
+ */ +static int read_number_range(const char *text, int *begin, int *end, char **string_end) +{ + int e; + e = read_number_delta(text, begin, string_end); + if (e) + return e; + if (string_end && (*string_end)[0] == '.' && (*string_end)[1] == '.') { + int relative; + e = read_number((*string_end) + 2, end, &relative, string_end); + if (e) + return e; + if (relative) + *end += *begin; + } else { + *end = *begin + 1; + } + if (*end < *begin) { + fprintf(stderr, "Range %i..%i has negative extent, at start of '%s'.\n", + *begin, *end, text); + return -1; + } + if (0) fprintf(stderr, "text='%s': *begin=%i *end=%i\n", text, *begin, *end); + + return 0; +} + +/* Format: <range>[,<range>]+ + * + * For description of <range>, see read_number_range() above. + * + * E.g.: + * MEMENTO_SQUEEZES=1234-2..+4,2345,2350..+2 + */ +static int Memento_add_squeezes(const char *text) +{ + int e = 0; + for(;;) { + int begin; + int end; + char *string_end; + if (!*text) + break; + e = read_number_range(text, &begin, &end, &string_end); + if (e) + break; + if (*string_end && *string_end != ',') { + fprintf(stderr, "Expecting comma at start of '%s'.\n", string_end); + e = -1; + break; + } + fprintf(stderr, "Adding squeeze range %i..%i.\n", + begin, end); + memento.squeezes_num += 1; + memento.squeezes = MEMENTO_UNDERLYING_REALLOC( + memento.squeezes, + memento.squeezes_num * sizeof(*memento.squeezes) + ); + if (!memento.squeezes) { + fprintf(stderr, "Failed to allocate memory for memento.squeezes_num=%i\n", + memento.squeezes_num); + e = -1; + break; + } + memento.squeezes[memento.squeezes_num-1].begin = begin; + memento.squeezes[memento.squeezes_num-1].end = end; + + if (*string_end == 0) + break; + text = string_end + 1; + } + + return e; +} + +static void Memento_init(void) +{ + char *env; + memset(&memento, 0, sizeof(memento)); + memento.inited = 1; + memento.used.head = NULL; + memento.used.tail = NULL; + memento.free.head = NULL; + memento.free.tail = NULL; + memento.sequence = 0; + 
memento.countdown = 1024; + memento.squeezes = NULL; + memento.squeezes_num = 0; + memento.squeezes_pos = 0; + + env = getenv("MEMENTO_FAILAT"); + memento.failAt = (env ? atoi(env) : 0); + + env = getenv("MEMENTO_BREAKAT"); + memento.breakAt = (env ? atoi(env) : 0); + + env = getenv("MEMENTO_PARANOIA"); + memento.paranoia = (env ? atoi(env) : 0); + if (memento.paranoia == 0) + memento.paranoia = -1024; + + env = getenv("MEMENTO_PARANOIDAT"); + memento.paranoidAt = (env ? atoi(env) : 0); + + env = getenv("MEMENTO_SQUEEZEAT"); + memento.squeezeAt = (env ? atoi(env) : 0); + + env = getenv("MEMENTO_PATTERN"); + memento.pattern = (env ? atoi(env) : 0); + + env = getenv("MEMENTO_HIDE_MULTIPLE_REALLOCS"); + memento.hideMultipleReallocs = (env ? atoi(env) : 0); + + env = getenv("MEMENTO_ABORT_ON_LEAK"); + memento.abortOnLeak = (env ? atoi(env) : 0); + + env = getenv("MEMENTO_ABORT_ON_CORRUPTION"); + memento.abortOnCorruption = (env ? atoi(env) : 0); + + env = getenv("MEMENTO_SQUEEZES"); + if (env) { + int e; + fprintf(stderr, "Parsing squeeze ranges in MEMENTO_SQUEEZES=%s\n", env); + e = Memento_add_squeezes(env); + if (e) { + fprintf(stderr, "Failed to parse MEMENTO_SQUEEZES=%s\n", env); + exit(1); + } + } + + env = getenv("MEMENTO_MAXMEMORY"); + memento.maxMemory = (env ? 
atoi(env) : 0); + + atexit(Memento_fin); + + Memento_initMutex(&memento.mutex); + + Memento_initStacktracer(); + + Memento_breakpoint(); +} + +typedef struct findBlkData { + void *addr; + Memento_BlkHeader *blk; + int flags; +} findBlkData; + +static int Memento_containsAddr(Memento_BlkHeader *b, + void *arg) +{ + findBlkData *data = (findBlkData *)arg; + char *blkend = &((char *)MEMBLK_TOBLK(b))[b->rawsize]; + if ((MEMBLK_TOBLK(b) <= data->addr) && + ((void *)blkend > data->addr)) { + data->blk = b; + data->flags = 1; + return 1; + } + if (((void *)b <= data->addr) && + (MEMBLK_TOBLK(b) > data->addr)) { + data->blk = b; + data->flags = 2; + return 1; + } + if (((void *)blkend <= data->addr) && + ((void *)(blkend + Memento_PostSize) > data->addr)) { + data->blk = b; + data->flags = 3; + return 1; + } + return 0; +} + +void Memento_info(void *addr) +{ +#ifdef MEMENTO_DETAILS + findBlkData data; + + MEMENTO_LOCK(); + data.addr = addr; + data.blk = NULL; + data.flags = 0; + Memento_appBlocks(&memento.used, Memento_containsAddr, &data); + if (data.blk != NULL) + showInfo(data.blk, NULL); + data.blk = NULL; + data.flags = 0; + Memento_appBlocks(&memento.free, Memento_containsAddr, &data); + if (data.blk != NULL) + showInfo(data.blk, NULL); + MEMENTO_UNLOCK(); +#else + printf("Memento not compiled with details support\n"); +#endif +} + +#ifdef MEMENTO_HAS_FORK +#include <unistd.h> +#include <sys/wait.h> +#include <time.h> +#ifdef MEMENTO_STACKTRACE_METHOD +#if MEMENTO_STACKTRACE_METHOD == 1 +#include <signal.h> +#endif +#endif + +/* FIXME: Find some portable way of getting this */ +/* MacOSX has 10240, Ubuntu seems to have 256 */ +#ifndef OPEN_MAX +#define OPEN_MAX 10240 +#endif + +/* stashed_map[j] = i means that file descriptor i-1 was duplicated to j */ +int stashed_map[OPEN_MAX]; + +static void Memento_signal(int sig) +{ + (void)sig; + fprintf(stderr, "SEGV at:\n"); + memento.segv = 1; + Memento_bt_internal(0); + + exit(1); +} + +static int squeeze(void) +{ + pid_t 
pid; + int i, status; + + if (memento.patternBit < 0) + return 1; + if (memento.squeezing && memento.patternBit >= MEMENTO_MAXPATTERN) + return 1; + + if (memento.patternBit == 0) + memento.squeezeAt = memento.sequence; + + if (!memento.squeezing) { + fprintf(stderr, "Memory squeezing @ %d\n", memento.squeezeAt); + } else + fprintf(stderr, "Memory squeezing @ %d (%x,%x)\n", memento.squeezeAt, memento.pattern, memento.patternBit); + + /* When we fork below, the child is going to snaffle all our file pointers + * and potentially corrupt them. Let's make copies of all of them before + * we fork, so we can restore them when we restart. */ + for (i = 0; i < OPEN_MAX; i++) { + if (stashed_map[i] == 0) { + int j = dup(i); + if (j >= 0) { + stashed_map[j] = i+1; + } + } + } + + fprintf(stderr, "Failing at:\n"); + Memento_bt_internal(2); + pid = fork(); + if (pid == 0) { + /* Child */ + signal(SIGSEGV, Memento_signal); + /* Close the dup-licated fds to avoid them getting corrupted by faulty + * code. */ + for (i = 0; i < OPEN_MAX; i++) { + if (stashed_map[i] != 0) { + /* We close duplicated fds, just in case child has some bad + * code that modifies/closes random fds. */ + close(i); + } + } + /* In the child, we always fail the next allocation. */ + if (memento.patternBit == 0) { + memento.patternBit = 1; + } else + memento.patternBit <<= 1; + memento.squeezing = 1; + + /* This is necessary to allow Memento_failThisEventLocked() near the + * end to do 'return squeeze();'. */ + memento.squeezes_num = 0; + + return 1; + } + + /* In the parent if we hit another allocation, pass it (and record the + * fact we passed it in the pattern. */ + memento.pattern |= memento.patternBit; + memento.patternBit <<= 1; + + /* Wait for pid to finish, with a timeout. */ + { + struct timespec tm = { 0, 10 * 1000 * 1000 }; /* 10ms = 100th sec */ + int timeout = 30 * 1000 * 1000; /* time out in microseconds! 
*/ + while (waitpid(pid, &status, WNOHANG) == 0) { + nanosleep(&tm, NULL); + timeout -= (int)(tm.tv_nsec/1000); + tm.tv_nsec *= 2; + if (tm.tv_nsec > 999999999) + tm.tv_nsec = 999999999; + if (timeout <= 0) { + char text[32]; + fprintf(stderr, "Child is taking a long time to die. Killing it.\n"); + sprintf(text, "kill %d", pid); + system(text); + break; + } + } + } + + if (status != 0) { + fprintf(stderr, "Child status=%d\n", status); + } + + /* Put the files back */ + for (i = 0; i < OPEN_MAX; i++) { + if (stashed_map[i] != 0) { + dup2(i, stashed_map[i]-1); + close(i); + stashed_map[i] = 0; + } + } + + return 0; +} +#else +#include <signal.h> + +static void Memento_signal(int sig) +{ + (void)sig; + memento.segv = 1; + /* If we just return from this function the SEGV will be unhandled, and + * we'll launch into whatever JIT debugging system the OS provides. At + * least fprintf(stderr, something useful first. If MEMENTO_NOJIT is set, then + * just exit to avoid the JIT (and get the usual atexit handling). 
*/ + if (getenv("MEMENTO_NOJIT")) + exit(1); + else + Memento_fin(); +} + +static int squeeze(void) +{ + fprintf(stderr, "Memento memory squeezing disabled as no fork!\n"); + return 0; +} +#endif + +static void Memento_startFailing(void) +{ + if (!memento.failing) { + fprintf(stderr, "Starting to fail...\n"); + Memento_bt(); + fflush(stderr); + memento.failing = 1; + memento.failAt = memento.sequence; + memento.nextFailAt = memento.sequence+1; + memento.pattern = 0; + memento.patternBit = 0; + signal(SIGSEGV, Memento_signal); + signal(SIGABRT, Memento_signal); + Memento_breakpointLocked(); + } +} + +static int Memento_event(void) +{ + memento.sequence++; + if ((memento.sequence >= memento.paranoidAt) && (memento.paranoidAt != 0)) { + memento.paranoia = 1; + memento.countdown = 1; + } + if (--memento.countdown == 0) { + Memento_checkAllMemoryLocked(); + if (memento.paranoia > 0) + memento.countdown = memento.paranoia; + else + { + memento.countdown = -memento.paranoia; + if (memento.paranoia > INT_MIN/2) + memento.paranoia *= 2; + } + } + + if (memento.sequence == memento.breakAt) { + fprintf(stderr, "Breaking at event %d\n", memento.breakAt); + return 1; + } + return 0; +} + +int Memento_sequence(void) +{ + return memento.sequence; +} + +int Memento_breakAt(int event) +{ + MEMENTO_LOCK(); + memento.breakAt = event; + MEMENTO_UNLOCK(); + return event; +} + +static void *safe_find_block(void *ptr) +{ + Memento_BlkHeader *block; + int valid; + + if (ptr == NULL) + return NULL; + + block = MEMBLK_FROMBLK(ptr); + /* Sometimes wrapping allocators can mean Memento_label + * is called with a value within the block, rather than + * at the start of the block. If we detect this, find it + * the slow way. 
*/ + VALGRIND_MAKE_MEM_DEFINED(&block->child, sizeof(block->child)); + VALGRIND_MAKE_MEM_DEFINED(&block->sibling, sizeof(block->sibling)); + valid = (block->child == MEMENTO_CHILD_MAGIC && + block->sibling == MEMENTO_SIBLING_MAGIC); + VALGRIND_MAKE_MEM_NOACCESS(&block->child, sizeof(block->child)); + VALGRIND_MAKE_MEM_NOACCESS(&block->sibling, sizeof(block->sibling)); + if (!valid) + { + findBlkData data; + + data.addr = ptr; + data.blk = NULL; + data.flags = 0; + Memento_appBlocks(&memento.used, Memento_containsAddr, &data); + if (data.blk == NULL) + return NULL; + block = data.blk; + } + return block; +} + +void *Memento_label(void *ptr, const char *label) +{ + Memento_BlkHeader *block; + + if (ptr == NULL) + return NULL; + MEMENTO_LOCK(); + block = safe_find_block(ptr); + if (block != NULL) + { + VALGRIND_MAKE_MEM_DEFINED(&block->label, sizeof(block->label)); + block->label = label; + VALGRIND_MAKE_MEM_NOACCESS(&block->label, sizeof(block->label)); + } + MEMENTO_UNLOCK(); + return ptr; +} + +void Memento_tick(void) +{ + MEMENTO_LOCK(); + if (Memento_event()) Memento_breakpointLocked(); + MEMENTO_UNLOCK(); +} + +static int Memento_failThisEventLocked(void) +{ + int failThisOne; + + if (Memento_event()) Memento_breakpointLocked(); + + if (!memento.squeezing && memento.squeezes_num) { + /* Move to next relevant squeeze region if appropriate. */ + for ( ; memento.squeezes_pos != memento.squeezes_num; memento.squeezes_pos++) { + if (memento.sequence < memento.squeezes[memento.squeezes_pos].end) + break; + } + + /* See whether memento.sequence is within this squeeze region. 
*/ + if (memento.squeezes_pos < memento.squeezes_num) { + int begin = memento.squeezes[memento.squeezes_pos].begin; + int end = memento.squeezes[memento.squeezes_pos].end; + if (memento.sequence >= begin && memento.sequence < end) { + if (1) { + fprintf(stderr, + "squeezes match memento.sequence=%i: memento.squeezes_pos=%i/%i %i..%i\n", + memento.sequence, + memento.squeezes_pos, + memento.squeezes_num, + memento.squeezes[memento.squeezes_pos].begin, + memento.squeezes[memento.squeezes_pos].end + ); + } + return squeeze(); + } + } + } + + if ((memento.sequence >= memento.failAt) && (memento.failAt != 0)) + Memento_startFailing(); + if ((memento.squeezes_num==0) && (memento.sequence >= memento.squeezeAt) && (memento.squeezeAt != 0)) + return squeeze(); + + if (!memento.failing) + return 0; + failThisOne = ((memento.patternBit & memento.pattern) == 0); + /* If we are failing, and we've reached the end of the pattern and we've + * still got bits available in the pattern word, and we haven't already + * set a nextPattern, then extend the pattern. */ + if (memento.failing && + ((~(memento.patternBit-1) & memento.pattern) == 0) && + (memento.patternBit != 0) && + memento.nextPattern == 0) + { + /* We'll fail this one, and set the 'next' one to pass it. */ + memento.nextFailAt = memento.failAt; + memento.nextPattern = memento.pattern | memento.patternBit; + } + memento.patternBit = (memento.patternBit ? 
memento.patternBit << 1 : 1); + + return failThisOne; +} + +int Memento_failThisEvent(void) +{ + int ret; + + if (!memento.inited) + Memento_init(); + + MEMENTO_LOCK(); + ret = Memento_failThisEventLocked(); + MEMENTO_UNLOCK(); + return ret; +} + +static void *do_malloc(size_t s, int eventType) +{ + Memento_BlkHeader *memblk; + size_t smem = MEMBLK_SIZE(s); + + (void)eventType; + + if (Memento_failThisEventLocked()) { + errno = ENOMEM; + return NULL; + } + + if (s == 0) + return NULL; + + memento.numMallocs++; + + if (memento.maxMemory != 0 && memento.alloc + s > memento.maxMemory) { + errno = ENOMEM; + return NULL; + } + + memblk = MEMENTO_UNDERLYING_MALLOC(smem); + if (memblk == NULL) + return NULL; + + memento.alloc += s; + memento.totalAlloc += s; + if (memento.peakAlloc < memento.alloc) + memento.peakAlloc = memento.alloc; +#ifndef MEMENTO_LEAKONLY + memset(MEMBLK_TOBLK(memblk), MEMENTO_ALLOCFILL, s); +#endif + memblk->rawsize = s; + memblk->sequence = memento.sequence; + memblk->lastCheckedOK = memblk->sequence; + memblk->flags = 0; + memblk->label = 0; + memblk->child = MEMENTO_CHILD_MAGIC; + memblk->sibling = MEMENTO_SIBLING_MAGIC; +#ifdef MEMENTO_DETAILS + memblk->details = NULL; + memblk->details_tail = &memblk->details; + Memento_storeDetails(memblk, eventType); +#endif /* MEMENTO_DETAILS */ + Memento_addBlockHead(&memento.used, memblk, 0); + + if (memento.leaking > 0) + memblk->flags |= Memento_Flag_KnownLeak; + + return MEMBLK_TOBLK(memblk); +} + +char *Memento_strdup(const char *text) +{ + size_t len = strlen(text) + 1; + char *ret; + + if (!memento.inited) + Memento_init(); + + MEMENTO_LOCK(); + ret = do_malloc(len, Memento_EventType_strdup); + MEMENTO_UNLOCK(); + + if (ret != NULL) + memcpy(ret, text, len); + + return ret; +} + +int Memento_asprintf(char **ret, const char *format, ...) 
+{ + va_list va; + int n; + int n2; + + if (!memento.inited) + Memento_init(); + + va_start(va, format); + n = vsnprintf(NULL, 0, format, va); + va_end(va); + if (n < 0) + return n; + + MEMENTO_LOCK(); + *ret = do_malloc(n+1, Memento_EventType_asprintf); + MEMENTO_UNLOCK(); + if (*ret == NULL) + return -1; + + va_start(va, format); + n2 = vsnprintf(*ret, n + 1, format, va); + va_end(va); + + return n2; +} + +int Memento_vasprintf(char **ret, const char *format, va_list ap) +{ + int n; + va_list ap2; + va_copy(ap2, ap); + + if (!memento.inited) + Memento_init(); + + n = vsnprintf(NULL, 0, format, ap); + if (n < 0) { + va_end(ap2); + return n; + } + + MEMENTO_LOCK(); + *ret = do_malloc(n+1, Memento_EventType_vasprintf); + MEMENTO_UNLOCK(); + if (*ret == NULL) { + va_end(ap2); + return -1; + } + + n = vsnprintf(*ret, n + 1, format, ap2); + va_end(ap2); + + return n; +} + +void *Memento_malloc(size_t s) +{ + void *ret; + + if (!memento.inited) + Memento_init(); + + MEMENTO_LOCK(); + ret = do_malloc(s, Memento_EventType_malloc); + MEMENTO_UNLOCK(); + + return ret; +} + +/* calloc replacement: allocates n*s bytes through do_malloc and zero-fills + * them. Returns NULL (with errno set to ENOMEM) if the product n*s would + * not fit in a size_t, or whatever NULL do_malloc returns on failure. */ +void *Memento_calloc(size_t n, size_t s) +{ + void *block; + + if (!memento.inited) + Memento_init(); + + /* Reject requests whose byte count would wrap; otherwise a too-small + * block would be allocated and handed back as if it were n*s bytes. */ + if (s != 0 && n > (size_t)-1 / s) { + errno = ENOMEM; + return NULL; + } + + MEMENTO_LOCK(); + block = do_malloc(n*s, Memento_EventType_calloc); + MEMENTO_UNLOCK(); + if (block) + memset(block, 0, n*s); + + return block; +} + +static void do_reference(Memento_BlkHeader *blk, int event) +{ +#ifdef MEMENTO_DETAILS + Memento_storeDetails(blk, event); +#endif /* MEMENTO_DETAILS */ +} + +int Memento_checkPointerOrNull(void *blk) +{ + if (blk == NULL) + return 0; + if (blk == MEMENTO_PREFILL_PTR) + fprintf(stderr, "Prefill value found as pointer - buffer underrun?\n"); + else if (blk == MEMENTO_POSTFILL_PTR) + fprintf(stderr, "Postfill value found as pointer - buffer overrun?\n"); + else if (blk == MEMENTO_ALLOCFILL_PTR) + fprintf(stderr, "Allocfill value found as pointer - use of uninitialised value?\n"); + else if (blk == MEMENTO_FREEFILL_PTR) + fprintf(stderr, "Freefill value 
found as pointer - use after free?\n"); + else + return 0; +#ifdef MEMENTO_DETAILS + fprintf(stderr, "Current backtrace:\n"); + Memento_bt(); + fprintf(stderr, "History:\n"); + Memento_info(blk); +#endif + return 1; +} + +/* Report whether the byte at blk equals one of Memento's fill patterns. + * Returns 0 if blk is NULL or no pattern matches; otherwise prints a + * diagnostic to stderr, calls Memento_breakpoint() and returns 1. */ +int Memento_checkBytePointerOrNull(void *blk) +{ + unsigned char i; + if (blk == NULL) + return 0; + Memento_checkPointerOrNull(blk); + + i = *(unsigned char *)blk; + + if (i == MEMENTO_PREFILL_UBYTE) + fprintf(stderr, "Prefill value found - buffer underrun?\n"); + else if (i == MEMENTO_POSTFILL_UBYTE) + fprintf(stderr, "Postfill value found - buffer overrun?\n"); + else if (i == MEMENTO_ALLOCFILL_UBYTE) + fprintf(stderr, "Allocfill value found - use of uninitialised value?\n"); + else if (i == MEMENTO_FREEFILL_UBYTE) + fprintf(stderr, "Freefill value found - use after free?\n"); + else + return 0; +#ifdef MEMENTO_DETAILS + fprintf(stderr, "Current backtrace:\n"); + Memento_bt(); + fprintf(stderr, "History:\n"); + Memento_info(blk); +#endif + Memento_breakpoint(); + return 1; +} + +/* As Memento_checkBytePointerOrNull, but compares the unsigned short read + * from blk against the 16-bit fill patterns. */ +int Memento_checkShortPointerOrNull(void *blk) +{ + unsigned short i; + if (blk == NULL) + return 0; + Memento_checkPointerOrNull(blk); + + i = *(unsigned short *)blk; + + if (i == MEMENTO_PREFILL_USHORT) + fprintf(stderr, "Prefill value found - buffer underrun?\n"); + else if (i == MEMENTO_POSTFILL_USHORT) + fprintf(stderr, "Postfill value found - buffer overrun?\n"); + else if (i == MEMENTO_ALLOCFILL_USHORT) + fprintf(stderr, "Allocfill value found - use of uninitialised value?\n"); + else if (i == MEMENTO_FREEFILL_USHORT) + fprintf(stderr, "Freefill value found - use after free?\n"); + else + return 0; +#ifdef MEMENTO_DETAILS + fprintf(stderr, "Current backtrace:\n"); + Memento_bt(); + fprintf(stderr, "History:\n"); + Memento_info(blk); +#endif + Memento_breakpoint(); + return 1; +} + +int Memento_checkIntPointerOrNull(void *blk) +{ + unsigned int i; + if (blk == NULL) + return 0; + Memento_checkPointerOrNull(blk); + + i = *(unsigned int *)blk; + + if (i == 
MEMENTO_PREFILL_UINT) + fprintf(stderr, "Prefill value found - buffer underrun?\n"); + else if (i == MEMENTO_POSTFILL_UINT) + fprintf(stderr, "Postfill value found - buffer overrun?\n"); + else if (i == MEMENTO_ALLOCFILL_UINT) + fprintf(stderr, "Allocfill value found - use of uninitialised value?\n"); + else if (i == MEMENTO_FREEFILL_UINT) /* value matches the free-fill pattern */ + fprintf(stderr, "Freefill value found - use after free?\n"); + else + return 0; +#ifdef MEMENTO_DETAILS + fprintf(stderr, "Current backtrace:\n"); + Memento_bt(); + fprintf(stderr, "History:\n"); + Memento_info(blk); +#endif + Memento_breakpoint(); + return 1; +} + +static void *do_takeRef(void *blk) +{ + MEMENTO_LOCK(); + do_reference(safe_find_block(blk), Memento_EventType_takeRef); + MEMENTO_UNLOCK(); + return blk; +} + +void *Memento_takeByteRef(void *blk) +{ + if (!memento.inited) + Memento_init(); + + if (Memento_event()) Memento_breakpoint(); + + if (!blk) + return NULL; + + (void)Memento_checkBytePointerOrNull(blk); + + return do_takeRef(blk); +} + +void *Memento_takeShortRef(void *blk) +{ + if (!memento.inited) + Memento_init(); + + if (Memento_event()) Memento_breakpoint(); + + if (!blk) + return NULL; + + (void)Memento_checkShortPointerOrNull(blk); + + return do_takeRef(blk); +} + +void *Memento_takeIntRef(void *blk) +{ + if (!memento.inited) + Memento_init(); + + if (Memento_event()) Memento_breakpoint(); + + if (!blk) + return NULL; + + (void)Memento_checkIntPointerOrNull(blk); + + return do_takeRef(blk); +} + +void *Memento_takeRef(void *blk) +{ + if (!memento.inited) + Memento_init(); + + if (Memento_event()) Memento_breakpoint(); + + if (!blk) + return NULL; + + return do_takeRef(blk); +} + +static void *do_dropRef(void *blk) +{ + MEMENTO_LOCK(); + do_reference(safe_find_block(blk), Memento_EventType_dropRef); + MEMENTO_UNLOCK(); + return blk; +} + +void *Memento_dropByteRef(void *blk) +{ + if (!memento.inited) + Memento_init(); + + if (Memento_event()) Memento_breakpoint(); + + if (!blk) + return NULL; + + 
Memento_checkBytePointerOrNull(blk); + + return do_dropRef(blk); +} + +void *Memento_dropShortRef(void *blk) +{ + if (!memento.inited) + Memento_init(); + + if (Memento_event()) Memento_breakpoint(); + + if (!blk) + return NULL; + + Memento_checkShortPointerOrNull(blk); + + return do_dropRef(blk); +} + +void *Memento_dropIntRef(void *blk) +{ + if (!memento.inited) + Memento_init(); + + if (Memento_event()) Memento_breakpoint(); + + if (!blk) + return NULL; + + Memento_checkIntPointerOrNull(blk); + + return do_dropRef(blk); +} + +void *Memento_dropRef(void *blk) +{ + if (!memento.inited) + Memento_init(); + + if (Memento_event()) Memento_breakpoint(); + + if (!blk) + return NULL; + + return do_dropRef(blk); +} + +void *Memento_adjustRef(void *blk, int adjust) +{ + if (Memento_event()) Memento_breakpoint(); + + if (blk == NULL) + return NULL; + + while (adjust > 0) + { + do_takeRef(blk); + adjust--; + } + while (adjust < 0) + { + do_dropRef(blk); + adjust++; + } + + return blk; + } + +void *Memento_reference(void *blk) +{ + if (!blk) + return NULL; + + if (!memento.inited) + Memento_init(); + + MEMENTO_LOCK(); + do_reference(safe_find_block(blk), Memento_EventType_reference); + MEMENTO_UNLOCK(); + return blk; +} + +/* Treat blocks from the user with suspicion, and check them the slow + * but safe way. */ +static int checkBlockUser(Memento_BlkHeader *memblk, const char *action) +{ +#ifndef MEMENTO_LEAKONLY + BlkCheckData data; + + memset(&data, 0, sizeof(data)); + Memento_appBlockUser(&memento.used, Memento_Internal_checkAllocedBlock, + &data, memblk); + if (!data.found) { + /* Failure! 
*/ + fprintf(stderr, "Attempt to %s block ", action); + showBlock(memblk, 32); + fprintf(stderr, "\n"); + Memento_breakpointLocked(); + return 1; + } else if (data.preCorrupt || data.postCorrupt) { + fprintf(stderr, "Block "); + showBlock(memblk, ' '); + fprintf(stderr, " found to be corrupted on %s!\n", action); + if (data.preCorrupt) { + fprintf(stderr, "Preguard corrupted\n"); + } + if (data.postCorrupt) { + fprintf(stderr, "Postguard corrupted\n"); + } + fprintf(stderr, "Block last checked OK at allocation %d. Now %d.\n", + memblk->lastCheckedOK, memento.sequence); + if ((memblk->flags & Memento_Flag_Reported) == 0) + { + memblk->flags |= Memento_Flag_Reported; + Memento_breakpointLocked(); + } + return 1; + } +#endif + return 0; +} + +static int checkBlock(Memento_BlkHeader *memblk, const char *action) +{ +#ifndef MEMENTO_LEAKONLY + BlkCheckData data; +#endif + + if (memblk->child != MEMENTO_CHILD_MAGIC || + memblk->sibling != MEMENTO_SIBLING_MAGIC) + { + /* Failure! */ + fprintf(stderr, "Attempt to %s invalid block ", action); + showBlock(memblk, 32); + fprintf(stderr, "\n"); + Memento_breakpointLocked(); + return 1; + } + +#ifndef MEMENTO_LEAKONLY + memset(&data, 0, sizeof(data)); + Memento_appBlock(&memento.used, Memento_Internal_checkAllocedBlock, + &data, memblk); + if (!data.found) { + /* Failure! */ + fprintf(stderr, "Attempt to %s block ", action); + showBlock(memblk, 32); + fprintf(stderr, "\n"); + Memento_breakpointLocked(); + return 1; + } else if (data.preCorrupt || data.postCorrupt) { + fprintf(stderr, "Block "); + showBlock(memblk, ' '); + fprintf(stderr, " found to be corrupted on %s!\n", action); + if (data.preCorrupt) { + fprintf(stderr, "Preguard corrupted\n"); + } + if (data.postCorrupt) { + fprintf(stderr, "Postguard corrupted\n"); + } + fprintf(stderr, "Block last checked OK at allocation %d. 
Now %d.\n", + memblk->lastCheckedOK, memento.sequence); + if ((memblk->flags & Memento_Flag_Reported) == 0) + { + memblk->flags |= Memento_Flag_Reported; + Memento_breakpointLocked(); + } + return 1; + } +#endif + return 0; +} + +static void do_free(void *blk, int eventType) +{ + Memento_BlkHeader *memblk; + + (void)eventType; + + if (Memento_event()) Memento_breakpointLocked(); + + if (blk == NULL) + return; + + memblk = MEMBLK_FROMBLK(blk); + VALGRIND_MAKE_MEM_DEFINED(memblk, sizeof(*memblk)); + if (checkBlock(memblk, "free")) + { + if (memento.abortOnCorruption) { + fprintf(stderr, "*** memblk corrupted, calling abort()\n"); + abort(); + } + return; + } + +#ifdef MEMENTO_DETAILS + Memento_storeDetails(memblk, eventType); +#endif + + VALGRIND_MAKE_MEM_DEFINED(memblk, sizeof(*memblk)); + if (memblk->flags & Memento_Flag_BreakOnFree) + Memento_breakpointLocked(); + + memento.alloc -= memblk->rawsize; + memento.numFrees++; + + Memento_removeBlock(&memento.used, memblk); + + VALGRIND_MAKE_MEM_DEFINED(memblk, sizeof(*memblk)); + if (Memento_Internal_makeSpace(MEMBLK_SIZE(memblk->rawsize))) { + VALGRIND_MAKE_MEM_DEFINED(memblk, sizeof(*memblk)); + VALGRIND_MAKE_MEM_DEFINED(MEMBLK_TOBLK(memblk), + memblk->rawsize + Memento_PostSize); +#ifndef MEMENTO_LEAKONLY + memset(MEMBLK_TOBLK(memblk), MEMENTO_FREEFILL, memblk->rawsize); +#endif + memblk->flags |= Memento_Flag_Freed; + Memento_addBlockTail(&memento.free, memblk, 1); + } else { + free_block(memblk); + } +} + +void Memento_free(void *blk) +{ + if (!memento.inited) + Memento_init(); + + MEMENTO_LOCK(); + do_free(blk, Memento_EventType_free); + MEMENTO_UNLOCK(); +} + +static void *do_realloc(void *blk, size_t newsize, int type) +{ + Memento_BlkHeader *memblk, *newmemblk; + size_t newsizemem; + int flags; + + if (Memento_failThisEventLocked()) { + errno = ENOMEM; + return NULL; + } + + memblk = MEMBLK_FROMBLK(blk); + VALGRIND_MAKE_MEM_DEFINED(memblk, sizeof(*memblk)); + if (checkBlock(memblk, "realloc")) { + errno = 
ENOMEM; + return NULL; + } + +#ifdef MEMENTO_DETAILS + Memento_storeDetails(memblk, type); +#endif + + VALGRIND_MAKE_MEM_DEFINED(memblk, sizeof(*memblk)); + if (memblk->flags & Memento_Flag_BreakOnRealloc) + Memento_breakpointLocked(); + + VALGRIND_MAKE_MEM_DEFINED(memblk, sizeof(*memblk)); + if (memento.maxMemory != 0 && memento.alloc - memblk->rawsize + newsize > memento.maxMemory) { + errno = ENOMEM; + return NULL; + } + + newsizemem = MEMBLK_SIZE(newsize); + Memento_removeBlock(&memento.used, memblk); + VALGRIND_MAKE_MEM_DEFINED(memblk, sizeof(*memblk)); + flags = memblk->flags; + newmemblk = MEMENTO_UNDERLYING_REALLOC(memblk, newsizemem); + if (newmemblk == NULL) + { + Memento_addBlockHead(&memento.used, memblk, 2); + return NULL; + } + memento.numReallocs++; + memento.totalAlloc += newsize; + memento.alloc -= newmemblk->rawsize; + memento.alloc += newsize; + if (memento.peakAlloc < memento.alloc) + memento.peakAlloc = memento.alloc; + newmemblk->flags = flags; +#ifndef MEMENTO_LEAKONLY + if (newmemblk->rawsize < newsize) { + char *newbytes = ((char *)MEMBLK_TOBLK(newmemblk))+newmemblk->rawsize; + VALGRIND_MAKE_MEM_DEFINED(newbytes, newsize - newmemblk->rawsize); + memset(newbytes, MEMENTO_ALLOCFILL, newsize - newmemblk->rawsize); + VALGRIND_MAKE_MEM_UNDEFINED(newbytes, newsize - newmemblk->rawsize); + } +#endif + newmemblk->rawsize = newsize; +#ifndef MEMENTO_LEAKONLY + VALGRIND_MAKE_MEM_DEFINED(newmemblk->preblk, Memento_PreSize); + memset(newmemblk->preblk, MEMENTO_PREFILL, Memento_PreSize); + VALGRIND_MAKE_MEM_UNDEFINED(newmemblk->preblk, Memento_PreSize); + VALGRIND_MAKE_MEM_DEFINED(MEMBLK_POSTPTR(newmemblk), Memento_PostSize); + memset(MEMBLK_POSTPTR(newmemblk), MEMENTO_POSTFILL, Memento_PostSize); + VALGRIND_MAKE_MEM_UNDEFINED(MEMBLK_POSTPTR(newmemblk), Memento_PostSize); +#endif + Memento_addBlockHead(&memento.used, newmemblk, 2); + return MEMBLK_TOBLK(newmemblk); +} + +void *Memento_realloc(void *blk, size_t newsize) +{ + void *ret; + + if 
(!memento.inited) + Memento_init(); + + if (blk == NULL) + { + MEMENTO_LOCK(); + ret = do_malloc(newsize, Memento_EventType_realloc); + MEMENTO_UNLOCK(); + if (!ret) errno = ENOMEM; + return ret; + } + if (newsize == 0) { + MEMENTO_LOCK(); + do_free(blk, Memento_EventType_realloc); + MEMENTO_UNLOCK(); + return NULL; + } + + MEMENTO_LOCK(); + ret = do_realloc(blk, newsize, Memento_EventType_realloc); + MEMENTO_UNLOCK(); + if (!ret) errno = ENOMEM; + return ret; +} + +int Memento_checkBlock(void *blk) +{ + Memento_BlkHeader *memblk; + int ret; + + if (blk == NULL) + return 0; + + MEMENTO_LOCK(); + memblk = MEMBLK_FROMBLK(blk); + ret = checkBlockUser(memblk, "check"); + MEMENTO_UNLOCK(); + return ret; +} + +#ifndef MEMENTO_LEAKONLY +static int Memento_Internal_checkAllAlloced(Memento_BlkHeader *memblk, void *arg) +{ + BlkCheckData *data = (BlkCheckData *)arg; + + Memento_Internal_checkAllocedBlock(memblk, data); + if (data->preCorrupt || data->postCorrupt) { + if ((data->found & 2) == 0) { + fprintf(stderr, "Allocated blocks:\n"); + data->found |= 2; + } + fprintf(stderr, " Block "); + showBlock(memblk, ' '); + if (data->preCorrupt) { + fprintf(stderr, " Preguard "); + } + if (data->postCorrupt) { + fprintf(stderr, "%s Postguard ", + (data->preCorrupt ? "&" : "")); + } + fprintf(stderr, "corrupted.\n " + "Block last checked OK at allocation %d. 
Now %d.\n", + memblk->lastCheckedOK, memento.sequence); + data->preCorrupt = 0; + data->postCorrupt = 0; + data->freeCorrupt = 0; + if ((memblk->flags & Memento_Flag_Reported) == 0) + { + memblk->flags |= Memento_Flag_Reported; + Memento_breakpointLocked(); + } + } + else + memblk->lastCheckedOK = memento.sequence; + return 0; +} + +static int Memento_Internal_checkAllFreed(Memento_BlkHeader *memblk, void *arg) +{ + BlkCheckData *data = (BlkCheckData *)arg; + + Memento_Internal_checkFreedBlock(memblk, data); + if (data->preCorrupt || data->postCorrupt || data->freeCorrupt) { + if ((data->found & 4) == 0) { + fprintf(stderr, "Freed blocks:\n"); + data->found |= 4; + } + fprintf(stderr, " "); + showBlock(memblk, ' '); + if (data->freeCorrupt) { + fprintf(stderr, " index %d (address "FMTP") onwards", (int)data->index, + &((char *)MEMBLK_TOBLK(memblk))[data->index]); + if (data->preCorrupt) { + fprintf(stderr, "+ preguard"); + } + if (data->postCorrupt) { + fprintf(stderr, "+ postguard"); + } + } else { + if (data->preCorrupt) { + fprintf(stderr, " preguard"); + } + if (data->postCorrupt) { + fprintf(stderr, "%s Postguard", + (data->preCorrupt ? "+" : "")); + } + } + VALGRIND_MAKE_MEM_DEFINED(memblk, sizeof(Memento_BlkHeader)); + fprintf(stderr, " corrupted.\n" + " Block last checked OK at allocation %d. 
Now %d.\n", + memblk->lastCheckedOK, memento.sequence); + if ((memblk->flags & Memento_Flag_Reported) == 0) + { + memblk->flags |= Memento_Flag_Reported; + Memento_breakpointLocked(); + } + VALGRIND_MAKE_MEM_NOACCESS(memblk, sizeof(Memento_BlkHeader)); + data->preCorrupt = 0; + data->postCorrupt = 0; + data->freeCorrupt = 0; + } + else + memblk->lastCheckedOK = memento.sequence; + return 0; +} +#endif /* MEMENTO_LEAKONLY */ + +static int Memento_checkAllMemoryLocked(void) +{ +#ifndef MEMENTO_LEAKONLY + BlkCheckData data; + + memset(&data, 0, sizeof(data)); + Memento_appBlocks(&memento.used, Memento_Internal_checkAllAlloced, &data); + Memento_appBlocks(&memento.free, Memento_Internal_checkAllFreed, &data); + return data.found; +#else + return 0; +#endif +} + +int Memento_checkAllMemory(void) +{ +#ifndef MEMENTO_LEAKONLY + int ret; + + MEMENTO_LOCK(); + ret = Memento_checkAllMemoryLocked(); + MEMENTO_UNLOCK(); + if (ret & 6) { + Memento_breakpoint(); + return 1; + } + return 0; +#endif +} + +int Memento_setParanoia(int i) +{ + memento.paranoia = i; + if (memento.paranoia > 0) + memento.countdown = memento.paranoia; + else + memento.countdown = -memento.paranoia; + return i; +} + +int Memento_paranoidAt(int i) +{ + memento.paranoidAt = i; + return i; +} + +int Memento_getBlockNum(void *b) +{ + Memento_BlkHeader *memblk; + if (b == NULL) + return 0; + memblk = MEMBLK_FROMBLK(b); + return (memblk->sequence); +} + +int Memento_check(void) +{ + int result; + + fprintf(stderr, "Checking memory\n"); + result = Memento_checkAllMemory(); + fprintf(stderr, "Memory checked!\n"); + return result; +} + +int Memento_find(void *a) +{ + findBlkData data; + int s; + + MEMENTO_LOCK(); + data.addr = a; + data.blk = NULL; + data.flags = 0; + Memento_appBlocks(&memento.used, Memento_containsAddr, &data); + if (data.blk != NULL) { + fprintf(stderr, "Address "FMTP" is in %sallocated block ", + data.addr, + (data.flags == 1 ? "" : (data.flags == 2 ? 
+ "preguard of " : "postguard of "))); + s = showBlock(data.blk, ' '); + fprintf(stderr, "\n"); + MEMENTO_UNLOCK(); + return s; + } + data.blk = NULL; + data.flags = 0; + Memento_appBlocks(&memento.free, Memento_containsAddr, &data); + if (data.blk != NULL) { + fprintf(stderr, "Address "FMTP" is in %sfreed block ", + data.addr, + (data.flags == 1 ? "" : (data.flags == 2 ? + "preguard of " : "postguard of "))); + s = showBlock(data.blk, ' '); + fprintf(stderr, "\n"); + MEMENTO_UNLOCK(); + return s; + } + MEMENTO_UNLOCK(); + return 0; +} + +void Memento_breakOnFree(void *a) +{ + findBlkData data; + + MEMENTO_LOCK(); + data.addr = a; + data.blk = NULL; + data.flags = 0; + Memento_appBlocks(&memento.used, Memento_containsAddr, &data); + if (data.blk != NULL) { + fprintf(stderr, "Will stop when address "FMTP" (in %sallocated block ", + data.addr, + (data.flags == 1 ? "" : (data.flags == 2 ? + "preguard of " : "postguard of "))); + showBlock(data.blk, ' '); + fprintf(stderr, ") is freed\n"); + VALGRIND_MAKE_MEM_DEFINED(data.blk, sizeof(Memento_BlkHeader)); + data.blk->flags |= Memento_Flag_BreakOnFree; + VALGRIND_MAKE_MEM_NOACCESS(data.blk, sizeof(Memento_BlkHeader)); + MEMENTO_UNLOCK(); + return; + } + data.blk = NULL; + data.flags = 0; + Memento_appBlocks(&memento.free, Memento_containsAddr, &data); + if (data.blk != NULL) { + fprintf(stderr, "Can't stop on free; address "FMTP" is in %sfreed block ", + data.addr, + (data.flags == 1 ? "" : (data.flags == 2 ? 
+ "preguard of " : "postguard of "))); + showBlock(data.blk, ' '); + fprintf(stderr, "\n"); + MEMENTO_UNLOCK(); + return; + } + fprintf(stderr, "Can't stop on free; address "FMTP" is not in a known block.\n", a); + MEMENTO_UNLOCK(); +} + +void Memento_breakOnRealloc(void *a) +{ + findBlkData data; + + MEMENTO_LOCK(); + data.addr = a; + data.blk = NULL; + data.flags = 0; + Memento_appBlocks(&memento.used, Memento_containsAddr, &data); + if (data.blk != NULL) { + fprintf(stderr, "Will stop when address "FMTP" (in %sallocated block ", + data.addr, + (data.flags == 1 ? "" : (data.flags == 2 ? + "preguard of " : "postguard of "))); + showBlock(data.blk, ' '); + fprintf(stderr, ") is freed (or realloced)\n"); + VALGRIND_MAKE_MEM_DEFINED(data.blk, sizeof(Memento_BlkHeader)); + data.blk->flags |= Memento_Flag_BreakOnFree | Memento_Flag_BreakOnRealloc; + VALGRIND_MAKE_MEM_NOACCESS(data.blk, sizeof(Memento_BlkHeader)); + MEMENTO_UNLOCK(); + return; + } + data.blk = NULL; + data.flags = 0; + Memento_appBlocks(&memento.free, Memento_containsAddr, &data); + if (data.blk != NULL) { + fprintf(stderr, "Can't stop on free/realloc; address "FMTP" is in %sfreed block ", + data.addr, + (data.flags == 1 ? "" : (data.flags == 2 ? 
+ "preguard of " : "postguard of "))); + showBlock(data.blk, ' '); + fprintf(stderr, "\n"); + MEMENTO_UNLOCK(); + return; + } + fprintf(stderr, "Can't stop on free/realloc; address "FMTP" is not in a known block.\n", a); + MEMENTO_UNLOCK(); +} + +int Memento_failAt(int i) +{ + memento.failAt = i; + if ((memento.sequence > memento.failAt) && + (memento.failing != 0)) + Memento_startFailing(); + return i; +} + +size_t Memento_setMax(size_t max) +{ + memento.maxMemory = max; + return max; +} + +void Memento_startLeaking(void) +{ + memento.leaking++; +} + +void Memento_stopLeaking(void) +{ + memento.leaking--; +} + +int Memento_squeezing(void) +{ + return memento.squeezing; +} + +#endif /* MEMENTO_CPP_EXTRAS_ONLY */ + +#ifdef __cplusplus +/* Dumb overrides for the new and delete operators */ + +void *operator new(size_t size) +{ + void *ret; + + if (!memento.inited) + Memento_init(); + + if (size == 0) + size = 1; + MEMENTO_LOCK(); + ret = do_malloc(size, Memento_EventType_new); + MEMENTO_UNLOCK(); + return ret; +} + +void operator delete(void *pointer) +{ + if (!pointer) + return; + + MEMENTO_LOCK(); + do_free(pointer, Memento_EventType_delete); + MEMENTO_UNLOCK(); +} + +/* Some C++ systems (apparently) don't provide new[] or delete[] + * operators. Provide a way to cope with this */ +#ifndef MEMENTO_CPP_NO_ARRAY_CONSTRUCTORS +void *operator new[](size_t size) +{ + void *ret; + if (!memento.inited) + Memento_init(); + + if (size == 0) + size = 1; + MEMENTO_LOCK(); + ret = do_malloc(size, Memento_EventType_newArray); + MEMENTO_UNLOCK(); + return ret; +} + +void operator delete[](void *pointer) +{ + MEMENTO_LOCK(); + do_free(pointer, Memento_EventType_deleteArray); + MEMENTO_UNLOCK(); +} +#endif /* MEMENTO_CPP_NO_ARRAY_CONSTRUCTORS */ +#endif /* __cplusplus */ + +#else + +/* Just in case anyone has left some debugging code in... 
*/ +void (Memento_breakpoint)(void) +{ +} + +int (Memento_checkBlock)(void *b) +{ + return 0; +} + +int (Memento_checkAllMemory)(void) +{ + return 0; +} + +int (Memento_check)(void) +{ + return 0; +} + +int (Memento_setParanoia)(int i) +{ + return 0; +} + +int (Memento_paranoidAt)(int i) +{ + return 0; +} + +int (Memento_breakAt)(int i) +{ + return 0; +} + +int (Memento_getBlockNum)(void *i) +{ + return 0; +} + +int (Memento_find)(void *a) +{ + return 0; +} + +int (Memento_failAt)(int i) +{ + return 0; +} + +void (Memento_breakOnFree)(void *a) +{ +} + +void (Memento_breakOnRealloc)(void *a) +{ +} + +void *(Memento_takeRef)(void *a) +{ + return a; +} + +void *(Memento_dropRef)(void *a) +{ + return a; +} + +void *(Memento_adjustRef)(void *a, int adjust) +{ + return a; +} + +void *(Memento_reference)(void *a) +{ + return a; +} + +#undef Memento_malloc +#undef Memento_free +#undef Memento_realloc +#undef Memento_calloc +#undef Memento_strdup + +void *Memento_malloc(size_t size) +{ + return MEMENTO_UNDERLYING_MALLOC(size); +} + +void Memento_free(void *b) +{ + MEMENTO_UNDERLYING_FREE(b); +} + +void *Memento_realloc(void *b, size_t s) +{ + return MEMENTO_UNDERLYING_REALLOC(b, s); +} + +void *Memento_calloc(size_t n, size_t s) +{ + return MEMENTO_UNDERLYING_CALLOC(n, s); +} + +/* Avoid calling strdup, in case our compiler doesn't support it. + * Yes, I'm looking at you, early Visual Studios. */ +char *Memento_strdup(const char *s) +{ + size_t len = strlen(s)+1; + char *ret = MEMENTO_UNDERLYING_MALLOC(len); + if (ret != NULL) + memcpy(ret, s, len); + return ret; +} + +/* Avoid calling asprintf, in case our compiler doesn't support it. + * Vaguely unhappy about relying on vsnprintf, but... */ +int Memento_asprintf(char **ret, const char *format, ...) 
+{ + va_list va; + int n; + int n2; + + va_start(va, format); + n = vsnprintf(NULL, 0, format, va); + va_end(va); + if (n < 0) + return n; + + *ret = MEMENTO_UNDERLYING_MALLOC(n+1); + if (*ret == NULL) + return -1; + + va_start(va, format); + n2 = vsnprintf(*ret, n + 1, format, va); + va_end(va); + + return n2; +} + +/* Avoid calling vasprintf, in case our compiler doesn't support it. + * Vaguely unhappy about relying on vsnprintf, but... */ +int Memento_vasprintf(char **ret, const char *format, va_list ap) +{ + int n; + va_list ap2; + va_copy(ap2, ap); + + n = vsnprintf(NULL, 0, format, ap); + if (n < 0) { + va_end(ap2); + return n; + } + + *ret = MEMENTO_UNDERLYING_MALLOC(n+1); + if (*ret == NULL) { + va_end(ap2); + return -1; + } + + n = vsnprintf(*ret, n + 1, format, ap2); + va_end(ap2); + + return n; +} + +void (Memento_listBlocks)(void) +{ +} + +void (Memento_listNewBlocks)(void) +{ +} + +size_t (Memento_setMax)(size_t max) +{ + return 0; +} + +void (Memento_stats)(void) +{ +} + +void *(Memento_label)(void *ptr, const char *label) +{ + return ptr; +} + +void (Memento_info)(void *addr) +{ +} + +void (Memento_listBlockInfo)(void) +{ +} + +void (Memento_startLeaking)(void) +{ +} + +void (Memento_stopLeaking)(void) +{ +} + +int (Memento_squeezing)(void) +{ + return 0; +} + +#endif diff --git a/extract/src/memento.h b/extract/src/memento.h new file mode 100644 index 00000000..2dc1271d --- /dev/null +++ b/extract/src/memento.h @@ -0,0 +1,343 @@ +/* Copyright (C) 2009-2018 Artifex Software, Inc. + All Rights Reserved. + + This software is provided AS-IS with no warranty, either express or + implied. + + This software is distributed under license and may not be copied, modified + or distributed except as expressly authorized under the terms of that + license. Refer to licensing information at http://www.artifex.com + or contact Artifex Software, Inc., 1305 Grant Avenue - Suite 200, + Novato, CA 94945, U.S.A., +1(415)492-9861, for further information. 
+*/ + +/* Memento: A library to aid debugging of memory leaks/heap corruption. + * + * Usage (with C): + * First, build your project with MEMENTO defined, and include this + * header file wherever you use malloc, realloc or free. + * This header file will use macros to redirect malloc, realloc and free + * to Memento_malloc, Memento_realloc and Memento_free. + * + * Run your program, and all mallocs/frees/reallocs should be redirected + * through here. When the program exits, you will get a list of all the + * leaked blocks, together with some helpful statistics. You can get the + * same list of allocated blocks at any point during program execution by + * calling Memento_listBlocks(); + * + * Every call to malloc/free/realloc counts as an 'allocation event'. + * On each event Memento increments a counter. Every block is tagged with + * the current counter on allocation. Every so often during program + * execution, the heap is checked for consistency. By default this happens + * after 1024 events, then after 2048 events, then after 4096 events, etc. + * This can be changed at runtime by using Memento_setParanoia(int level). + * 0 turns off such checking, 1 sets checking to happen on every event, + * any positive number n sets checking to happen once every n events, + * and any negative number n sets checking to happen after -n events, then + * after -2n events etc. + * + * The default paranoia level is therefore -1024. + * + * Memento keeps blocks around for a while after they have been freed, and + * checks them as part of these heap checks to see if they have been + * written to (or are freed twice etc). + * + * A given heap block can be checked for consistency (its 'pre' and + * 'post' guard blocks are checked to see if they have been written to) + * by calling Memento_checkBlock(void *blockAddress); + * + * A check of all the memory can be triggered by calling Memento_check(); + * (or Memento_checkAllMemory(); if you'd like it to be quieter). 
+ * + * A good place to breakpoint is Memento_breakpoint, as this will then + * trigger your debugger if an error is detected. This is done + * automatically for debug Windows builds. + * + * If a block is found to be corrupt, information will be printed to the + * console, including the address of the block, the size of the block, + * the type of corruption, the number of the block and the event on which + * it last passed a check for correctness. + * + * If you rerun, and call Memento_paranoidAt(int event); with this number + * the code will wait until it reaches that event and then start + * checking the heap after every allocation event. Assuming it is a + * deterministic failure, you should then find out where in your program + * the error is occurring (between event x-1 and event x). + * + * Then you can rerun the program again, and call + * Memento_breakAt(int event); and the program will call + * Memento_breakpoint() when event x is reached, enabling you to step + * through. + * + * Memento_find(address) will tell you what block (if any) the given + * address is in. + * + * An example: + * Suppose we have a gs invocation that crashes with memory corruption. + * * Build with -DMEMENTO. + * * In your debugger put a breakpoint on Memento_breakpoint. + * * Run the program. It will stop in Memento_inited. + * * Execute Memento_setParanoia(1); (In VS use Ctrl-Alt-Q). (Note #1) + * * Continue execution. + * * It will detect the memory corruption on the next allocation event + * after it happens, and stop in Memento_breakpoint. The console should + * show something like: + * + * Freed blocks: + * 0x172e610(size=288,num=1415) index 256 (0x172e710) onwards corrupted + * Block last checked OK at allocation 1457. Now 1458. + * + * * This means that the block became corrupted between allocation 1457 + * and 1458 - so if we rerun and stop the program at 1457, we can then + * step through, possibly with a data breakpoint at 0x172e710 and see + * when it occurs. 
+ * * So restart the program from the beginning. When we stop after + * initialisation execute Memento_breakAt(1457); (and maybe + * Memento_setParanoia(1), or Memento_paranoidAt(1457)) + * * Continue execution until we hit Memento_breakpoint. + * * Now you can step through and watch the memory corruption happen. + * + * Note #1: Using Memento_setParanoia(1) can cause your program to run + * very slowly. You may instead choose to use Memento_setParanoia(100) + * (or some other figure). This will only exhaustively check memory on + * every 100th allocation event. This trades speed for the size of the + * average allocation event range in which detection of memory corruption + * occurs. You may (for example) choose to run once checking every 100 + * allocations and discover that the corruption happens between events + * X and X+100. You can then rerun using Memento_paranoidAt(X), and + * it'll only start exhaustively checking when it reaches X. + * + * More than one memory allocator? + * + * If you have more than one memory allocator in the system (like for + * instance the ghostscript chunk allocator, that builds on top of the + * standard malloc and returns chunks itself), then there are some things + * to note: + * + * * If the secondary allocator gets its underlying blocks from calling + * malloc, then those will be checked by Memento, but 'subblocks' that + * are returned to the secondary allocator will not. There is currently + * no way to fix this other than trying to bypass the secondary + * allocator. One way I have found to do this with the chunk allocator + * is to tweak its idea of a 'large block' so that it puts every + * allocation in its own chunk. Clearly this negates the point of having + * a secondary allocator, and is therefore not recommended for general + * use. 
+ * + * * Again, if the secondary allocator gets its underlying blocks from + * calling malloc (and hence Memento) leak detection should still work + * (but whole blocks will be detected rather than subblocks). + * + * * If on every allocation attempt the secondary allocator calls into + * Memento_failThisEvent(), and fails the allocation if it returns true + * then more useful features can be used; firstly memory squeezing will + * work, and secondly, Memento will have a "finer grained" paranoia + * available to it. + * + * Usage with C++: + * + * Memento has some experimental code in it to trap new/delete (and + * new[]/delete[] if required) calls. + * + * In order for this to work, either: + * + * 1) Build memento.c with the c++ compiler. + * + * or + * + * 2) Build memento.c as normal with the C compiler, then from any + * one of your .cpp files, do: + * + * #define MEMENTO_CPP_EXTRAS_ONLY + * #include "memento.c" + * + * In the case where MEMENTO is not defined, this will not do anything. + * + * Both Windows and GCC provide separate new[] and delete[] operators + * for arrays. Apparently some systems do not. If this is the case for + * your system, define MEMENTO_CPP_NO_ARRAY_CONSTRUCTORS. + * + * "libbacktrace.so failed to load" + * + * In order to give nice backtraces on unix, Memento will try to use + * a libbacktrace dynamic library. If it can't find it, you'll see + * that warning, and your backtraces won't include file/line information. + * + * To fix this you'll need to build your own libbacktrace. Don't worry + * it's really easy: + * git clone git://github.com/ianlancetaylor/libbacktrace + * cd libbacktrace + * ./configure + * make + * + * This leaves the build .so as .libs/libbacktrace.so + * + * Memento will look for this on LD_LIBRARY_PATH, or in /opt/lib/, + * or in /lib/, or in /usr/lib/, or in /usr/local/lib/. I recommend + * using /opt/lib/ as this won't conflict with anything that you + * get via a package manager like apt. 
+ * + * sudo mkdir /opt + * sudo mkdir /opt/lib + * sudo cp .libs/libbacktrace.so /opt/lib/ + */ + +#ifndef MEMENTO_H + +#include <stdlib.h> +#include <stdarg.h> + +#define MEMENTO_H + +#ifndef MEMENTO_UNDERLYING_MALLOC +#define MEMENTO_UNDERLYING_MALLOC malloc +#endif +#ifndef MEMENTO_UNDERLYING_FREE +#define MEMENTO_UNDERLYING_FREE free +#endif +#ifndef MEMENTO_UNDERLYING_REALLOC +#define MEMENTO_UNDERLYING_REALLOC realloc +#endif +#ifndef MEMENTO_UNDERLYING_CALLOC +#define MEMENTO_UNDERLYING_CALLOC calloc +#endif + +#ifndef MEMENTO_MAXALIGN +#define MEMENTO_MAXALIGN (sizeof(int)) +#endif + +#define MEMENTO_PREFILL 0xa6 +#define MEMENTO_POSTFILL 0xa7 +#define MEMENTO_ALLOCFILL 0xa8 +#define MEMENTO_FREEFILL 0xa9 + +#define MEMENTO_FREELIST_MAX 0x2000000 + +int Memento_checkBlock(void *); +int Memento_checkAllMemory(void); +int Memento_check(void); + +int Memento_setParanoia(int); +int Memento_paranoidAt(int); +int Memento_breakAt(int); +void Memento_breakOnFree(void *a); +void Memento_breakOnRealloc(void *a); +int Memento_getBlockNum(void *); +int Memento_find(void *a); +void Memento_breakpoint(void); +int Memento_failAt(int); +int Memento_failThisEvent(void); +void Memento_listBlocks(void); +void Memento_listNewBlocks(void); +size_t Memento_setMax(size_t); +void Memento_stats(void); +void *Memento_label(void *, const char *); +void Memento_tick(void); + +void *Memento_malloc(size_t s); +void *Memento_realloc(void *, size_t s); +void Memento_free(void *); +void *Memento_calloc(size_t, size_t); +char *Memento_strdup(const char*); +int Memento_asprintf(char **ret, const char *format, ...); +int Memento_vasprintf(char **ret, const char *format, va_list ap); + +void Memento_info(void *addr); +void Memento_listBlockInfo(void); +void *Memento_takeByteRef(void *blk); +void *Memento_dropByteRef(void *blk); +void *Memento_takeShortRef(void *blk); +void *Memento_dropShortRef(void *blk); +void *Memento_takeIntRef(void *blk); +void *Memento_dropIntRef(void *blk); +void 
*Memento_takeRef(void *blk); +void *Memento_dropRef(void *blk); +void *Memento_adjustRef(void *blk, int adjust); +void *Memento_reference(void *blk); + +int Memento_checkPointerOrNull(void *blk); +int Memento_checkBytePointerOrNull(void *blk); +int Memento_checkShortPointerOrNull(void *blk); +int Memento_checkIntPointerOrNull(void *blk); + +void Memento_startLeaking(void); +void Memento_stopLeaking(void); + +/* Returns number of allocation events so far. */ +int Memento_sequence(void); + +/* Returns non-zero if our process was forked by Memento squeeze. */ +int Memento_squeezing(void); + +void Memento_fin(void); + +void Memento_bt(void); + +#ifdef MEMENTO + +#ifndef COMPILING_MEMENTO_C +#define malloc Memento_malloc +#define free Memento_free +#define realloc Memento_realloc +#define calloc Memento_calloc +#define strdup Memento_strdup +#define asprintf Memento_asprintf +#define vasprintf Memento_vasprintf +#endif + +#else + +#define Memento_malloc MEMENTO_UNDERLYING_MALLOC +#define Memento_free MEMENTO_UNDERLYING_FREE +#define Memento_realloc MEMENTO_UNDERLYING_REALLOC +#define Memento_calloc MEMENTO_UNDERLYING_CALLOC +#define Memento_strdup strdup +#define Memento_asprintf asprintf +#define Memento_vasprintf vasprintf + +#define Memento_checkBlock(A) 0 +#define Memento_checkAllMemory() 0 +#define Memento_check() 0 +#define Memento_setParanoia(A) 0 +#define Memento_paranoidAt(A) 0 +#define Memento_breakAt(A) 0 +#define Memento_breakOnFree(A) 0 +#define Memento_breakOnRealloc(A) 0 +#define Memento_getBlockNum(A) 0 +#define Memento_find(A) 0 +#define Memento_breakpoint() do {} while (0) +#define Memento_failAt(A) 0 +#define Memento_failThisEvent() 0 +#define Memento_listBlocks() do {} while (0) +#define Memento_listNewBlocks() do {} while (0) +#define Memento_setMax(A) 0 +#define Memento_stats() do {} while (0) +#define Memento_label(A,B) (A) +#define Memento_info(A) do {} while (0) +#define Memento_listBlockInfo() do {} while (0) +#define Memento_takeByteRef(A) (A) 
+#define Memento_dropByteRef(A) (A) +#define Memento_takeShortRef(A) (A) +#define Memento_dropShortRef(A) (A) +#define Memento_takeIntRef(A) (A) +#define Memento_dropIntRef(A) (A) +#define Memento_takeRef(A) (A) +#define Memento_dropRef(A) (A) +#define Memento_adjustRef(A,V) (A) +#define Memento_reference(A) (A) +#define Memento_checkPointerOrNull(A) 0 +#define Memento_checkBytePointerOrNull(A) 0 +#define Memento_checkShortPointerOrNull(A) 0 +#define Memento_checkIntPointerOrNull(A) 0 + +#define Memento_tick() do {} while (0) +#define Memento_startLeaking() do {} while (0) +#define Memento_stopLeaking() do {} while (0) +#define Memento_fin() do {} while (0) +#define Memento_bt() do {} while (0) +#define Memento_sequence() (0) +#define Memento_squeezing() (0) + +#endif /* MEMENTO */ + +#endif /* MEMENTO_H */ diff --git a/extract/src/memento.py b/extract/src/memento.py new file mode 100755 index 00000000..987cd4fd --- /dev/null +++ b/extract/src/memento.py @@ -0,0 +1,83 @@ +#!/usr/bin/env python3 + +''' +Post-processor for Memento. + +Args: + -q <quiet> + Controls how often we output 'Memory squeezing @ ...' lines. E.g. '-q + 10' outputs for multiples of 10. +''' + +import os +import re +import sys + + +def main(): + quiet = 1 + out_raw = None + args = iter(sys.argv[1:]) + while 1: + try: + arg = next(args) + except StopIteration: + break + if arg == '-h': + print(__doc__) + elif arg == '-o': + out_raw = open(next(args), 'w') + elif arg == '-q': + quiet = int(next(args)) + else: + raise Exception(f'unrecognised arg: {arg}') + + openbsd = os.uname()[0] == 'OpenBSD' + n = None + segv = 0 + leaks = 0 + lines = [] + for line in sys.stdin: + if out_raw: + out_raw.write(line) + m = re.match('^Memory squeezing @ ([0-9]+)( complete)?', line) + if m: + if not m.group(2): + # Start of squeeze. + + if not openbsd: + # Looks like memento's forked processes might terminate + # before they get to output the 'Memory squeezing @ <N> + # complete' line. 
+ # + assert n is None, f'n={n} line={line!r}' + + n = int(m.group(1)) + if n % quiet == 0: + sys.stdout.write(line) + sys.stdout.flush() + else: + # End of squeeze. + assert n == int(m.group(1)) + # Output info about any failure: + if segv or leaks: + print(f'Failure at squeeze {n}: segv={segv} leaks={leaks}:') + for l in lines: + if l.endswith('\n'): + l = l[:-1] + print(f' {l}') + lines = [] + segv = 0 + leaks = 0 + n = None + else: + if n is not None: + lines.append(line) + if line.startswith('SEGV at:'): + segv = 1 + if line.startswith('Allocated blocks'): + leaks = 1 + + +if __name__ == '__main__': + main() diff --git a/extract/src/misc-test.c b/extract/src/misc-test.c new file mode 100644 index 00000000..58b098ff --- /dev/null +++ b/extract/src/misc-test.c @@ -0,0 +1,86 @@ +#include "memento.h" +#include "xml.h" + +#include <errno.h> +#include <stdio.h> + + +static int s_num_fails = 0; + +static void s_check( + int values_equal, + const char* text, + int ret, + const char* value_s, + int errno_, + const char* value_expected_s, + int errno_expected + ) +{ + int ok; + if (errno_expected) { + ok = (ret == -1 && errno_ == errno_expected); + } + else { + ok = (ret == 0 && values_equal); + } + + if (ok) printf(" ok: "); + else printf(" fail:"); + printf(" text=%16s", text); + if (errno_expected) printf(" errno_expected=%6i", errno_expected); + else printf(" value_expected=%6s", value_expected_s); + printf(". 
result: ret=%2i value=%6s errno=%3i", ret, value_s, errno_); + printf(".\n"); + if (!ok) s_num_fails += 1; +} + +/* Parses <text> as int and reports whether the result (or the failure errno) matches what was expected. */ +static void s_check_int(const char* text, int value_expected, int expected_errno) +{ + int value = 0; /* Defined even when parsing fails and the out-param is not written. */ + int ret = extract_xml_str_to_int(text, &value); + char value_s[32]; + char value_expected_s[32]; + snprintf(value_s, sizeof(value_s), "%i", value); + snprintf(value_expected_s, sizeof(value_expected_s), "%i", value_expected); + s_check(value == value_expected, text, ret, value_s, errno, value_expected_s, expected_errno); + return; +} + +/* Parses <text> as unsigned and reports whether the result (or the failure errno) matches what was expected. */ +static void s_check_uint(const char* text, unsigned expected_value, int expected_errno) +{ + unsigned value = 0; /* Defined even when parsing fails and the out-param is not written. */ + int ret = extract_xml_str_to_uint(text, &value); + char value_s[32]; + char value_expected_s[32]; + snprintf(value_s, sizeof(value_s), "%u", value); + /* Format the expected value, not the parsed one, so the report shows what the test wanted. */ + snprintf(value_expected_s, sizeof(value_expected_s), "%u", expected_value); + s_check(value == expected_value, text, ret, value_s, errno, value_expected_s, expected_errno); + return; +} + +int main(void) +{ + printf("testing extract_xml_str_to_int():\n"); + s_check_int("2", 2, 0); + s_check_int("-20", -20, 0); + s_check_int("-20b", 0, EINVAL); + s_check_int("123456789123", 0, ERANGE); + + printf("testing extract_xml_str_to_uint():\n"); + s_check_uint("2", 2, 0); + s_check_uint("-20", 0, ERANGE); + s_check_uint("-20b", 0, EINVAL); + s_check_uint("123456789123", 0, ERANGE); + + printf("s_num_fails=%i\n", s_num_fails); + + if (s_num_fails) { + printf("Failed\n"); + return 1; + } + else { + printf("Succeeded\n"); + return 0; + } +} diff --git a/extract/src/outf.c b/extract/src/outf.c new file mode 100644 index 00000000..95575c16 --- /dev/null +++ b/extract/src/outf.c @@ -0,0 +1,42 @@ +#include "memento.h" +#include "outf.h" + +#include <stdarg.h> +#include <stdio.h> +#include <string.h> + +static int s_verbose = 0; + +void outf_verbose_set(int verbose) +{ + s_verbose = verbose; +} + +void (outf)( + int level, + const char* file, + int line, + const char* fn, + int ln, + const 
char* format, + ... + ) +{ + va_list va; + if (level > s_verbose) { + return; + } + + if (ln) { + fprintf(stderr, "%s:%i:%s: ", file, line, fn); + } + va_start(va, format); + vfprintf(stderr, format, va); + va_end(va); + if (ln) { + size_t len = strlen(format); + if (len == 0 || format[len-1] != '\n') { + fprintf(stderr, "\n"); + } + } +} diff --git a/extract/src/outf.h b/extract/src/outf.h new file mode 100644 index 00000000..a2b6c078 --- /dev/null +++ b/extract/src/outf.h @@ -0,0 +1,32 @@ +#ifndef ARTIFEX_EXTRACT_OUTF_H +#define ARTIFEX_EXTRACT_OUTF_H + +/* Only for internal use by extract code. */ + +void (outf)( + int level, + const char* file, int line, + const char* fn, + int ln, + const char* format, + ... + ); +/* Outputs text if <level> is less than or equal to verbose value set by +outf_verbose_set(). */ + +#define outf(format, ...) \ + (outf)(1, __FILE__, __LINE__, __FUNCTION__, 1 /*ln*/, format, ##__VA_ARGS__) + +#define outf0(format, ...) \ + (outf)(0, __FILE__, __LINE__, __FUNCTION__, 1 /*ln*/, format, ##__VA_ARGS__) + +#define outfx(format, ...) + +/* Simple printf-style debug output. */ + +#define outfx(format, ...) + +void outf_verbose_set(int verbose); +/* Set verbose value. Higher values are more verbose. Initial value is 0. 
*/ + +#endif diff --git a/extract/src/template.docx b/extract/src/template.docx Binary files differnew file mode 100644 index 00000000..8ad94155 --- /dev/null +++ b/extract/src/template.docx diff --git a/extract/src/xml.c b/extract/src/xml.c new file mode 100644 index 00000000..8dab511b --- /dev/null +++ b/extract/src/xml.c @@ -0,0 +1,505 @@ +#include "../include/extract_alloc.h" + +#include "mem.h" +#include "memento.h" +#include "outf.h" +#include "xml.h" + +#include <assert.h> +#include <errno.h> +#include <float.h> +#include <limits.h> + +#ifdef _MSC_VER + #include "compat_stdint.h" + #include "compat_strtoll.h" +#else + #include <stdint.h> +#endif + +#include <stdlib.h> +#include <string.h> + + +/* These str_*() functions realloc buffer as required. All return 0 or -1 with +errno set. */ + +/* Appends first <s_len> chars of string <s> to *p. */ +static int str_catl(extract_alloc_t* alloc, char** p, const char* s, int s_len) +{ + size_t p_len = (*p) ? strlen(*p) : 0; + if (extract_realloc2( + alloc, + p, + p_len + 1, + p_len + s_len + 1 + )) return -1; + memcpy(*p + p_len, s, s_len); + (*p)[p_len + s_len] = 0; + return 0; +} + +/* Appends a char. */ +static int str_catc(extract_alloc_t* alloc, char** p, char c) +{ + return str_catl(alloc, p, &c, 1); +} + +/* Unused but usefult o keep code here. */ +#if 0 +/* Appends a string. 
*/ +static int str_cat(extract_alloc_t* alloc, char** p, const char* s) +{ + return str_catl(alloc, p, s, strlen(s)); +} +#endif + +char* extract_xml_tag_attributes_find(extract_xml_tag_t* tag, const char* name) +{ + int i; + for (i=0; i<tag->attributes_num; ++i) { + if (!strcmp(tag->attributes[i].name, name)) { + char* ret = tag->attributes[i].value; + return ret; + } + } + outf("Failed to find attribute '%s'",name); + return NULL; +} + +int extract_xml_tag_attributes_find_float( + extract_xml_tag_t* tag, + const char* name, + float* o_out + ) +{ + const char* value = extract_xml_tag_attributes_find(tag, name); + if (!value) { + errno = ESRCH; + return -1; + } + if (extract_xml_str_to_float(value, o_out)) return -1; + return 0; +} + +int extract_xml_tag_attributes_find_double( + extract_xml_tag_t* tag, + const char* name, + double* o_out + ) +{ + const char* value = extract_xml_tag_attributes_find(tag, name); + if (!value) { + errno = ESRCH; + return -1; + } + if (extract_xml_str_to_double(value, o_out)) return -1; + return 0; +} + +int extract_xml_tag_attributes_find_int( + extract_xml_tag_t* tag, + const char* name, + int* o_out + ) +{ + const char* text = extract_xml_tag_attributes_find(tag, name); + return extract_xml_str_to_int(text, o_out); +} + +int extract_xml_tag_attributes_find_uint( + extract_xml_tag_t* tag, + const char* name, + unsigned* o_out + ) +{ + const char* text = extract_xml_tag_attributes_find(tag, name); + return extract_xml_str_to_uint(text, o_out); +} + +int extract_xml_tag_attributes_find_size( + extract_xml_tag_t* tag, + const char* name, + size_t* o_out + ) +{ + const char* text = extract_xml_tag_attributes_find(tag, name); + return extract_xml_str_to_size(text, o_out); +} + +int extract_xml_str_to_llint(const char* text, long long* o_out) +{ + char* endptr; + long long x; + if (!text) { + errno = ESRCH; + return -1; + } + if (text[0] == 0) { + errno = EINVAL; + return -1; + } + errno = 0; + x = strtoll(text, &endptr, 10 /*base*/); + if 
(errno) { + return -1; + } + if (*endptr) { + errno = EINVAL; + return -1; + } + *o_out = x; + return 0; +} + +int extract_xml_str_to_ullint(const char* text, unsigned long long* o_out) +{ + char* endptr; + unsigned long long x; + if (!text) { + errno = ESRCH; + return -1; + } + if (text[0] == 0) { + errno = EINVAL; + return -1; + } + errno = 0; + x = strtoull(text, &endptr, 10 /*base*/); + if (errno) { + return -1; + } + if (*endptr) { + errno = EINVAL; + return -1; + } + *o_out = x; + return 0; +} + +int extract_xml_str_to_int(const char* text, int* o_out) +{ + long long x; + if (extract_xml_str_to_llint(text, &x)) return -1; + if (x > INT_MAX || x < INT_MIN) { + errno = ERANGE; + return -1; + } + *o_out = (int) x; + return 0; +} + +int extract_xml_str_to_uint(const char* text, unsigned* o_out) +{ + unsigned long long x; + if (extract_xml_str_to_ullint(text, &x)) return -1; + if (x > UINT_MAX) { + errno = ERANGE; + return -1; + } + *o_out = (unsigned) x; + return 0; +} + +int extract_xml_str_to_size(const char* text, size_t* o_out) +{ + unsigned long long x; + if (extract_xml_str_to_ullint(text, &x)) return -1; + if (x > SIZE_MAX) { + errno = ERANGE; + return -1; + } + *o_out = (size_t) x; + return 0; +} + +int extract_xml_str_to_double(const char* text, double* o_out) +{ + char* endptr; + double x; + if (!text) { + errno = ESRCH; + return -1; + } + if (text[0] == 0) { + errno = EINVAL; + return -1; + } + errno = 0; + x = strtod(text, &endptr); + if (errno) { + return -1; + } + if (*endptr) { + errno = EINVAL; + return -1; + } + *o_out = x; + return 0; +} + +int extract_xml_str_to_float(const char* text, float* o_out) +{ + double x; + if (extract_xml_str_to_double(text, &x)) { + return -1; + } + if (x > FLT_MAX || x < -FLT_MAX) { + errno = ERANGE; + return -1; + } + *o_out = (float) x; + return 0; +} + +static int extract_xml_tag_attributes_append( + extract_alloc_t* alloc, + extract_xml_tag_t* tag, + char* name, + char* value + ) +{ + if (extract_realloc2( + 
alloc, + &tag->attributes, + sizeof(extract_xml_attribute_t) * tag->attributes_num, + sizeof(extract_xml_attribute_t) * (tag->attributes_num+1) + )) return -1; + tag->attributes[tag->attributes_num].name = name; + tag->attributes[tag->attributes_num].value = value; + tag->attributes_num += 1; + return 0; +} + +void extract_xml_tag_init(extract_xml_tag_t* tag) +{ + tag->name = NULL; + tag->attributes = NULL; + tag->attributes_num = 0; + extract_astring_init(&tag->text); +} + +void extract_xml_tag_free(extract_alloc_t* alloc, extract_xml_tag_t* tag) +{ + int i; + extract_free(alloc, &tag->name); + for (i=0; i<tag->attributes_num; ++i) { + extract_xml_attribute_t* attribute = &tag->attributes[i]; + extract_free(alloc, &attribute->name); + extract_free(alloc, &attribute->value); + } + extract_free(alloc, &tag->attributes); + extract_astring_free(alloc, &tag->text); + extract_xml_tag_init(tag); +} + +/* Unused but useful to keep code here. */ +#if 0 +/* Like strcmp() but also handles NULL. */ +static int extract_xml_strcmp_null(const char* a, const char* b) +{ + if (!a && !b) return 0; + if (!a) return -1; + if (!b) return 1; + return strcmp(a, b); +} +#endif + +/* Unused but usefult o keep code here. */ +#if 0 +/* Compares tag name, then attributes; returns -1, 0 or +1. Does not compare +extract_xml_tag_t::text members. 
*/ +int extract_xml_compare_tags(const extract_xml_tag_t* lhs, const extract_xml_tag_t* rhs) +{ + int d; + int i; + d = extract_xml_strcmp_null(lhs->name, rhs->name); + if (d) return d; + for(i=0;; ++i) { + if (i >= lhs->attributes_num || i >= rhs->attributes_num) { + break; + } + const extract_xml_attribute_t* lhs_attribute = &lhs->attributes[i]; + const extract_xml_attribute_t* rhs_attribute = &rhs->attributes[i]; + d = extract_xml_strcmp_null(lhs_attribute->name, rhs_attribute->name); + if (d) return d; + d = extract_xml_strcmp_null(lhs_attribute->value, rhs_attribute->value); + if (d) return d; + } + if (lhs->attributes_num > rhs->attributes_num) return +1; + if (lhs->attributes_num < rhs->attributes_num) return -1; + return 0; +} +#endif + + +int extract_xml_pparse_init(extract_alloc_t* alloc, extract_buffer_t* buffer, const char* first_line) +{ + char* first_line_buffer = NULL; + int e = -1; + + if (first_line) { + size_t first_line_len = strlen(first_line); + size_t actual; + if (extract_malloc(alloc, &first_line_buffer, first_line_len + 1)) goto end; + + if (extract_buffer_read(buffer, first_line_buffer, first_line_len, &actual)) { + outf("error: failed to read first line."); + goto end; + } + first_line_buffer[actual] = 0; + if (strcmp(first_line, first_line_buffer)) { + outf("Unrecognised prefix: ", first_line_buffer); + errno = ESRCH; + goto end; + } + } + + for(;;) { + char c; + int ee = extract_buffer_read(buffer, &c, 1, NULL); + if (ee) { + if (ee==1) errno = ESRCH; /* EOF. */ + goto end; + } + if (c == '<') { + break; + } + else if (c == ' ' || c == '\n') {} + else { + outf("Expected '<' but found c=%i", c); + goto end; + } + } + e = 0; + + end: + extract_free(alloc, &first_line_buffer); + return e; +} + +static int s_next(extract_buffer_t* buffer, int* ret, char* o_c) +/* Reads next char, but if EOF sets *ret=+1, errno=ESRCH and returns +1. 
*/ +{ + int e = extract_buffer_read(buffer, o_c, 1, NULL); + if (e == +1) { + *ret = +1; + errno = ESRCH; + } + return e; +} + +static const char* extract_xml_tag_string(extract_alloc_t* alloc, extract_xml_tag_t* tag) +{ + static char* buffer = NULL; + extract_free(alloc, &buffer); + extract_asprintf(alloc, &buffer, "<name=%s>", tag->name ? tag->name : ""); + return buffer; +} + +int extract_xml_pparse_next(extract_buffer_t* buffer, extract_xml_tag_t* out) +{ + int ret = -1; + char* attribute_name = NULL; + char* attribute_value = NULL; + char c; + int i; + extract_alloc_t* alloc = extract_buffer_alloc(buffer); + + if (0) outf("out is: %s", extract_xml_tag_string(extract_buffer_alloc(buffer), out)); + assert(buffer); + extract_xml_tag_free(alloc, out); + + /* Read tag name. */ + for( i=0;; ++i) { + int e = extract_buffer_read(buffer, &c, 1, NULL); + if (e) { + if (e == +1) ret = 1; /* EOF is not an error here. */ + goto end; + } + if (c == '>' || c == ' ') break; + if (str_catc(alloc, &out->name, c)) goto end; + } + if (c == ' ') { + + /* Read attributes. */ + for(;;) { + + /* Read attribute name. */ + for(;;) { + if (s_next(buffer, &ret, &c)) goto end; + if (c == '=' || c == '>' || c == ' ') break; + if (str_catc(alloc, &attribute_name, c)) goto end; + } + if (c == '>') break; + + if (c == '=') { + /* Read attribute value. */ + int quote_single = 0; + int quote_double = 0; + size_t l; + for(;;) { + if (s_next(buffer, &ret, &c)) goto end; + if (c == '\'') quote_single = !quote_single; + else if (c == '"') quote_double = !quote_double; + else if (!quote_single && !quote_double + && (c == ' ' || c == '/' || c == '>') + ) { + /* We are at end of attribute value. */ + break; + } + else if (c == '\\') { + // Escape next character. + if (s_next(buffer, &ret, &c)) goto end; + } + if (str_catc(alloc, &attribute_value, c)) goto end; + } + + /* Remove any enclosing quotes. 
*/ + l = strlen(attribute_value); + if (l >= 2) { + if ( + (attribute_value[0] == '"' && attribute_value[l-1] == '"') + || + (attribute_value[0] == '\'' && attribute_value[l-1] == '\'') + ) { + memmove(attribute_value, attribute_value+1, l-2); + attribute_value[l-2] = 0; + } + } + } + + if (extract_xml_tag_attributes_append(alloc, out, attribute_name, attribute_value)) goto end; + attribute_name = NULL; + attribute_value = NULL; + if (c == '/') { + if (s_next(buffer, &ret, &c)) goto end; + } + if (c == '>') break; + } + } + + /* Read plain text until next '<'. */ + for(;;) { + /* We don't use s_next() here because EOF is not an error. */ + int e = extract_buffer_read(buffer, &c, 1, NULL); + if (e == +1) { + break; /* EOF is not an error here. */ + } + if (e) goto end; + if (c == '<') break; + if (extract_astring_catc(alloc, &out->text, c)) goto end; + } + + ret = 0; + + end: + + extract_free(alloc, &attribute_name); + extract_free(alloc, &attribute_value); + if (ret) { + extract_xml_tag_free(alloc, out); + } + return ret; +} + diff --git a/extract/src/xml.h b/extract/src/xml.h new file mode 100644 index 00000000..d11fd886 --- /dev/null +++ b/extract/src/xml.h @@ -0,0 +1,123 @@ +#ifndef ARTIFEX_EXTRACT_XML +#define ARTIFEX_EXTRACT_XML + +/* Only for internal use by extract code. */ + +#include "../include/extract_buffer.h" + +#include "astring.h" + + +/* Things for representing XML. */ + +typedef struct { + char* name; + char* value; +} extract_xml_attribute_t; + +/* Represents a single <...> XML tag plus trailing text. */ +typedef struct { + char* name; + extract_xml_attribute_t* attributes; + int attributes_num; + extract_astring_t text; +} extract_xml_tag_t; + + +void extract_xml_tag_init(extract_xml_tag_t* tag); +/* Initialises tag. Will cause leak if tag contains data - in this case call +extract_xml_tag_free(). */ + +void extract_xml_tag_free(extract_alloc_t* alloc, extract_xml_tag_t* tag); +/* Frees tag and then calls extract_xml_tag_init(). 
*/ + + +int extract_xml_pparse_init(extract_alloc_t* alloc, extract_buffer_t* buffer, const char* first_line); +/* extract_xml_pparse_*(): simple XML 'pull' parser. + +extract_xml_pparse_init() merely consumes the initial '<'. Thereafter +extract_xml_pparse_next() consumes the next '<' before returning the previous +tag. */ + +/* Reads from the specified buffer. + +If first_line is not NULL, we check that it matches the first line in the buffer. + +Returns -1 with errno=ESRCH if we fail to read the first '<' due to EOF. +*/ + + +int extract_xml_pparse_next(extract_buffer_t* buffer, extract_xml_tag_t* out); +/* Returns the next XML tag. + +Returns 0 with *out containing next tag; or -1 with errno set if error; or +1 +with errno=ESRCH if EOF. + +*out is initially passed to extract_xml_tag_free(), so *out must have been +initialised, e.g. by extract_xml_tag_init(). */ + + +char* extract_xml_tag_attributes_find(extract_xml_tag_t* tag, const char* name); +/* Returns pointer to value of specified attribute, or NULL if not found. */ + +int extract_xml_tag_attributes_find_float( + extract_xml_tag_t* tag, + const char* name, + float* o_out + ); +/* Finds float value of specified attribute, returning error if not found or +there is trailing text. */ + +int extract_xml_tag_attributes_find_double( + extract_xml_tag_t* tag, + const char* name, + double* o_out + ); +/* Finds double value of specified attribute, returning error if not found or there is +trailing text. */ + + +/* Next few functions write to out-param and return zero on success, else +return -1 with errno set. + +An error is returned if value is out of range or there is any trailing text. 
*/ + +int extract_xml_str_to_llint(const char* text, long long* o_out); + +int extract_xml_str_to_ullint(const char* text, unsigned long long* o_out); + +int extract_xml_str_to_int(const char* text, int* o_out); + +int extract_xml_str_to_uint(const char* text, unsigned* o_out); + +int extract_xml_str_to_size(const char* text, size_t* o_out); + +int extract_xml_str_to_double(const char* text, double* o_out); + +int extract_xml_str_to_float(const char* text, float* o_out); + + +int extract_xml_tag_attributes_find_int( + extract_xml_tag_t* tag, + const char* name, + int* o_out + ); +/* Finds int value of specified attribute, returning error if not found. */ + +int extract_xml_tag_attributes_find_uint( + extract_xml_tag_t* tag, + const char* name, + unsigned* o_out + ); +/* Finds unsigned int value of specified attribute, returning error if not +found. */ + +int extract_xml_tag_attributes_find_size( + extract_xml_tag_t* tag, + const char* name, + size_t* o_out + ); +/* Finds unsigned int value of specified attribute, returning error if not +found. */ + +#endif diff --git a/extract/src/zip-test.c b/extract/src/zip-test.c new file mode 100644 index 00000000..67082342 --- /dev/null +++ b/extract/src/zip-test.c @@ -0,0 +1,224 @@ +/* Crude programme to show detailed information about a zip file. */ + +#include "memento.h" +#include "outf.h" + +#include <assert.h> +#include <ctype.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + + +static int s_native_little_endinesss(void) +{ + static const char a[] = { 1, 2}; + uint16_t b = *(uint16_t*) a; + if (b == 1 + 2*256) { + /* Native little-endiness. 
*/ + return 1; + } + else if (b == 2 + 1*256) { + return 0; + } + abort(); +} + + +static int s_show(const char* filename) +{ + outf("Looking at filename=%s", filename); + assert(s_native_little_endinesss()); + FILE* f = fopen(filename, "r"); + assert(f); + size_t datasize = 10*1000*1000; + char* data = extract_malloc(datasize); + assert(data); + size_t n = fread(data, 1, datasize, f); + assert(n < datasize); + datasize = n; + outf("datasize=%zi", datasize); + fclose(f); + + /* look for End of central directory (EOCD) record. */ + uint32_t magic = 0x06054b50; + char* pos = data + datasize - 22; + for(;;) { + if (!memcmp(pos, &magic, sizeof(magic))) break; + assert(pos > data); + pos -= 1; + } + outf("found EOCD at offset=%li", pos-data); + uint16_t disk_number = *(uint16_t*)(pos+4); + uint16_t disk_cd = *(uint16_t*)(pos+6); + uint16_t num_records_on_disk = *(uint16_t*)(pos+8); + uint16_t num_records = *(uint16_t*)(pos+10); + uint32_t size_cd = *(uint32_t*)(pos+12); + uint32_t offset_cd = *(uint32_t*)(pos+16); + uint16_t comment_length = *(uint16_t*)(pos+20); + char* comment = extract_malloc(comment_length + 1); + assert(comment); + memcpy(comment, pos+22, comment_length); + comment[comment_length] = 0; + assert(strlen(comment) == comment_length); + outf(" EOCD:"); + outf(" disk_number=%i", disk_number); + outf(" disk_cd=%i", disk_cd); + outf(" num_records_on_disk=%i", num_records_on_disk); + outf(" num_records=%i", num_records); + outf(" size_cd=%i", size_cd); + outf(" offset_cd=%i", offset_cd); + outf(" comment_length=%i", comment_length); + outf(" comment=%s", comment); + + if (pos != data + datasize - 22 - comment_length) { + outf("file does not end with EOCD. datasize=%zi pos-data=%li datasize-22-comment_length=%zi", + datasize, + pos-data, + datasize-22-comment_length + ); + /* I think this isn't actually an error according to the Zip standard, + but zip files created by us should always pass this test. 
Note that + Word doesn't like trailing data after the EOCD record, but will repair + the file. */ + assert(0); + } + + pos = data + offset_cd; + int i; + for (i=0; i<num_records_on_disk; ++i) { + outf(" file %i: offset=%i", i, pos - data); + magic = 0x02014b50; + assert(!memcmp(pos, &magic, sizeof(magic))); + uint16_t version_made_by = *(uint16_t*)(pos+4); + uint16_t version_needed = *(uint16_t*)(pos+6); + uint16_t general_bit_flag = *(uint16_t*)(pos+8); + uint16_t compression_method = *(uint16_t*)(pos+10); + uint16_t mtime = *(uint16_t*)(pos+12); + uint16_t mdate = *(uint16_t*)(pos+14); + uint32_t crc = *(uint32_t*)(pos+16); + uint32_t size_compressed = *(uint32_t*)(pos+20); + uint32_t size_uncompressed = *(uint32_t*)(pos+24); + uint16_t filename_length = *(uint16_t*)(pos+28); + uint16_t extrafield_length = *(uint16_t*)(pos+30); + uint16_t filecomment_length = *(uint16_t*)(pos+32); + uint16_t disk_number = *(uint16_t*)(pos+34); + uint16_t internal_attributes = *(uint16_t*)(pos+36); + uint32_t external_attributes = *(uint32_t*)(pos+38); + uint32_t offset = *(uint32_t*)(pos+42); + char* filename = extract_malloc(filename_length + 1); + assert(filename); + memcpy(filename, pos+46, filename_length); + filename[filename_length] = 0; + + char* comment = extract_malloc(filecomment_length + 1); + assert(comment); + memcpy(comment, pos+46+filename_length+extrafield_length, filecomment_length); + comment[filecomment_length] = 0; + assert(strlen(comment) == filecomment_length); + outf(" version_made_by=0x%x", version_made_by); + outf(" version_needed=0x%x", version_needed); + outf(" general_bit_flag=0x%x", general_bit_flag); + outf(" compression_method=%i", compression_method); + outf(" mtime=%i", mtime); + outf(" mdate=%i", mdate); + outf(" crc=%i", crc); + outf(" size_compressed=%i", size_compressed); + outf(" size_uncompressed=%i", size_uncompressed); + outf(" filename_length=%i", filename_length); + outf(" extrafield_length=%i", extrafield_length); + outf(" 
filecomment_length=%i", filecomment_length); + outf(" disk_number=%i", disk_number); + outf(" internal_attributes=0x%x", internal_attributes); + outf(" external_attributes=0x%x", external_attributes); + outf(" offset=%i", offset); + outf(" filename=%s", filename); + + if (extrafield_length) { + outf( " extra:"); + fprintf(stderr, " "); + char* extra = pos + 46+filename_length; + int j; + for (j=0; j<extrafield_length; ++j) { + unsigned char c = extra[j]; + if (isprint(c) && c != '\\') fputc(c, stderr); + else fprintf(stderr, "\\x%02x", c); + } + fputc('\n', stderr); + } + + /* show local file header. */ + { + char* local_pos = data + offset; + outf(" local header offset=%i", i, local_pos - data); + magic = 0x04034b50; + assert(!memcmp(local_pos, &magic, sizeof(magic))); + + uint16_t version_needed = *(uint16_t*)(local_pos+4); + uint16_t general_bit_flag = *(uint16_t*)(local_pos+6); + uint16_t compression_method = *(uint16_t*)(local_pos+8); + uint16_t mtime = *(uint16_t*)(local_pos+10); + uint16_t mdate = *(uint16_t*)(local_pos+12); + uint32_t crc = *(uint32_t*)(local_pos+14); + uint32_t size_compressed = *(uint32_t*)(local_pos+18); + uint32_t size_uncompressed = *(uint32_t*)(local_pos+22); + uint16_t filename_length = *(uint16_t*)(local_pos+26); + uint16_t extrafield_length = *(uint16_t*)(local_pos+28); + + char* filename = extract_malloc(filename_length + 1); + assert(filename); + memcpy(filename, local_pos+30, filename_length); + filename[filename_length] = 0; + + outf(" version_needed=0x%x", version_needed); + outf(" general_bit_flag=0x%x", general_bit_flag); + outf(" compression_method=%i", compression_method); + outf(" mtime=%i", mtime); + outf(" mdate=%i", mdate); + outf(" crc=%i", crc); + outf(" size_compressed=%i", size_compressed); + outf(" size_uncompressed=%i", size_uncompressed); + outf(" filename_length=%i", filename_length); + outf(" extrafield_length=%i", extrafield_length); + outf(" filecomment_length=%i", filecomment_length); + outf(" 
disk_number=%i", disk_number); + outf(" internal_attributes=0x%x", internal_attributes); + outf(" external_attributes=0x%x", external_attributes); + outf(" offset=%i", offset); + outf(" filename=%s", filename); + + if (extrafield_length) { + outf( " extra:"); + fprintf(stderr, " "); + char* extra = local_pos + 30 + filename_length; + int j; + for (j=0; j<extrafield_length; ++j) { + unsigned char c = extra[j]; + if (isprint(c) && c != '\\') fputc(c, stderr); + else fprintf(stderr, "\\x%02x", c); + } + fputc('\n', stderr); + } + + } + + outf(" comment=%s", comment); + + pos += 46 + filename_length + extrafield_length + filecomment_length; + } + + outf("finished"); + extract_free(&data); + + return 0; +} + +int main(int argc, char** argv) +{ + outf_level_set(1); + int i; + for (i=1; i<argc; ++i) { + s_show(argv[i]); + } + return 0; +} diff --git a/extract/src/zip.c b/extract/src/zip.c new file mode 100644 index 00000000..013cd578 --- /dev/null +++ b/extract/src/zip.c @@ -0,0 +1,307 @@ +#include "../include/extract_alloc.h" + +#include "mem.h" +#include "memento.h" +#include "outf.h" +#include "zip.h" + +#include <zlib.h> +/* For crc32(). */ + +#include <assert.h> +#include <errno.h> +#include <limits.h> + +#ifdef _MSC_VER + #include "compat_stdint.h" +#else + #include <stdint.h> +#endif + + +typedef struct +{ + int16_t mtime; + int16_t mdate; + int32_t crc_sum; + int32_t size_compressed; + int32_t size_uncompressed; + char* name; + uint32_t offset; + uint16_t attr_internal; + uint32_t attr_external; + +} extract_zip_cd_file_t; + +struct extract_zip_t +{ + extract_buffer_t* buffer; + extract_zip_cd_file_t* cd_files; + int cd_files_num; + + /* errno_ is set to non-zero if any operation fails; avoids need to check + after every small output operation. */ + int errno_; + int eof; + + /* Defaults for various values in zip file headers etc. 
*/ + uint16_t mtime; + uint16_t mdate; + uint16_t version_creator; + uint16_t version_extract; + uint16_t general_purpose_bit_flag; + uint16_t file_attr_internal; + uint32_t file_attr_external; + char* archive_comment; +}; + +int extract_zip_open(extract_buffer_t* buffer, extract_zip_t** o_zip) +{ + int e = -1; + extract_zip_t* zip; + extract_alloc_t* alloc = extract_buffer_alloc(buffer); + + if (extract_malloc(alloc, &zip, sizeof(*zip))) goto end; + + zip->cd_files = NULL; + zip->cd_files_num = 0; + zip->buffer = buffer; + zip->errno_ = 0; + zip->eof = 0; + + /* We could maybe convert current date/time to the ms-dos format required + here, but using zeros doesn't seem to make a difference to Word etc. */ + zip->mtime = 0; + zip->mdate = 0; + + /* These are all copied from command-line zip on unix. */ + zip->version_creator = (0x3 << 8) + 30; /* 0x3 is unix, 30 means 3.0. */ + zip->version_extract = 10; /* 10 means 1.0. */ + zip->general_purpose_bit_flag = 0; + zip->file_attr_internal = 0; + + /* We follow command-line zip which uses 0x81a40000 which is octal + 0100644:0. (0100644 is S_IFREG (regular file) plus rw-r-r. See stat(2) for + details.) */ + zip->file_attr_external = (0100644 << 16) + 0; + if (extract_strdup(alloc, "Artifex", &zip->archive_comment)) goto end; + + e = 0; + + end: + if (e) { + if (zip) extract_free(alloc, &zip->archive_comment); + extract_free(alloc, &zip); + *o_zip = NULL; + } + else { + *o_zip = zip; + } + return e; +} + +static int s_native_little_endinesss(void) +{ + static const char a[] = { 1, 2}; + uint16_t b = *(uint16_t*) a; + if (b == 1 + 2*256) { + /* Native little-endiness. */ + return 1; + } + else if (b == 2 + 1*256) { + /* Native big-endiness. 
*/ + return 0; + } + abort(); +} + +static int s_write(extract_zip_t* zip, const void* data, size_t data_length) +{ + size_t actual; + int e; + if (zip->errno_) return -1; + if (zip->eof) return +1; + e = extract_buffer_write(zip->buffer, data, data_length, &actual); + if (e == -1) zip->errno_ = errno; + if (e == +1) zip->eof = 1; + return e; +} + +static int s_write_uint32(extract_zip_t* zip, uint32_t value) +{ + if (s_native_little_endinesss()) { + return s_write(zip, &value, sizeof(value)); + } + else { + unsigned char value2[4] = { + (unsigned char) (value >> 0), + (unsigned char) (value >> 8), + (unsigned char) (value >> 16), + (unsigned char) (value >> 24) + }; + return s_write(zip, &value2, sizeof(value2)); + } +} + +static int s_write_uint16(extract_zip_t* zip, uint16_t value) +{ + if (s_native_little_endinesss()) { + return s_write(zip, &value, sizeof(value)); + } + else { + unsigned char value2[2] = { + (unsigned char) (value >> 0), + (unsigned char) (value >> 8) + }; + return s_write(zip, &value2, sizeof(value2)); + } +} + +static int s_write_string(extract_zip_t* zip, const char* text) +{ + return s_write(zip, text, strlen(text)); +} + + +int extract_zip_write_file( + extract_zip_t* zip, + const void* data, + size_t data_length, + const char* name + ) +{ + int e = -1; + extract_zip_cd_file_t* cd_file = NULL; + extract_alloc_t* alloc = extract_buffer_alloc(zip->buffer); + + if (data_length > INT_MAX) { + assert(0); + errno = EINVAL; + return -1; + } + /* Create central directory file header for later. 
*/ + if (extract_realloc2( + alloc, + &zip->cd_files, + sizeof(extract_zip_cd_file_t) * zip->cd_files_num, + sizeof(extract_zip_cd_file_t) * (zip->cd_files_num+1) + )) goto end; + cd_file = &zip->cd_files[zip->cd_files_num]; + cd_file->name = NULL; + + cd_file->mtime = zip->mtime; + cd_file->mdate = zip->mtime; + cd_file->crc_sum = (int32_t) crc32(crc32(0, NULL, 0), data, (int) data_length); + cd_file->size_compressed = (int) data_length; + cd_file->size_uncompressed = (int) data_length; + if (extract_strdup(alloc, name, &cd_file->name)) goto end; + cd_file->offset = (int) extract_buffer_pos(zip->buffer); + cd_file->attr_internal = zip->file_attr_internal; + cd_file->attr_external = zip->file_attr_external; + if (!cd_file->name) goto end; + + /* Write local file header. */ + { + const char extra_local[] = ""; /* Modify for testing. */ + s_write_uint32(zip, 0x04034b50); + s_write_uint16(zip, zip->version_extract); /* Version needed to extract (minimum). */ + s_write_uint16(zip, zip->general_purpose_bit_flag); /* General purpose bit flag */ + s_write_uint16(zip, 0); /* Compression method */ + s_write_uint16(zip, cd_file->mtime); /* File last modification time */ + s_write_uint16(zip, cd_file->mdate); /* File last modification date */ + s_write_uint32(zip, cd_file->crc_sum); /* CRC-32 of uncompressed data */ + s_write_uint32(zip, cd_file->size_compressed); /* Compressed size */ + s_write_uint32(zip, cd_file->size_uncompressed); /* Uncompressed size */ + s_write_uint16(zip, (uint16_t) strlen(name)); /* File name length (n) */ + s_write_uint16(zip, sizeof(extra_local)-1); /* Extra field length (m) */ + s_write_string(zip, cd_file->name); /* File name */ + s_write(zip, extra_local, sizeof(extra_local)-1); /* Extra field */ + } + /* Write the (uncompressed) data. 
*/ + s_write(zip, data, data_length); + + if (zip->errno_) e = -1; + else if (zip->eof) e = +1; + else e = 0; + + + end: + + if (e) { + /* Leave zip->cd_files_num unchanged, so calling extract_zip_close() + will write out any earlier files. Free cd_file->name to avoid leak. */ + if (cd_file) extract_free(alloc, &cd_file->name); + } + else { + /* cd_files[zip->cd_files_num] is valid. */ + zip->cd_files_num += 1; + } + + return e; +} + +int extract_zip_close(extract_zip_t** pzip) +{ + int e = -1; + size_t pos; + size_t len; + int i; + extract_zip_t* zip = *pzip; + extract_alloc_t* alloc; + if (!zip) { + return 0; + } + alloc = extract_buffer_alloc(zip->buffer); + pos = extract_buffer_pos(zip->buffer); + len = 0; + + /* Write Central directory file headers, freeing data as we go. */ + for (i=0; i<zip->cd_files_num; ++i) { + const char extra[] = ""; + size_t pos2 = extract_buffer_pos(zip->buffer); + extract_zip_cd_file_t* cd_file = &zip->cd_files[i]; + s_write_uint32(zip, 0x02014b50); + s_write_uint16(zip, zip->version_creator); /* Version made by, copied from command-line zip. */ + s_write_uint16(zip, zip->version_extract); /* Version needed to extract (minimum). 
*/ + s_write_uint16(zip, zip->general_purpose_bit_flag); /* General purpose bit flag */ + s_write_uint16(zip, 0); /* Compression method */ + s_write_uint16(zip, cd_file->mtime); /* File last modification time */ + s_write_uint16(zip, cd_file->mdate); /* File last modification date */ + s_write_uint32(zip, cd_file->crc_sum); /* CRC-32 of uncompressed data */ + s_write_uint32(zip, cd_file->size_compressed); /* Compressed size */ + s_write_uint32(zip, cd_file->size_uncompressed); /* Uncompressed size */ + s_write_uint16(zip, (uint16_t) strlen(cd_file->name)); /* File name length (n) */ + s_write_uint16(zip, sizeof(extra)-1); /* Extra field length (m) */ + s_write_uint16(zip, 0); /* File comment length (k) */ + s_write_uint16(zip, 0); /* Disk number where file starts */ + s_write_uint16(zip, cd_file->attr_internal); /* Internal file attributes */ + s_write_uint32(zip, cd_file->attr_external); /* External file attributes. */ + s_write_uint32(zip, cd_file->offset); /* Offset of local file header. */ + s_write_string(zip, cd_file->name); /* File name */ + s_write(zip, extra, sizeof(extra)-1); /* Extra field */ + len += extract_buffer_pos(zip->buffer) - pos2; + extract_free(alloc, &cd_file->name); + } + extract_free(alloc, &zip->cd_files); + + /* Write End of central directory record. 
*/ + s_write_uint32(zip, 0x06054b50); + s_write_uint16(zip, 0); /* Number of this disk */ + s_write_uint16(zip, 0); /* Disk where central directory starts */ + s_write_uint16(zip, (uint16_t) zip->cd_files_num); /* Number of central directory records on this disk */ + s_write_uint16(zip, (uint16_t) zip->cd_files_num); /* Total number of central directory records */ + s_write_uint32(zip, (int) len); /* Size of central directory (bytes) */ + s_write_uint32(zip, (int) pos); /* Offset of start of central directory, relative to start of archive */ + + s_write_uint16(zip, (uint16_t) strlen(zip->archive_comment)); /* Comment length (n) */ + s_write_string(zip, zip->archive_comment); + extract_free(alloc, &zip->archive_comment); + + if (zip->errno_) e = -1; + else if (zip->eof) e = +1; + else e = 0; + + extract_free(alloc, pzip); + + return e; +} diff --git a/extract/src/zip.h b/extract/src/zip.h new file mode 100644 index 00000000..570f475a --- /dev/null +++ b/extract/src/zip.h @@ -0,0 +1,64 @@ +#ifndef ARTIFEX_EXTRACT_ZIP +#define ARTIFEX_EXTRACT_ZIP + +/* Only for internal use by extract code. */ + +#include "../include/extract_buffer.h" + +#include <stddef.h> + + +/* Support for creating zip file content. + +Content is uncompressed. + +Unless otherwise stated, all functions return 0 on success or -1 with errno +set. +*/ + +typedef struct extract_zip_t extract_zip_t; +/* Abstract handle for zipfile state. */ + + +int extract_zip_open(extract_buffer_t* buffer, extract_zip_t** o_zip); +/* Creates an extract_zip_t that writes to specified buffer. + +buffer: + Destination for zip file content. +o_zip: + Out-param. +*/ + +int extract_zip_write_file( + extract_zip_t* zip, + const void* data, + size_t data_length, + const char* name + ); +/* Writes specified data into the zip file. + +Returns same as extract_buffer_write(): 0 on success, +1 if short write due to +EOF or -1 with errno set. + +zip: + From extract_zip_open(). +data: + File contents. 
+data_length: + Length in bytes of file contents. +name: + Name of file within the zip file. +*/ + + +int extract_zip_close(extract_zip_t** pzip); +/* Finishes writing the zip file (e.g. appends Central directory file headers +and End of central directory record). + +Does not call extract_buffer_close(). + +zip: + From extract_zip_open(). +*/ + +#endif |