diff options
author | Thomas Deutschmann <whissi@gentoo.org> | 2021-03-30 10:59:39 +0200 |
---|---|---|
committer | Thomas Deutschmann <whissi@gentoo.org> | 2021-04-01 00:04:14 +0200 |
commit | 5ff1d6955496b3cf9a35042c9ac35db43bc336b1 (patch) | |
tree | 6d470f7eb448f59f53e8df1010aec9dad8ce1f72 /extract/include | |
parent | Import Ghostscript 9.53.1 (diff) | |
download | ghostscript-gpl-patches-5ff1d6955496b3cf9a35042c9ac35db43bc336b1.tar.gz ghostscript-gpl-patches-5ff1d6955496b3cf9a35042c9ac35db43bc336b1.tar.bz2 ghostscript-gpl-patches-5ff1d6955496b3cf9a35042c9ac35db43bc336b1.zip |
Import Ghostscript 9.54 (tag: ghostscript-9.54)
Signed-off-by: Thomas Deutschmann <whissi@gentoo.org>
Diffstat (limited to 'extract/include')
-rw-r--r-- | extract/include/extract.h | 212 | ||||
-rw-r--r-- | extract/include/extract_alloc.h | 76 | ||||
-rw-r--r-- | extract/include/extract_buffer.h | 278 | ||||
-rw-r--r-- | extract/include/extract_buffer_impl.h | 72 | ||||
-rw-r--r-- | extract/include/extract_compat_inline.h | 15 |
5 files changed, 653 insertions, 0 deletions
diff --git a/extract/include/extract.h b/extract/include/extract.h new file mode 100644 index 00000000..97f43cd6 --- /dev/null +++ b/extract/include/extract.h @@ -0,0 +1,212 @@ +#ifndef ARITFEX_EXTRACT_H +#define ARITFEX_EXTRACT_H + +#include "extract_alloc.h" +#include "extract_buffer.h" + + +/* Functions for creating docx archives. + +We can accept images and paragraphs of text from intermediate format data, for +example created by these commands: + + mutool draw -F xmltext ... + gs -sDEVICE=txtwrite -dTextFormat=4 ... + +Unless otherwise stated, all functions return 0 on success or -1 with errno +set. +*/ + + +typedef struct extract_t extract_t; +/* State for processing a document. */ + + +int extract_begin( + extract_alloc_t* alloc, + extract_t** pextract + ); +/* Creates a new extract_t* for use by other extract_*() functions. All +allocation will be done with <alloc> (which can be NULL in which case we use +malloc/free, or from extract_alloc_create()). */ + + +int extract_read_intermediate( + extract_t* extract, + extract_buffer_t* buffer, + int autosplit + ); +/* Reads XML specification of spans and images from <buffer> and adds to +<extract>. + +(Makes internal calls to extract_span_begin(), extract_add_image() etc.) */ + + +int extract_page_begin(extract_t* extract); +/* Starts a new page, which is later finished with extract_page_end(). Must be called before extract_span_begin(). */ + + +int extract_span_begin( + extract_t* extract, + const char* font_name, + int font_bold, + int font_italic, + int wmode, + double ctm_a, + double ctm_b, + double ctm_c, + double ctm_d, + double ctm_e, + double ctm_f, + double trm_a, + double trm_b, + double trm_c, + double trm_d, + double trm_e, + double trm_f + ); +/* Starts a new span. +extract + As passed to earlier call to extract_begin(). +font_name + . +font_bold + 0 or 1. +font_italic + 0 or 1. +wmode + 0 or 1. +ctm_* + Matrix values. +trm_* + Matrix values. 
+*/ + + +int extract_add_char( + extract_t* extract, + double x, + double y, + unsigned ucs, + double adv, + int autosplit + ); +/* Appends specified character to current span. +extract + As passed to earlier call to extract_begin(). + +x +y + Position on page. +ucs + Unicode value. +adv + Advance of this character. +autosplit + If non-zero, we do additional splitting to stress the join algorithm. +*/ + + +int extract_span_end(extract_t* extract); +/* Must be called before starting a new span or ending current page. */ + + +typedef void (*extract_image_data_free)(void* handle, void* image_data); +/* Callback for freeing image data. See extract_add_image(). */ + + +int extract_add_image( + extract_t* extract, + const char* type, + double x, + double y, + double w, + double h, + char* data, + size_t data_size, + extract_image_data_free data_free, + void* data_free_handle + ); +/* Adds an image to the current page. + +type + E.g. 'png'. Is copied so does not need to persist after we return. +x y w h + Location and size of image. +data data_size + The raw image data. +data_free + If not NULL, extract code will call data_free() on <data> when it has finished + with <data> (presumably as data_free(data_free_handle, data) - confirm). Otherwise the lifetime of <data> is the responsibility of the + caller and it must persist for at least the lifetime of <extract>. +*/ + + +int extract_page_end(extract_t* extract); +/* Must be called to finish page started by extract_page_begin(). */ + + +int extract_process( + extract_t* extract, + int spacing, + int rotation, + int images + ); +/* Evaluates all un-processed pages to generate docx data and frees internal +page data (individual spans, lines, paragraphs etc). E.g. call this after +extract_page_end() to reduce internal data use. */ + + +int extract_write(extract_t* extract, extract_buffer_t* buffer); +/* Writes docx archive to buffer. The docx archive will contain text and images +from extract_process(). + +Uses an internal template docx archive. 
*/ + + +int extract_write_content(extract_t* extract, extract_buffer_t* buffer); +/* Writes docx xml for paragraphs into buffer. + +(This is the xml containing paragraphs of text that is inserted into +the template word/document.xml object within the docx zip archive by +extract_write()). */ + + +int extract_write_template( + extract_t* extract, + const char* path_template, + const char* path_out, + int preserve_dir + ); +/* Like extract_write() but uses a provided template document. + +Uses the 'zip' and 'unzip' commands internally. + +extract: + . +path_template: + Name of docx file to use as a template. +path_out: + Name of docx file to create. Must not contain single-quote, double quote, + space or ".." sequence - these will force EINVAL error because they could + make internal shell commands unsafe. +preserve_dir: + If true, we don't delete the temporary directory <path_out>.dir. +*/ + + +void extract_end( extract_t** pextract); +/* Frees all data associated with *pextract and sets *pextract to NULL. */ + + +void extract_internal_end(void); +/* Cleans up internal singleton state that can look like a memory leak when +running under Memento or valgrind. */ + +void extract_exp_min(extract_t* extract, size_t size); +/* If size is non-zero, sets minimum actual allocation size, and we only +allocate in powers of two times this size. This is an attempt to improve speed +with memento squeeze. Default is 0 (every call to extract_realloc() calls +realloc()). */ + +#endif diff --git a/extract/include/extract_alloc.h b/extract/include/extract_alloc.h new file mode 100644 index 00000000..28a52a3c --- /dev/null +++ b/extract/include/extract_alloc.h @@ -0,0 +1,76 @@ +#ifndef EXTRACT_ALLOC_H +#define EXTRACT_ALLOC_H + +/* Allocation support. */ + +#include <stdlib.h> + +typedef void* (*extract_realloc_fn_t)(void* state, void* prev, size_t size); +/* An allocation function to be provided by user of the extract library. 
+ +Should behave like realloc(), except for taking the additional 'void* state' +arg. */ + +typedef struct extract_alloc_t extract_alloc_t; +/* Abstract allocator, created by extract_alloc_create(). */ + +int extract_alloc_create(extract_realloc_fn_t realloc_fn, void* realloc_state, extract_alloc_t** palloc); +/* Creates a new extract_alloc_t* for use with extract_malloc() etc. */ + +void extract_alloc_destroy(extract_alloc_t** palloc); +/* Destroys an extract_alloc_t* that was created by extract_alloc_create(). + +Returns with *palloc set to NULL. Does nothing if *palloc is already NULL. */ + +int extract_malloc(extract_alloc_t* alloc, void** pptr, size_t size); +/* Sets *pptr to point to newly allocated memory and returns 0. On error returns +-1 with errno set and *pptr=NULL. + +Uses malloc() if <alloc> is NULL, otherwise <alloc> must have been created by +extract_alloc_create() and we use the extract_realloc_fn_t that was originally +passed to extract_alloc_create(). */ + +int extract_realloc(extract_alloc_t* alloc, void** pptr, size_t newsize); +/* Sets *pptr to point to reallocated memory and returns 0. On error returns -1 +with errno set and *pptr=NULL. + +Uses realloc() if <alloc> is NULL, otherwise <alloc> must have been created by +extract_alloc_create() and we use the extract_realloc_fn_t that was originally +passed to extract_alloc_create(). */ + +void extract_free(extract_alloc_t* alloc, void** pptr); +/* Frees block pointed to by *pptr and sets *pptr to NULL. + +Uses free() if <alloc> is NULL, otherwise <alloc> must have been created by +extract_alloc_create() and we use the extract_realloc_fn_t that was originally +passed to extract_alloc_create(). 
*/ + +#define extract_malloc(alloc, pptr, size) (extract_malloc)(alloc, (void**) pptr, size) +#define extract_realloc(alloc, pptr, newsize) (extract_realloc)(alloc, (void**) pptr, newsize) +#define extract_free(alloc, pptr) (extract_free)(alloc, (void**) pptr) +/* These allow callers to use any pointer type, not just void*. */ + +typedef struct +{ + int num_malloc; + int num_realloc; + int num_free; + int num_libc_realloc; +} extract_alloc_stats_t; + +extract_alloc_stats_t* extract_alloc_stats(extract_alloc_t* alloc); +/* Retrieve statistics. */ + +int extract_realloc2(extract_alloc_t* alloc, void** pptr, size_t oldsize, size_t newsize); +/* A realloc variant that takes the existing buffer size. + +If <oldsize> is not zero and *pptr is not NULL, <oldsize> must be the size of +the existing buffer and may be used internally to over-allocate in order to avoid +too many calls to realloc(). See extract_alloc_exp_min() for more information (NOTE(review): that function has no description in this header; extract_exp_min() in extract.h presumably describes the same setting - confirm). +*/ + +#define extract_realloc2(alloc, pptr, oldsize, newsize) (extract_realloc2)(alloc, (void**) pptr, oldsize, newsize) + +void extract_alloc_exp_min(extract_alloc_t* alloc, size_t size); + +#endif diff --git a/extract/include/extract_buffer.h b/extract/include/extract_buffer.h new file mode 100644 index 00000000..b0abbec3 --- /dev/null +++ b/extract/include/extract_buffer.h @@ -0,0 +1,278 @@ +#ifndef ARTIFEX_EXTRACT_BUFFER_H +#define ARTIFEX_EXTRACT_BUFFER_H + +#include "extract_alloc.h" + +#include <stddef.h> + +/* Work around MSVS issues with our use of 'inline'. */ +#ifdef _MSC_VER + #include "extract_compat_inline.h" +#endif + +/* Reading and writing abstractions. + +We use inline code in the common case where reading or writing can be satisfied +using a cache. +*/ + + +typedef struct extract_buffer_t extract_buffer_t; +/* Abstract state for a buffer. 
*/ + + +static inline int extract_buffer_read( + extract_buffer_t* buffer, + void* data, + size_t numbytes, + size_t* o_actual + ); +/* Reads specified number of bytes from buffer into data..+numbytes, making multiple calls to +the underlying extract_buffer_fn_read function until we have read <numbytes> or reached +EOF. Returns +1 if +short read due to EOF. + +buffer: + As returned by earlier call to extract_buffer_open(). +data: + Location of transferred data. +numbytes: + Number of bytes to read. +o_actual: + Optional out-param, set to actual number of bytes read. If we return zero + this will always be <numbytes>; otherwise will be less than <numbytes>. + +For speed reasons, this is implemented in extract_buffer_impl.h and uses only +inline code if the requested data can be read from the cache. +*/ + + +static inline int extract_buffer_write( + extract_buffer_t* buffer, + const void* data, + size_t numbytes, + size_t* o_actual + ); +/* Writes specified data into buffer. Returns +1 if short write due to EOF. + +buffer: + As returned by earlier call to extract_buffer_open(). +data: + Location of source data. +numbytes: + Number of bytes to copy. +o_actual: + Optional out-param, set to actual number of bytes written. If we return + zero this will always be <numbytes>; otherwise will be less than <numbytes> + and can even be negative if internal cache-flush using fn_write() fails or + returns EOF. [NOTE(review): *o_actual is a size_t so cannot actually hold a negative value - confirm intent.] + +For speed reasons, this is implemented in extract_buffer_impl.h and uses only +inline code if there is space in the cache for the data. +*/ + + +size_t extract_buffer_pos(extract_buffer_t* buffer); +/* Returns number of bytes read or number of bytes written so far. */ + + +int extract_buffer_close(extract_buffer_t** io_buffer); +/* Closes down an extract_buffer_t and frees all internal resources. + +Can return error or +1 for EOF if write buffer and fn_write() fails when +flushing cache. + +Always sets *io_buffer to NULL. 
Does nothing if *io_buffer is already NULL. +*/ + + +typedef int (*extract_buffer_fn_read)(void* handle, void* destination, size_t numbytes, size_t* o_actual); +/* Callback used by read buffer. Should read data from buffer into the supplied +destination. Short reads are not an error. + +E.g. used to fill cache or to handle large reads. + +Should return 0 on success (including short read or EOF) or -1 with errno set. + +handle: + As passed to extract_buffer_open(). +destination: + Start of destination. +numbytes: + Number of bytes in destination. +o_actual: + Out-param, set to zero if EOF. Otherwise set to the number of bytes + transferred in the range 1..<numbytes> inclusive. +*/ + +typedef int (*extract_buffer_fn_write)(void* handle, const void* source, size_t numbytes, size_t* o_actual); +/* Callback used by write buffer. Should write data from the supplied source +into the buffer; short writes are not an error. + +E.g. used to flush cache or to handle large writes. + +Should return 0 on success (including short write or EOF) or -1 with errno set. + +handle: + As passed to extract_buffer_open(). +source: + Start of source. +numbytes: + Number of bytes in source. +o_actual: + Out-param, set to zero if EOF. Otherwise set to the number of bytes + transferred in the range 1..<numbytes> inclusive. +*/ + +typedef int (*extract_buffer_fn_cache)(void* handle, void** o_cache, size_t* o_numbytes); +/* Callback to flush/populate cache. + +If the buffer is for writing: + Should return a memory region to which data can be written. Any data + written to a previous cache will have already been passed to fn_write() so + this can overlap or be the same as any previously-returned cache. + +If the buffer is for reading: + Should return a memory region containing more data to be read. All data in + any previously-returned cache has been read so this can overlap or be the + same as any previous cache. + +handle: + As passed to extract_buffer_open(). 
+o_data: + Out-param, set to point to new cache. +o_numbytes: + Out-param, set to size of new cache. + +If no data is available due to EOF, should return with *o_numbytes set to zero. +*/ + +typedef void (*extract_buffer_fn_close)(void* handle); +/* Called by extract_buffer_close(). + +handle: + As passed to extract_buffer_open(). +*/ + +extract_alloc_t* extract_buffer_alloc(extract_buffer_t* buffer); +/* Returns the extract_alloc_t* originally passed to extract_buffer_open*(). */ + + +int extract_buffer_open( + extract_alloc_t* alloc, + void* handle, + extract_buffer_fn_read fn_read, + extract_buffer_fn_write fn_write, + extract_buffer_fn_cache fn_cache, + extract_buffer_fn_close fn_close, + extract_buffer_t** o_buffer + ); +/* Creates an extract_buffer_t that uses specified callbacks. + +If fn_read is non-NULL the buffer is a read buffer, else if fn_write is +non-NULL the buffer is a write buffer. Passing non-NULL for both or neither is +not supported. + +alloc: + NULL or from extract_alloc_create(). Is only used to allocate the + extract_buffer_t returned in *o_buffer. +handle: + Passed to fn_read, fn_write, fn_cache and fn_close callbacks. +fn_read: + Callback for reading data; pass NULL when creating a write buffer. +fn_write: + Callback for writing data; pass NULL when creating a read buffer. +fn_cache: + Optional cache callback (see extract_buffer_fn_cache). +fn_close: + Optional close callback (see extract_buffer_fn_close). +o_buffer: + Out-param. Set to NULL on error. +*/ + + +int extract_buffer_open_simple( + extract_alloc_t* alloc, + const void* data, + size_t numbytes, + void* handle, + extract_buffer_fn_close fn_close, + extract_buffer_t** o_buffer + ); +/* Creates an extract_buffer_t that reads from or writes to a single fixed +block of memory. + +The address region data..+data_length must exist for the lifetime of the +returned extract_buffer_t. + +alloc: + NULL or from extract_alloc_create(). Is only used to allocate the + extract_buffer_t returned in *o_buffer. +data: + Start of memory region. 
Note that if the extract_buffer_t is used as a + write buffer then data[] will be written-to, despite the 'const'. [This + use of const avoids the need for the caller to use a cast when creating a + read-buffer.] +bytes: + Size of memory region. +handle: + Passed to fn_close. +fn_close: + Optional callback called by extract_buffer_close(). E.g. could copy the + memory region elsewhere if the buffer was used as a write buffer. +o_buffer: + Out-param. +*/ + + +int extract_buffer_open_file( + extract_alloc_t* alloc, + const char* path, + int writable, + extract_buffer_t** o_buffer + ); +/* Creates a buffer that reads from, or writes to, a file. For portability +uses an internal FILE* rather than an integer file descriptor, so doesn't use +extract_buffer's caching support because FILE* already provides caching. + +path: + Path of file to read from or write to. +writable: + We create read buffer if zero, else a write buffer. +o_buffer: + Out-param. Set to NULL on error. +*/ + + +typedef struct +{ + extract_buffer_t* buffer; + char* data; + size_t alloc_size; + size_t data_size; +} extract_buffer_expanding_t; +/* A write buffer that writes to an automatically-growing contiguous area of +memory. */ + +int extract_buffer_expanding_create( + extract_alloc_t* alloc, + extract_buffer_expanding_t* buffer_expanding + ); +/* Creates a writable buffer that writes into an automatically-growing +contiguous area of memory. + +alloc: + NULL or from extract_alloc_create(). +buffer_expanding: + Out-param; *buffer_expanding is initialised. + +Initialises buffer_expanding. buffer_expanding->buffer can be passed to +extract_buffer_*() functions. After extract_buffer_close(), the written data is +available in buffer_expanding->data..+data_size, which will have been allocated +using <alloc>. */ + + +/* Include implementations of inline-functions. 
*/ +#include "extract_buffer_impl.h" + +#endif diff --git a/extract/include/extract_buffer_impl.h b/extract/include/extract_buffer_impl.h new file mode 100644 index 00000000..25f533dd --- /dev/null +++ b/extract/include/extract_buffer_impl.h @@ -0,0 +1,72 @@ +/* Implementation of inline functions. + +We expose some implementation details to allow extract_buffer_read() and +extract_buffer_write() to be inline; specifically we allow the compiler to +optimise the common case where reading/writing uses the cache only. */ + + +#include <string.h> + +typedef struct +{ + void* cache; + size_t numbytes; + size_t pos; +} extract_buffer_cache_t; + + +int extract_buffer_read_internal( + extract_buffer_t* buffer, + void* data, + size_t numbytes, + size_t* o_actual + ); +/* Internal use only: called by extract_buffer_read() when the request cannot be satisfied from the cache alone. */ + +static inline int extract_buffer_read( + extract_buffer_t* buffer, + void* data, + size_t numbytes, + size_t* o_actual + ) +{ + extract_buffer_cache_t* cache = (void*) buffer; /* NOTE(review): reads the start of *buffer as an extract_buffer_cache_t, so presumably extract_buffer_t begins with one - confirm. */ + if (cache->numbytes - cache->pos < numbytes) { + /* Fewer than <numbytes> bytes remain in the cache, so hand the whole request to the non-inline path. Assumes pos <= numbytes, otherwise the unsigned subtraction would wrap - TODO confirm. */ + return extract_buffer_read_internal(buffer, data, numbytes, o_actual); + } + /* We can use just the cache: copy <numbytes> bytes out, advance the cache position and report a full read. */ + memcpy(data, (char*) cache->cache + cache->pos, numbytes); + cache->pos += numbytes; + if (o_actual) *o_actual = numbytes; + return 0; +} + + +int extract_buffer_write_internal( + extract_buffer_t* buffer, + const void* data, + size_t numbytes, + size_t* o_actual + ); +/* Internal use only: called by extract_buffer_write() when the cache does not have room for the whole write. */ + +static inline int extract_buffer_write( + extract_buffer_t* buffer, + const void* data, + size_t numbytes, + size_t* o_actual + ) +{ + extract_buffer_cache_t* cache = (void*) buffer; /* NOTE(review): reads the start of *buffer as an extract_buffer_cache_t, so presumably extract_buffer_t begins with one - confirm. */ + if (cache->numbytes - cache->pos < numbytes) { + /* Less than <numbytes> of space remains in the cache, so hand the whole request to the non-inline path. Assumes pos <= numbytes, otherwise the unsigned subtraction would wrap - TODO confirm. */ + return extract_buffer_write_internal(buffer, data, numbytes, o_actual); + } + /* We can use just the cache. 
*/ + memcpy((char*) cache->cache + cache->pos, data, numbytes); + cache->pos += numbytes; + if (o_actual) *o_actual = numbytes; + return 0; +} + diff --git a/extract/include/extract_compat_inline.h b/extract/include/extract_compat_inline.h new file mode 100644 index 00000000..bb82ead2 --- /dev/null +++ b/extract/include/extract_compat_inline.h @@ -0,0 +1,15 @@ +#ifndef ARTIFEX_EXTRACT_COMPAT_INLINE +#define ARTIFEX_EXTRACT_COMPAT_INLINE + +#if !defined __cplusplus && defined(_MSC_VER) + #if (_MSC_VER < 1500) + /* inline and inline__ not available so remove all mention of + inline. This may result in warnings about unused static functions. */ + #define inline + #else + /* __inline is always available. */ + #define inline __inline + #endif +#endif + +#endif |