summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Thomas Deutschmann <whissi@gentoo.org> 2021-03-30 10:59:39 +0200
committer Thomas Deutschmann <whissi@gentoo.org> 2021-04-01 00:04:14 +0200
commit 5ff1d6955496b3cf9a35042c9ac35db43bc336b1 (patch)
tree 6d470f7eb448f59f53e8df1010aec9dad8ce1f72 /extract/include
parent Import Ghostscript 9.53.1 (diff)
downloadghostscript-gpl-patches-5ff1d6955496b3cf9a35042c9ac35db43bc336b1.tar.gz
ghostscript-gpl-patches-5ff1d6955496b3cf9a35042c9ac35db43bc336b1.tar.bz2
ghostscript-gpl-patches-5ff1d6955496b3cf9a35042c9ac35db43bc336b1.zip
Import Ghostscript 9.54ghostscript-9.54
Signed-off-by: Thomas Deutschmann <whissi@gentoo.org>
Diffstat (limited to 'extract/include')
-rw-r--r-- extract/include/extract.h 212
-rw-r--r-- extract/include/extract_alloc.h 76
-rw-r--r-- extract/include/extract_buffer.h 278
-rw-r--r-- extract/include/extract_buffer_impl.h 72
-rw-r--r-- extract/include/extract_compat_inline.h 15
5 files changed, 653 insertions, 0 deletions
diff --git a/extract/include/extract.h b/extract/include/extract.h
new file mode 100644
index 00000000..97f43cd6
--- /dev/null
+++ b/extract/include/extract.h
@@ -0,0 +1,212 @@
+#ifndef ARITFEX_EXTRACT_H
+#define ARITFEX_EXTRACT_H
+
+#include "extract_alloc.h"
+#include "extract_buffer.h"
+
+
+/* Functions for creating docx archives.
+
+We can accept images and paragraphs of text from intermediate format data, for
+example created by these commands:
+
+ mutool draw -F xmltext ...
+ gs -sDEVICE=txtwrite -dTextFormat=4 ...
+
+Unless otherwise stated, all functions return 0 on success or -1 with errno
+set.
+*/
+
+
+typedef struct extract_t extract_t;
+/* State for processing a document. */
+
+
+int extract_begin(
+ extract_alloc_t* alloc,
+ extract_t** pextract
+ );
+/* Creates a new extract_t* for use by other extract_*() functions. All
+allocation will be done with <alloc> (which can be NULL in which case we use
+malloc/free, or from extract_alloc_create()). */
+
+
+int extract_read_intermediate(
+ extract_t* extract,
+ extract_buffer_t* buffer,
+ int autosplit
+ );
+/* Reads XML specification of spans and images from <buffer> and adds to
+<extract>.
+
+(Makes internal calls to extract_span_begin(), extract_add_image() etc.) */
+
+
+int extract_page_begin(extract_t* extract);
+/* Must be called before extract_span_begin(). */
+
+
+int extract_span_begin(
+ extract_t* extract,
+ const char* font_name,
+ int font_bold,
+ int font_italic,
+ int wmode,
+ double ctm_a,
+ double ctm_b,
+ double ctm_c,
+ double ctm_d,
+ double ctm_e,
+ double ctm_f,
+ double trm_a,
+ double trm_b,
+ double trm_c,
+ double trm_d,
+ double trm_e,
+ double trm_f
+ );
+/* Starts a new span.
+extract
+ As passed to earlier call to extract_begin().
+font_name
+ Name of the font used by this span.
+font_bold
+ 0 or 1.
+font_italic
+ 0 or 1.
+wmode
+ 0 or 1.
+ctm_*
+ Matrix values.
+trm_*
+ Matrix values.
+*/
+
+
+int extract_add_char(
+ extract_t* extract,
+ double x,
+ double y,
+ unsigned ucs,
+ double adv,
+ int autosplit
+ );
+/* Appends specified character to current span.
+extract
+ As passed to earlier call to extract_begin().
+autosplit
+x
+y
+ Position on page.
+ucs
+ Unicode value.
+adv
+ Advance of this character.
+autosplit
+ If non-zero, we do additional splitting to stress the join algorithm.
+*/
+
+
+int extract_span_end(extract_t* extract);
+/* Must be called before starting a new span or ending current page. */
+
+
+typedef void (*extract_image_data_free)(void* handle, void* image_data);
+/* Callback for freeing image data. See extract_add_image(). */
+
+
+int extract_add_image(
+ extract_t* extract,
+ const char* type,
+ double x,
+ double y,
+ double w,
+ double h,
+ char* data,
+ size_t data_size,
+ extract_image_data_free data_free,
+ void* data_free_handle
+ );
+/* Adds an image to the current page.
+
+type
+ E.g. 'png'. Is copied so need not persist after we return.
+x y w h
+ Location and size of image.
+data data_size
+ The raw image data.
+data_free
+ If not NULL, extract code calls data_free(data_free_handle, data) when finished
+ with <data>. Otherwise the lifetime of <data> is the responsibility of the
+ caller and it must persist for at least the lifetime of <extract>.
+*/
+
+
+int extract_page_end(extract_t* extract);
+/* Must be called to finish page started by extract_page_begin(). */
+
+
+int extract_process(
+ extract_t* extract,
+ int spacing,
+ int rotation,
+ int images
+ );
+/* Evaluates all un-processed pages to generate docx data and frees internal
+page data (individual spans, lines, paragraphs etc). E.g. call this after
+extract_page_end() to reduce internal data use. */
+
+
+int extract_write(extract_t* extract, extract_buffer_t* buffer);
+/* Writes docx archive to buffer. The docx archive will contain text and images
+from extract_process().
+
+Uses an internal template docx archive. */
+
+
+int extract_write_content(extract_t* extract, extract_buffer_t* buffer);
+/* Writes docx xml for paragraphs into buffer.
+
+(This is the xml containing paragraphs of text that is inserted into
+the template word/document.xml object within the docx zip archive by
+extract_write()). */
+
+
+int extract_write_template(
+ extract_t* extract,
+ const char* path_template,
+ const char* path_out,
+ int preserve_dir
+ );
+/* Like extract_write() but uses a provided template document.
+
+Uses the 'zip' and 'unzip' commands internally.
+
+extract:
+ As passed to earlier call to extract_begin().
+path_template:
+ Name of docx file to use as a template.
+path_out:
+ Name of docx file to create. Must not contain single-quote, double quote,
+ space or ".." sequence - these will force EINVAL error because they could
+ make internal shell commands unsafe.
+preserve_dir:
+ If true, we don't delete the temporary directory <path_out>.dir.
+*/
+
+
+void extract_end( extract_t** pextract);
+/* Frees all data associated with *pextract and sets *pextract to NULL. */
+
+
+void extract_internal_end(void);
+/* Cleans up internal singleton state that can look like a memory leak when
+running under Memento or valgrind. */
+
+void extract_exp_min(extract_t* extract, size_t size);
+/* If size is non-zero, sets minimum actual allocation size, and we only
+allocate in powers of two times this size. This is an attempt to improve speed
+with memento squeeze. Default is 0 (every call to extract_realloc() calls
+realloc()). */
+
+#endif
diff --git a/extract/include/extract_alloc.h b/extract/include/extract_alloc.h
new file mode 100644
index 00000000..28a52a3c
--- /dev/null
+++ b/extract/include/extract_alloc.h
@@ -0,0 +1,76 @@
+#ifndef EXTRACT_ALLOC_H
+#define EXTRACT_ALLOC_H
+
+/* Allocation support. */
+
+#include <stdlib.h>
+
+typedef void* (*extract_realloc_fn_t)(void* state, void* prev, size_t size);
+/* An allocation function to be provided by user of the extract library.
+
+Should behave like realloc(), except for taking the additional 'void* state'
+arg. */
+
+typedef struct extract_alloc_t extract_alloc_t;
+/* Abstract allocator, created by extract_alloc_create(). */
+
+int extract_alloc_create(extract_realloc_fn_t realloc_fn, void* realloc_state, extract_alloc_t** palloc);
+/* Creates a new extract_alloc_t* for use with extract_malloc() etc. */
+
+void extract_alloc_destroy(extract_alloc_t** palloc);
+/* Destroys an extract_alloc_t* that was created by extract_alloc_create().
+
+Returns with *palloc set to NULL. Does nothing if *palloc is already NULL. */
+
+int extract_malloc(extract_alloc_t* alloc, void** pptr, size_t size);
+/* Sets *pptr to point to new allocated memory and returns 0. On error return
+-1 with errno set and *pptr=NULL.
+
+Uses malloc() if <alloc> is NULL, otherwise <alloc> must have been created by
+extract_alloc_create() and we use the extract_realloc_fn_t that was originally
+passed to extract_alloc_create(). */
+
+int extract_realloc(extract_alloc_t* alloc, void** pptr, size_t newsize);
+/* Sets *pptr to point to reallocated memory and returns 0. On error return -1
+with errno set and *pptr=NULL.
+
+Uses realloc() if <alloc> is NULL, otherwise <alloc> must have been created by
+extract_alloc_create() and we use the extract_realloc_fn_t that was originally
+passed to extract_alloc_create(). */
+
+void extract_free(extract_alloc_t* alloc, void** pptr);
+/* Frees block pointed to by *pptr and sets *pptr to NULL.
+
+Uses free() if <alloc> is NULL, otherwise <alloc> must have been created by
+extract_alloc_create() and we use the extract_realloc_fn_t that was originally
+passed to extract_alloc_create(). */
+
+#define extract_malloc(alloc, pptr, size) (extract_malloc)(alloc, (void**) pptr, size)
+#define extract_realloc(alloc, pptr, newsize) (extract_realloc)(alloc, (void**) pptr, newsize)
+#define extract_free(alloc, pptr) (extract_free)(alloc, (void**) pptr)
+/* These allow callers to use any pointer type, not just void*. */
+
+typedef struct
+{
+ int num_malloc;
+ int num_realloc;
+ int num_free;
+ int num_libc_realloc;
+} extract_alloc_stats_t;
+
+extract_alloc_stats_t* extract_alloc_stats(extract_alloc_t* alloc);
+/* Retrieve statistics. */
+
+int extract_realloc2(extract_alloc_t* alloc, void** pptr, size_t oldsize, size_t newsize);
+/* A realloc variant that takes the existing buffer size.
+
+If <oldsize> is not zero and *pptr is not NULL, <oldsize> must be the size of
+the existing buffer and may be used internally to over-allocate in order to avoid
+too many calls to realloc(). See extract_alloc_exp_min() for more information.
+*/
+
+#define extract_realloc2(alloc, pptr, oldsize, newsize) (extract_realloc2)(alloc, (void**) pptr, oldsize, newsize)
+
+void extract_alloc_exp_min(extract_alloc_t* alloc, size_t size);
+
+#endif
diff --git a/extract/include/extract_buffer.h b/extract/include/extract_buffer.h
new file mode 100644
index 00000000..b0abbec3
--- /dev/null
+++ b/extract/include/extract_buffer.h
@@ -0,0 +1,278 @@
+#ifndef ARTIFEX_EXTRACT_BUFFER_H
+#define ARTIFEX_EXTRACT_BUFFER_H
+
+#include "extract_alloc.h"
+
+#include <stddef.h>
+
+/* Work around MSVS issues with our use of 'inline'. */
+#ifdef _MSC_VER
+ #include "extract_compat_inline.h"
+#endif
+
+/* Reading and writing abstractions.
+
+We use inline code in the common case where reading or writing can be satisfied
+using a cache.
+*/
+
+
+typedef struct extract_buffer_t extract_buffer_t;
+/* Abstract state for a buffer. */
+
+
+static inline int extract_buffer_read(
+ extract_buffer_t* buffer,
+ void* data,
+ size_t numbytes,
+ size_t* o_actual
+ );
+/* Reads specified number of bytes from buffer into data..+numbytes, making
+multiple calls to the underlying extract_buffer_fn_read function until we have
+read <numbytes> or reached EOF. Returns 0 if <numbytes> bytes were read, or +1
+if short read due to EOF.
+
+buffer:
+ As returned by earlier call to extract_buffer_open().
+data:
+ Location of transferred data.
+numbytes:
+ Number of bytes to read.
+o_actual:
+ Optional out-param, set to actual number of bytes read. If we return zero
+ this will always be <numbytes>; otherwise will be less than <numbytes>.
+
+For speed reasons, this is implemented in extract_buffer_impl.h and uses only
+inline code if the requested data can be read from the cache.
+*/
+
+
+static inline int extract_buffer_write(
+ extract_buffer_t* buffer,
+ const void* data,
+ size_t numbytes,
+ size_t* o_actual
+ );
+/* Writes specified data into buffer. Returns +1 if short write due to EOF.
+
+buffer:
+ As returned by earlier call to extract_buffer_open().
+data:
+ Location of source data.
+numbytes:
+ Number of bytes to copy.
+o_actual:
+ Optional out-param, set to actual number of bytes written. If we return
+ zero this will always be <numbytes>; otherwise will be less than <numbytes>
+ and can even be negative if internal cache-flush using fn_write() fails or
+ returns EOF.
+
+For speed reasons, this is implemented in extract_buffer_impl.h and uses only
+inline code if there is space in the cache for the data.
+*/
+
+
+size_t extract_buffer_pos(extract_buffer_t* buffer);
+/* Returns number of bytes read or number of bytes written so far. */
+
+
+int extract_buffer_close(extract_buffer_t** io_buffer);
+/* Closes down an extract_buffer_t and frees all internal resources.
+
+Can return error or +1 for EOF if write buffer and fn_write() fails when
+flushing cache.
+
+Always sets *io_buffer to NULL. Does nothing if *io_buffer is already NULL.
+*/
+
+
+typedef int (*extract_buffer_fn_read)(void* handle, void* destination, size_t numbytes, size_t* o_actual);
+/* Callback used by read buffer. Should read data from buffer into the supplied
+destination. Short reads are not an error.
+
+E.g. used to fill cache or to handle large reads.
+
+Should return 0 on success (including short read or EOF) or -1 with errno set.
+
+handle:
+ As passed to extract_buffer_open().
+destination:
+ Start of destination.
+numbytes:
+ Number of bytes in destination.
+o_actual:
+ Out-param, set to zero if EOF. Otherwise set to the number of bytes
+ transferred in the range 1..<numbytes> inclusive.
+*/
+
+typedef int (*extract_buffer_fn_write)(void* handle, const void* source, size_t numbytes, size_t* o_actual);
+/* Callback used by write buffer. Should write data from the supplied source
+into the buffer; short writes are not an error.
+
+E.g. used to flush cache or to handle large writes.
+
+Should return 0 on success (including short write or EOF) or -1 with errno set.
+
+handle:
+ As passed to extract_buffer_open().
+source:
+ Start of source.
+numbytes:
+ Number of bytes in source.
+o_actual:
+ Out-param, set to zero if EOF. Otherwise set to the number of bytes
+ transferred in the range 1..<numbytes> inclusive.
+*/
+
+typedef int (*extract_buffer_fn_cache)(void* handle, void** o_cache, size_t* o_numbytes);
+/* Callback to flush/populate cache.
+
+If the buffer is for writing:
+ Should return a memory region to which data can be written. Any data
+ written to a previous cache will have already been passed to fn_write() so
+ this can overlap or be the same as any previously-returned cache.
+
+If the buffer is for reading:
+ Should return a memory region containing more data to be read. All data in
+ any previously-returned cache has been read so this can overlap or be the
+ same as any previous cache.
+
+handle:
+ As passed to extract_buffer_open().
+o_cache:
+ Out-param, set to point to new cache.
+o_numbytes:
+ Out-param, set to size of new cache.
+
+If no data is available due to EOF, should return with *o_numbytes set to zero.
+*/
+
+typedef void (*extract_buffer_fn_close)(void* handle);
+/* Called by extract_buffer_close().
+
+handle:
+ As passed to extract_buffer_open().
+*/
+
+extract_alloc_t* extract_buffer_alloc(extract_buffer_t* buffer);
+/* Returns the extract_alloc_t* originally passed to extract_buffer_open*(). */
+
+
+int extract_buffer_open(
+ extract_alloc_t* alloc,
+ void* handle,
+ extract_buffer_fn_read fn_read,
+ extract_buffer_fn_write fn_write,
+ extract_buffer_fn_cache fn_cache,
+ extract_buffer_fn_close fn_close,
+ extract_buffer_t** o_buffer
+ );
+/* Creates an extract_buffer_t that uses specified callbacks.
+
+If fn_read is non-NULL the buffer is a read buffer, else if fn_write is
+non-NULL the buffer is a write buffer. Passing non-NULL for both or neither is
+not supported.
+
+alloc:
+ NULL or from extract_alloc_create(). Is only used to allocate the
+ extract_buffer_t returned in *o_buffer.
+handle:
+ Passed to fn_read, fn_write, fn_cache and fn_close callbacks.
+fn_read:
+ Callback for reading data.
+fn_write:
+ Callback for writing data.
+fn_cache:
+ Optional cache callback.
+fn_close:
+ Optional close callback.
+o_buffer:
+ Out-param. Set to NULL on error.
+*/
+
+
+int extract_buffer_open_simple(
+ extract_alloc_t* alloc,
+ const void* data,
+ size_t numbytes,
+ void* handle,
+ extract_buffer_fn_close fn_close,
+ extract_buffer_t** o_buffer
+ );
+/* Creates an extract_buffer_t that reads from or writes to a single fixed
+block of memory.
+
+The address region data..+numbytes must exist for the lifetime of the
+returned extract_buffer_t.
+
+alloc:
+ NULL or from extract_alloc_create(). Is only used to allocate the
+ extract_buffer_t returned in *o_buffer.
+data:
+ Start of memory region. Note that if the extract_buffer_t is used as a
+ write buffer then data[] will be written-to, despite the 'const'. [This
+ use of const avoids the need for the caller to use a cast when creating a
+ read-buffer.]
+numbytes:
+ Size of memory region.
+handle:
+ Passed to fn_close.
+fn_close:
+ Optional callback called by extract_buffer_close(). E.g. could copy the
+ memory region elsewhere if the buffer was used as a write buffer.
+o_buffer:
+ Out-param.
+*/
+
+
+int extract_buffer_open_file(
+ extract_alloc_t* alloc,
+ const char* path,
+ int writable,
+ extract_buffer_t** o_buffer
+ );
+/* Creates a buffer that reads from, or writes to, a file. For portability
+uses an internal FILE* rather than an integer file descriptor, so doesn't use
+extract_buffer's caching support because FILE* already provides caching.
+
+path:
+ Path of file to read from or write to.
+writable:
+ We create read buffer if zero, else a write buffer.
+o_buffer:
+ Out-param. Set to NULL on error.
+*/
+
+
+typedef struct
+{
+ extract_buffer_t* buffer;
+ char* data;
+ size_t alloc_size;
+ size_t data_size;
+} extract_buffer_expanding_t;
+/* A write buffer that writes to an automatically-growing contiguous area of
+memory. */
+
+int extract_buffer_expanding_create(
+ extract_alloc_t* alloc,
+ extract_buffer_expanding_t* buffer_expanding
+ );
+/* Creates a writable buffer that writes into an automatically-growing
+contiguous area of memory.
+
+alloc:
+ NULL or from extract_alloc_create().
+buffer_expanding:
+ Out-param; *buffer_expanding is initialised.
+
+Initialises buffer_expanding. buffer_expanding->buffer can be passed to
+extract_buffer_*() functions. After extract_buffer_close(), the written data is
+available in buffer_expanding->data..+data_size, which will have been allocated
+using <alloc>. */
+
+
+/* Include implementations of inline-functions. */
+#include "extract_buffer_impl.h"
+
+#endif
diff --git a/extract/include/extract_buffer_impl.h b/extract/include/extract_buffer_impl.h
new file mode 100644
index 00000000..25f533dd
--- /dev/null
+++ b/extract/include/extract_buffer_impl.h
@@ -0,0 +1,72 @@
+/* Implementation of inline functions.
+
+We expose some implementation details to allow extract_buffer_read() and
+extract_buffer_write() to be inline; specifically we allow the compiler to
+optimise the common case where reading/writing uses the cache only. */
+
+
+#include <string.h>
+
+typedef struct
+{
+ void* cache; /* Start of the cached memory region. */
+ size_t numbytes; /* Size of the cached region in bytes. */
+ size_t pos; /* Offset within the region of the next byte to read or write. */
+} extract_buffer_cache_t; /* Cache state accessed directly by the inline extract_buffer_read()/extract_buffer_write(). */
+
+
+int extract_buffer_read_internal(
+ extract_buffer_t* buffer,
+ void* data,
+ size_t numbytes,
+ size_t* o_actual
+ );
+/* Internal use only. */
+
+static inline int extract_buffer_read(
+ extract_buffer_t* buffer,
+ void* data,
+ size_t numbytes,
+ size_t* o_actual
+ )
+{ /* Copies <numbytes> bytes into <data> straight from the cache when possible, else calls extract_buffer_read_internal(). */
+ extract_buffer_cache_t* cache = (void*) buffer; /* NOTE(review): presumably extract_buffer_t begins with an extract_buffer_cache_t - confirm in the implementation. */
+ if (cache->numbytes - cache->pos < numbytes) {
+ /* Fewer than <numbytes> bytes remain in the cache, so defer to the non-inline path. NOTE(review): unsigned subtraction relies on pos <= numbytes. */
+ return extract_buffer_read_internal(buffer, data, numbytes, o_actual);
+ }
+ /* The whole request is available in the cache: copy it out and advance the cache position. */
+ memcpy(data, (char*) cache->cache + cache->pos, numbytes);
+ cache->pos += numbytes;
+ if (o_actual) *o_actual = numbytes;
+ return 0;
+}
+
+
+int extract_buffer_write_internal(
+ extract_buffer_t* buffer,
+ const void* data,
+ size_t numbytes,
+ size_t* o_actual
+ );
+/* Internal use only. */
+
+static inline int extract_buffer_write(
+ extract_buffer_t* buffer,
+ const void* data,
+ size_t numbytes,
+ size_t* o_actual
+ )
+{ /* Copies <numbytes> bytes from <data> straight into the cache when there is room, else calls extract_buffer_write_internal(). */
+ extract_buffer_cache_t* cache = (void*) buffer; /* NOTE(review): presumably extract_buffer_t begins with an extract_buffer_cache_t - confirm in the implementation. */
+ if (cache->numbytes - cache->pos < numbytes) {
+ /* Less than <numbytes> bytes of space remain in the cache, so defer to the non-inline path. NOTE(review): unsigned subtraction relies on pos <= numbytes. */
+ return extract_buffer_write_internal(buffer, data, numbytes, o_actual);
+ }
+ /* The whole request fits in the cache: copy it in and advance the cache position. */
+ memcpy((char*) cache->cache + cache->pos, data, numbytes);
+ cache->pos += numbytes;
+ if (o_actual) *o_actual = numbytes;
+ return 0;
+}
+
diff --git a/extract/include/extract_compat_inline.h b/extract/include/extract_compat_inline.h
new file mode 100644
index 00000000..bb82ead2
--- /dev/null
+++ b/extract/include/extract_compat_inline.h
@@ -0,0 +1,15 @@
+#ifndef ARTIFEX_EXTRACT_COMPAT_INLINE
+#define ARTIFEX_EXTRACT_COMPAT_INLINE
+
+#if !defined __cplusplus && defined(_MSC_VER)
+ #if (_MSC_VER < 1500)
+ /* inline and __inline not available so remove all mention of
+ inline. This may result in warnings about unused static functions. */
+ #define inline
+ #else
+ /* __inline is always available. */
+ #define inline __inline
+ #endif
+#endif
+
+#endif