diff options
Diffstat (limited to 'extract/src/document.h')
-rw-r--r-- | extract/src/document.h | 137 |
1 files changed, 128 insertions, 9 deletions
diff --git a/extract/src/document.h b/extract/src/document.h index c59348f4..2dc4f1ee 100644 --- a/extract/src/document.h +++ b/extract/src/document.h @@ -1,6 +1,15 @@ #ifndef ARTIFEX_EXTRACT_DOCUMENT_H #define ARTIFEX_EXTRACT_DOCUMENT_H +#include "../include/extract.h" + +#ifdef _MSC_VER + #include "compat_stdint.h" +#else + #include <stdint.h> +#endif + + static const double pi = 3.141592653589793; typedef struct @@ -9,6 +18,16 @@ typedef struct double y; } point_t; +const char* extract_point_string(const point_t* point); + +typedef struct +{ + point_t min; + point_t max; +} rect_t; + +const char* extract_rect_string(const rect_t* rect); + typedef struct { double a; @@ -19,9 +38,15 @@ typedef struct double f; } matrix_t; -double matrix_expansion(matrix_t m); +const char* extract_matrix_string(const matrix_t* matrix); -int matrix_cmp4(const matrix_t* lhs, const matrix_t* rhs) +double extract_matrix_expansion(matrix_t m); +/* Returns a*d - b*c. */ + +point_t extract_multiply_matrix_point(matrix_t m, point_t p); +matrix_t extract_multiply_matrix_matrix(matrix_t m1, matrix_t m2); + +int extract_matrix_cmp4(const matrix_t* lhs, const matrix_t* rhs) ; /* Returns zero if first four members of *lhs and *rhs are equal, otherwise +/-1. */ @@ -48,7 +73,7 @@ typedef struct matrix_t trm; char* font_name; - /* font size is matrix_expansion(trm). */ + /* font size is extract_matrix_expansion(trm). */ struct { unsigned font_bold : 1; @@ -61,14 +86,21 @@ typedef struct } span_t; /* List of chars that have same font and are usually adjacent. */ -char_t* span_char_last(span_t* span); +void extract_span_init(span_t* span); + +void extract_span_free(extract_alloc_t* alloc, span_t** pspan); +/* Frees a span_t, returning with *pspan set to NULL. */ + +void extract_spans_free(extract_alloc_t* alloc, span_t*** pspans, int spans_num); + +char_t* extract_span_char_last(span_t* span); /* Returns last character in span. 
*/ -int span_append_c(extract_alloc_t* alloc, span_t* span, int c); +int extract_span_append_c(extract_alloc_t* alloc, span_t* span, int c); /* Appends new char_t to an span_t with .ucs=c and all other fields zeroed. */ -const char* span_string(extract_alloc_t* alloc, span_t* span); +const char* extract_span_string(extract_alloc_t* alloc, span_t* span); /* Returns static string containing info about span_t. */ typedef struct @@ -78,10 +110,13 @@ typedef struct } line_t; /* List of spans that are aligned on same line. */ -span_t* line_span_first(line_t* line); +void extract_line_free(extract_alloc_t* alloc, line_t** pline); +void extract_lines_free(extract_alloc_t* alloc, line_t*** plines, int lines_num); + +span_t* extract_line_span_first(line_t* line); /* Returns first span in a line. */ -span_t* line_span_last(line_t* line); +span_t* extract_line_span_last(line_t* line); /* Returns last span in a line. */ typedef struct @@ -112,6 +147,61 @@ typedef struct <name> and <id> are created to be unique identifiers for use in generated docx file. */ +void extract_image_clear(extract_alloc_t* alloc, image_t* image); + +typedef struct +{ + float color; + rect_t rect; +} tableline_t; +/* A line that is part of a table. */ + +typedef struct +{ + tableline_t* tablelines; + int tablelines_num; +} tablelines_t; + + +typedef struct +{ + rect_t rect; + + /* If left/above is true, this cell is not obscured by cell to its + left/above. */ + uint8_t left; + uint8_t above; + + /* extend_right and extend_down are 1 for normal cells, 2 for cells which + extend right/down to cover an additional column/row, 3 to cover two + additional columns/rows etc. */ + int extend_right; + int extend_down; + + /* Contents of this cell. */ + line_t** lines; + int lines_num; + paragraph_t** paragraphs; + int paragraphs_num; +} cell_t; +/* A cell within a table. 
*/ + +void extract_cell_init(cell_t* cell); +void extract_cell_free(extract_alloc_t* alloc, cell_t** pcell); + +typedef struct +{ + point_t pos; /* top-left. */ + + /* Array of cells_num_x*cells_num_y cells; cell (x, y) is: + cells_num_x * y + x. + */ + cell_t** cells; + int cells_num_x; + int cells_num_y; +} table_t; + + typedef struct { span_t** spans; @@ -129,10 +219,17 @@ typedef struct int paragraphs_num; /* These refer to items in .lines. Initially empty, then set by extract_join(). */ + + tablelines_t tablelines_horizontal; + tablelines_t tablelines_vertical; + + table_t** tables; + int tables_num; } extract_page_t; /* A page. Contains different representations of the list of spans. NB not -called page_t because this clashes with a system type on hpux. */ ++called page_t because this clashes with a system type on hpux. */ + typedef struct { @@ -150,9 +247,31 @@ typedef struct int imagetypes_num; } images_t; + int extract_document_join(extract_alloc_t* alloc, document_t* document); +/* This does all the work of finding paragraphs and tables. */ double extract_matrices_to_font_size(matrix_t* ctm, matrix_t* trm); +/* Things below here are used when generating output. */ + +typedef struct +{ + char* name; + double size; + int bold; + int italic; +} font_t; +/* Basic information about current font. */ + +typedef struct +{ + font_t font; + matrix_t* ctm_prev; +} content_state_t; +/* Used to keep track of font information when writing paragraphs of odt +content, e.g. so we know whether a font has changed so need to start a new odt +span. */ + #endif |