diff options
Diffstat (limited to 'extract/src/docx_template_build.py')
-rwxr-xr-x | extract/src/docx_template_build.py | 46 |
1 file changed, 27 insertions, 19 deletions
diff --git a/extract/src/docx_template_build.py b/extract/src/docx_template_build.py index 8b836300..e04137d5 100755 --- a/extract/src/docx_template_build.py +++ b/extract/src/docx_template_build.py @@ -14,13 +14,13 @@ Args: -i <in-path> Set template docx/odt file to extract from. - + -n docx | odt Infix to use in generated identifier names. -o <out-path> Set name of output files. - + We write to <out-path>.c and <out-path>.h. ''' @@ -82,7 +82,7 @@ def check_path_safe(path): raise Exception(f'Path is unsafe because contains "..": {path!r}') for c in path: if not c.isalnum() and c not in '/._-': - #print(f'unsafe character {c} in: {path}') + #print(f'unsafe character {c} in: {path}') raise Exception(f'Path is unsafe because contains "{c}": {path!r}') def path_safe(path): @@ -134,37 +134,37 @@ def main(): path_out = next(args) else: assert 0, f'unrecognised arg: {arg}' - + if not path_in: return - + if not path_in: raise Exception('Need to specify -i <in-path>') if not infix: raise Exception('Need to specify -n <name>') if not path_out: raise Exception('Need to specify -o <out-path>') - + check_path_safe(path_in) check_path_safe(path_out) path_temp = f'{path_in}.dir' os.system(f'rm -r "{path_temp}" 2>/dev/null') system(f'unzip -q -d {path_temp} {path_in}') - + out_c = io.StringIO() out_c.write(f'/* THIS IS AUTO-GENERATED CODE, DO NOT EDIT. */\n') out_c.write(f'\n') out_c.write(f'#include "{os.path.basename(path_out)}.h"\n') out_c.write(f'\n') - - + + out_c.write(f'const {infix}_template_item_t {infix}_template_items[] =\n') out_c.write(f'{{\n') - + num_items = 0 for dirpath, dirnames, filenames in os.walk(path_temp): dirnames.sort() - + if 0: # Write code to create directory item in zip. This isn't recognised by zipinfo, and doesn't # make Word like the file. 
@@ -174,7 +174,7 @@ def main(): if not name.endswith('/'): name += '/' out_c3.write(f' if (extract_zip_write_file(zip, NULL, 0, "{infix}")) goto end;\n') - + for filename in sorted(filenames): num_items += 1 path = os.path.join(dirpath, filename) @@ -205,7 +205,15 @@ def main(): for tag in 'dc:creator', 'cp:lastModifiedBy': text = re.sub(f'[<]{tag}[>][^<]*[<]/{tag}[>]', f'<{tag}></{tag}>', text) - out_c.write(f' "{text}"\n') + out_c.write(f' "') + # Represent non-ascii utf-8 bytes as C escape sequences. + for c in text: + if ord( c) <= 127: + out_c.write( c) + else: + for cc in c.encode( 'utf-8'): + out_c.write( f'\\x{cc:02x}') + out_c.write(f'"\n') else: data = read(os.path.join(dirpath, filename), encoding=None) out_c.write(f' "') @@ -216,17 +224,17 @@ def main(): out_c.write(f'"\n "') out_c.write(f'\\x{byte:02x}') out_c.write(f'"\n') - + out_c.write(f' }},\n') - out_c.write(f' \n') - + out_c.write(f'\n') + out_c.write(f'}};\n') out_c.write(f'\n') out_c.write(f'int {infix}_template_items_num = {num_items};\n') - + out_c = out_c.getvalue() write_if_diff(out_c, f'{path_out}.c', 'utf-8', force) - + out_h = io.StringIO() out_h.write(f'#ifndef EXTRACT_{infix.upper()}_TEMPLATE_H\n') out_h.write(f'#define EXTRACT_{infix.upper()}_TEMPLATE_H\n') @@ -247,6 +255,6 @@ def main(): out_h.write(f'#endif\n') write_if_diff(out_h.getvalue(), f'{path_out}.h', 'utf-8', force) #os.system(f'rm -r "{path_temp}"') - + if __name__ == '__main__': main() |