#!/usr/bin/env python3

import sys

codes = []
for line in open('data/unicode.txt').readlines(): # Original source: https://www.unicode.org/Public/14.0.0/ucd/UnicodeData.txt
    try:
        code = int(line.split(';')[0], 16)

        # Ensure the character is encodable (surrogates are not)
        chr(code).encode('utf8')

        if (code >= 128):
            codes.append(code)
    except:
        pass

# Generate the compose file

data = ''
for n, code in enumerate(codes):
        data += '<Linefeed> '
        data += ' '.join(f'<{c}>' for c in ('%05d' % n))
        data += f' : "{chr(code)}"\n'

open('data/keyd.compose', 'w').write(data)

# Generate the corresponding src/unicode.c

# OPT: We could condense this and shave off lookup time by using an offset
# table to capitalize on codepoint contiguity, but 35k is small enough to
# warrant keeping the entire thing in memory.

open('src/unicode.c', 'w').write(f'''
	/* GENERATED BY {sys.argv[0]}, DO NOT MODIFY BY HAND. */

	#include <stdint.h>
	#include <stdlib.h>

	uint32_t unicode_table[] = {{ {','.join(map(str, codes))} }};

	int lookup_xcompose_code(uint32_t codepoint) {{
		size_t i = 0;

		for(i = 0; i < sizeof(unicode_table)/sizeof(unicode_table[0]); i++) {{
			if (unicode_table[i] == codepoint)
				return i;
		}}

		return -1;
	}}
'''
.replace('\n\t', '\n')
.lstrip()
)
