diff --git a/node.gyp b/node.gyp index 926fe98959c21a..df70d26f35f4e7 100644 --- a/node.gyp +++ b/node.gyp @@ -859,6 +859,8 @@ 'action_name': 'node_js2c', 'process_outputs_as_sources': 1, 'inputs': [ + # Put the code first so it's a dependency and can be used for invocation. + 'tools/js2c.py', '<@(library_files)', 'config.gypi', 'tools/js2c_macros/check_macros.py' @@ -878,9 +880,8 @@ }] ], 'action': [ - 'python', 'tools/js2c.py', - '<@(_outputs)', - '<@(_inputs)', + 'python', '<@(_inputs)', + '--target', '<@(_outputs)', ], }, ], diff --git a/tools/js2c.py b/tools/js2c.py index 86648f785cee5b..9c5130a91630c3 100755 --- a/tools/js2c.py +++ b/tools/js2c.py @@ -27,37 +27,36 @@ # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -# This is a utility for converting JavaScript source code into C-style -# char arrays. It is used for embedded JavaScript code in the V8 -# library. - +""" +This is a utility for converting JavaScript source code into uint16_t[], +that are used for embeding JavaScript code into the Node.js binary. +""" +import argparse import os import re -import sys - - -def ToCArray(elements, step=10): - slices = (elements[i:i+step] for i in range(0, len(elements), step)) - slices = map(lambda s: ','.join(str(x) for x in s), slices) - return ',\n'.join(slices) +import functools +import codecs def ReadFile(filename): - file = open(filename, "rt") - try: - lines = file.read() - finally: - file.close() - return lines + if is_verbose: + print(filename) + with codecs.open(filename, "r", "utf-8") as f: + lines = f.read() + return lines -def ReadLines(filename): +def ReadMacroFiles(filenames): + """ + + :rtype: List(str) + """ result = [] - for line in open(filename, "rt"): - if '#' in line: - line = line[:line.index('#')] - line = line.strip() - if len(line) > 0: - result.append(line) + for filename in filenames: + with open(filename, "rt") as f: + # strip python-like comments and whitespace padding + lines = [line.split('#')[0].strip() for line in f] + # filter empty lines + result.extend(filter(bool, lines)) return result @@ -70,6 +69,7 @@ def ExpandConstants(lines, constants): def ExpandMacros(lines, macros): def expander(s): return ExpandMacros(s, macros) + for name, macro in macros.items(): name_pattern = re.compile("\\b%s\\(" % name) pattern_match = name_pattern.search(lines, 0) @@ -82,13 +82,15 @@ def expander(s): last_match = end arg_index = [0] # Wrap state into array, to work around Python "scoping" mapping = {} - def add_arg(str): + + def add_arg(s): # Remember to expand recursively in the arguments if arg_index[0] >= len(macro.args): return - replacement = expander(str.strip()) + replacement = expander(s.strip()) mapping[macro.args[arg_index[0]]] = replacement arg_index[0] += 1 + while end < len(lines) and height > 0: # We don't count commas at higher nesting levels. if lines[end] == ',' and height == 1: @@ -100,10 +102,11 @@ def add_arg(str): height = height - 1 end = end + 1 # Remember to add the last match. - add_arg(lines[last_match:end-1]) - if arg_index[0] < len(macro.args) -1: + add_arg(lines[last_match:end - 1]) + if arg_index[0] < len(macro.args) - 1: lineno = lines.count(os.linesep, 0, start) + 1 - raise Exception('line %s: Too few arguments for macro "%s"' % (lineno, name)) + raise Exception( + 'line %s: Too few arguments for macro "%s"' % (lineno, name)) result = macro.expand(mapping) # Replace the occurrence of the macro with the expansion lines = lines[:start] + result + lines[end:] @@ -115,33 +118,37 @@ class TextMacro: def __init__(self, args, body): self.args = args self.body = body + def expand(self, mapping): result = self.body for key, value in mapping.items(): - result = result.replace(key, value) + result = result.replace(key, value) return result + class PythonMacro: def __init__(self, args, fun): self.args = args self.fun = fun + def expand(self, mapping): args = [] for arg in self.args: args.append(mapping[arg]) return str(self.fun(*args)) + CONST_PATTERN = re.compile('^const\s+([a-zA-Z0-9_]+)\s*=\s*([^;]*);$') MACRO_PATTERN = re.compile('^macro\s+([a-zA-Z0-9_]+)\s*\(([^)]*)\)\s*=\s*([^;]*);$') PYTHON_MACRO_PATTERN = re.compile('^python\s+macro\s+([a-zA-Z0-9_]+)\s*\(([^)]*)\)\s*=\s*([^;]*);$') -def ReadMacros(lines): - constants = { } - macros = { } + +def ReadMacros(macro_files): + lines = ReadMacroFiles(macro_files) + constants = {} + macros = {} for line in lines: - hash = line.find('#') - if hash != -1: line = line[:hash] - line = line.strip() + line = line.split('#')[0].strip() if len(line) == 0: continue const_match = CONST_PATTERN.match(line) @@ -153,20 +160,20 @@ def ReadMacros(lines): macro_match = MACRO_PATTERN.match(line) if macro_match: name = macro_match.group(1) - args = [s.strip() for s in macro_match.group(2).split(',')] + args = [p.strip() for p in macro_match.group(2).split(',')] body = macro_match.group(3).strip() macros[name] = TextMacro(args, body) else: python_match = PYTHON_MACRO_PATTERN.match(line) if python_match: name = python_match.group(1) - args = [s.strip() for s in python_match.group(2).split(',')] + args = [p.strip() for p in macro_match.group(2).split(',')] body = python_match.group(3).strip() fun = eval("lambda " + ",".join(args) + ': ' + body) macros[name] = PythonMacro(args, fun) else: raise Exception("Illegal line: " + line) - return (constants, macros) + return constants, macros TEMPLATE = """ @@ -177,14 +184,14 @@ def ReadMacros(lines): namespace native_module {{ -{definitions} +{0} void NativeModuleLoader::LoadJavaScriptSource() {{ - {initializers} + {1} }} UnionBytes NativeModuleLoader::GetConfig() {{ - return UnionBytes(config_raw, arraysize(config_raw)); // config.gypi + return UnionBytes(config_raw, {2}); // config.gypi }} }} // namespace native_module @@ -192,104 +199,137 @@ def ReadMacros(lines): }} // namespace node """ -ONE_BYTE_STRING = """ -static const uint8_t {var}[] = {{ {data} }}; -""" - TWO_BYTE_STRING = """ -static const uint16_t {var}[] = {{ {data} }}; -""" - - -INITIALIZER = """ -source_.emplace( - "{module}", - UnionBytes({var}, arraysize({var})) -); +static const uint16_t {0}[] = {{ +{1} +}}; """ -def JS2C(source, target): - modules = [] - consts = {} - macros = {} - macro_lines = [] - - for s in source: - if (os.path.split(str(s))[1]).endswith('macros.py'): - macro_lines.extend(ReadLines(str(s))) - else: - modules.append(s) - +INITIALIZER = 'source_.emplace("{0}", UnionBytes{{{1}, {2}}});' + +CONFIG_GYPI_ID = 'config_raw' + +SLUGGER_RE =re.compile('[.\-/]') + +is_verbose = False + +def GetDefinition(var, source, step=30): + encoded_source = bytearray(source, 'utf-16le') + code_points = [encoded_source[i] + (encoded_source[i+1] * 256) for i in range(0, len(encoded_source), 2)] + # For easier debugging, align to the common 3 char for code-points. + elements_s = ['%3s' % x for x in code_points] + # Put no more then `step` code-points in a line. + slices = [elements_s[i:i + step] for i in range(0, len(elements_s), step)] + lines = [','.join(s) for s in slices] + array_content = ',\n'.join(lines) + definition = TWO_BYTE_STRING.format(var, array_content) + return definition, len(code_points) + + +def AddModule(filename, consts, macros, definitions, initializers): + code = ReadFile(filename) + code = ExpandConstants(code, consts) + code = ExpandMacros(code, macros) + name = NormalizeFileName(filename) + slug = SLUGGER_RE.sub('_', name) + var = slug + '_raw' + definition, size = GetDefinition(var, code) + initializer = INITIALIZER.format(name, var, size) + definitions.append(definition) + initializers.append(initializer) + +def NormalizeFileName(filename): + split = filename.split(os.path.sep) + if split[0] == 'deps': + split = ['internal'] + split + else: # `lib/**/*.js` so drop the 'lib' part + split = split[1:] + filename = '/'.join(split) + return os.path.splitext(filename)[0] + + +def JS2C(source_files, target): # Process input from all *macro.py files - (consts, macros) = ReadMacros(macro_lines) + consts, macros = ReadMacros(source_files['.py']) # Build source code lines definitions = [] initializers = [] - def GetDefinition(var, source): - # Treat non-ASCII as UTF-8 and convert it to UTF-16. - if any(ord(c) > 127 for c in source): - source = map(ord, source.decode('utf-8').encode('utf-16be')) - source = [source[i] * 256 + source[i+1] for i in range(0, len(source), 2)] - source = ToCArray(source) - return TWO_BYTE_STRING.format(var=var, data=source) - else: - source = ToCArray(map(ord, source), step=20) - return ONE_BYTE_STRING.format(var=var, data=source) - - def AddModule(module, source): - var = '%s_raw' % (module.replace('-', '_').replace('/', '_')) - definition = GetDefinition(var, source) - initializer = INITIALIZER.format(module=module, - var=var) - definitions.append(definition) - initializers.append(initializer) - - for name in modules: - lines = ReadFile(str(name)) - lines = ExpandConstants(lines, consts) - lines = ExpandMacros(lines, macros) - - # On Windows, "./foo.bar" in the .gyp file is passed as "foo.bar" - # so don't assume there is always a slash in the file path. - if '/' in name or '\\' in name: - split = re.split('/|\\\\', name) - if split[0] == 'deps': - split = ['internal'] + split - else: - split = split[1:] - name = '/'.join(split) - - # if its a gypi file we're going to want it as json - # later on anyway, so get it out of the way now - if name.endswith('.gypi'): - # Currently only config.gypi is allowed - assert name == 'config.gypi' - lines = re.sub(r'\'true\'', 'true', lines) - lines = re.sub(r'\'false\'', 'false', lines) - lines = re.sub(r'#.*?\n', '', lines) - lines = re.sub(r'\'', '"', lines) - definition = GetDefinition('config_raw', lines) - definitions.append(definition) - else: - AddModule(name.split('.', 1)[0], lines) + for filename in source_files['.js']: + AddModule(filename, consts, macros, definitions, initializers) + + config_def, config_size = handle_config_gypi(source_files['config.gypi']) + definitions.append(config_def) # Emit result - output = open(str(target[0]), "w") - output.write( - TEMPLATE.format(definitions=''.join(definitions), - initializers=''.join(initializers))) - output.close() + definitions = ''.join(definitions) + initializers = '\n '.join(initializers) + out = TEMPLATE.format(definitions, initializers, config_size) + write_if_chaged(out, target) + + +def handle_config_gypi(config_filename): + # if its a gypi file we're going to want it as json + # later on anyway, so get it out of the way now + config = ReadFile(config_filename) + config = jsonify(config) + config_def, config_size = GetDefinition(CONFIG_GYPI_ID, config) + return config_def, config_size + + +def jsonify(config): + # 1. string comments + config = re.sub(r'#.*?\n', '', config) + # 3. normalize string literals from ' into " + config = re.sub('\'', '"', config) + # 2. turn pseudo-booleans strings into Booleans + config = re.sub('"true"', 'true', config) + config = re.sub('"false"', 'false', config) + return config + + +def write_if_chaged(content, target): + if os.path.exists(target): + with open(target, 'rt') as existing: + old_content = existing.read() + else: + old_content = '' + if old_content == content: + return + with open(target, "wt") as output: + output.write(content) + + +def SourceFileByExt(files_by_ext, filename): + """ + :type files_by_ext: dict + :type filename: str + :rtype: dict + """ + ext = os.path.splitext(filename)[-1] + files_by_ext.setdefault(ext, []).append(filename) + return files_by_ext def main(): - natives = sys.argv[1] - source_files = sys.argv[2:] - if source_files[-2] == '-t': - global TEMPLATE - TEMPLATE = source_files[-1] - source_files = source_files[:-2] - JS2C(source_files, [natives]) + parser = argparse.ArgumentParser( + description='Convert code files into `uint16_t[]`s', + fromfile_prefix_chars='@' + ) + parser.add_argument('--target', help='output file') + parser.add_argument('--verbose', action='store_true', help='output file') + parser.add_argument('sources', nargs='*', help='input files') + options = parser.parse_args() + global is_verbose + is_verbose = options.verbose + source_files = functools.reduce(SourceFileByExt, options.sources, {}) + # Should have exactly 3 types: `.js`, `.py`, and `.gypi` + assert len(source_files) == 3 + # Currently config.gypi is the only `.gypi` file allowed + assert source_files['.gypi'] == ['config.gypi'] + source_files['config.gypi'] = source_files.pop('.gypi')[0] + JS2C(source_files, options.target) + if __name__ == "__main__": main()