mirror of
https://github.com/reactos/reactos.git
synced 2024-11-23 11:33:31 +08:00
[ASMPP] Implement asm preprocessor
This converts ML style assembly to GAS compatible syntax
This commit is contained in:
parent
7277e26944
commit
61cc62d1b2
@ -153,7 +153,7 @@ if(NOT CMAKE_CROSSCOMPILING)
|
||||
add_subdirectory(sdk/tools)
|
||||
add_subdirectory(sdk/lib)
|
||||
|
||||
set(NATIVE_TARGETS bin2c widl gendib cabman fatten hpp isohybrid mkhive mkisofs obj2bin spec2def geninc mkshelllink utf16le xml2sdb)
|
||||
set(NATIVE_TARGETS asmpp bin2c widl gendib cabman fatten hpp isohybrid mkhive mkisofs obj2bin spec2def geninc mkshelllink utf16le xml2sdb)
|
||||
if(NOT MSVC)
|
||||
list(APPEND NATIVE_TARGETS rsym pefixup)
|
||||
endif()
|
||||
|
@ -467,8 +467,37 @@ function(allow_warnings __module)
|
||||
#target_compile_options(${__module} PRIVATE "-Wno-error")
|
||||
endfunction()
|
||||
|
||||
# convert_asm_file(<source_file> <target_file>)
#
# Registers a build rule that runs native-asmpp on <source_file> and stores the
# tool's standard output in ${CMAKE_CURRENT_BINARY_DIR}/<target_file>.
#
#   _source_file - input file; converted to an absolute path
#                  (get_filename_component resolves relative paths against the
#                  current source directory)
#   _target_file - output file name, relative to the current binary directory
#
# NOTE(review): native-asmpp is presumably the host-built asmpp tool; it is
# used both as the command and as a dependency so the rule re-runs when the
# tool changes.
function(convert_asm_file _source_file _target_file)
    get_filename_component(_source_file_full_path ${_source_file} ABSOLUTE)
    set(_preprocessed_asm_file ${CMAKE_CURRENT_BINARY_DIR}/${_target_file})

    # _target_file may contain a sub-directory (e.g. "i386/foo.asm.s"). The
    # shell redirection below cannot create missing directories, so create the
    # output directory as the first step of the rule.
    get_filename_component(_preprocessed_asm_dir ${_preprocessed_asm_file} DIRECTORY)

    add_custom_command(
        OUTPUT ${_preprocessed_asm_file}
        COMMAND ${CMAKE_COMMAND} -E make_directory ${_preprocessed_asm_dir}
        COMMAND native-asmpp ${_source_file_full_path} > ${_preprocessed_asm_file}
        DEPENDS native-asmpp ${_source_file_full_path})
endfunction()
|
||||
|
||||
# convert_asm_files(<source>...)
#
# Calls convert_asm_file() once per argument; the output of each conversion is
# named after its input with ".s" appended.
function(convert_asm_files)
    foreach(_asm_source ${ARGN})
        set(_asm_output ${_asm_source}.s)
        convert_asm_file(${_asm_source} ${_asm_output})
    endforeach()
endfunction()
|
||||
|
||||
# add_asm_files(<list_variable> <source>...)
#
# Appends the given sources to the list variable named by _target, routing
# files that need conversion through convert_asm_file():
#   *.asm  -> converted, ${CMAKE_CURRENT_BINARY_DIR}/<source>.s is appended
#   *.inc  -> converted, ${CMAKE_CURRENT_BINARY_DIR}/<source>.h is appended
#   other  -> appended unchanged
#
# Each source contributes exactly one entry. This is a macro, so the list is
# modified directly in the caller's scope (and the loop variables are visible
# there as well).
macro(add_asm_files _target)
    foreach(_source_file ${ARGN})
        get_filename_component(_extension ${_source_file} EXT)

        # Quoted on purpose: _extension is empty for files without an
        # extension, and an unquoted empty expansion would make if() invalid.
        if("${_extension}" STREQUAL ".asm")
            convert_asm_file(${_source_file} ${_source_file}.s)
            list(APPEND ${_target} ${CMAKE_CURRENT_BINARY_DIR}/${_source_file}.s)
        elseif("${_extension}" STREQUAL ".inc")
            convert_asm_file(${_source_file} ${_source_file}.h)
            list(APPEND ${_target} ${CMAKE_CURRENT_BINARY_DIR}/${_source_file}.h)
        else()
            list(APPEND ${_target} ${_source_file})
        endif()
    endforeach()
endmacro()
|
||||
|
||||
function(add_linker_script _target _linker_script_file)
|
||||
|
@ -2,7 +2,7 @@
|
||||
include(ExternalProject)
|
||||
|
||||
function(setup_host_tools)
|
||||
list(APPEND HOST_TOOLS bin2c widl gendib cabman fatten hpp isohybrid mkhive mkisofs obj2bin spec2def geninc mkshelllink txt2nls utf16le xml2sdb)
|
||||
list(APPEND HOST_TOOLS asmpp bin2c widl gendib cabman fatten hpp isohybrid mkhive mkisofs obj2bin spec2def geninc mkshelllink txt2nls utf16le xml2sdb)
|
||||
if(NOT MSVC)
|
||||
list(APPEND HOST_TOOLS rsym pefixup)
|
||||
endif()
|
||||
|
@ -475,21 +475,26 @@ macro(add_asm_files _target)
|
||||
get_includes(_directory_includes)
|
||||
get_directory_property(_defines COMPILE_DEFINITIONS)
|
||||
foreach(_source_file ${ARGN})
|
||||
get_filename_component(_source_file_base_name ${_source_file} NAME_WE)
|
||||
get_filename_component(_source_file_full_path ${_source_file} ABSOLUTE)
|
||||
set(_preprocessed_asm_file ${CMAKE_CURRENT_BINARY_DIR}/asm/${_source_file_base_name}_${_target}.asm)
|
||||
get_source_file_property(_defines_semicolon_list ${_source_file_full_path} COMPILE_DEFINITIONS)
|
||||
unset(_source_file_defines)
|
||||
foreach(_define ${_defines_semicolon_list})
|
||||
if(NOT ${_define} STREQUAL "NOTFOUND")
|
||||
list(APPEND _source_file_defines -D${_define})
|
||||
endif()
|
||||
endforeach()
|
||||
add_custom_command(
|
||||
OUTPUT ${_preprocessed_asm_file}
|
||||
COMMAND cl /nologo /X /I${REACTOS_SOURCE_DIR}/sdk/include/asm /I${REACTOS_BINARY_DIR}/sdk/include/asm ${_directory_includes} ${_source_file_defines} ${_directory_defines} /D__ASM__ /D_USE_ML /EP /c ${_source_file_full_path} > ${_preprocessed_asm_file}
|
||||
DEPENDS ${_source_file_full_path})
|
||||
list(APPEND ${_target} ${_preprocessed_asm_file})
|
||||
get_filename_component(_extension ${_source_file} EXT)
|
||||
if (("${_extension}" STREQUAL ".asm") OR ("${_extension}" STREQUAL ".inc"))
|
||||
list(APPEND ${_target} ${_source_file})
|
||||
else()
|
||||
get_filename_component(_source_file_base_name ${_source_file} NAME_WE)
|
||||
get_filename_component(_source_file_full_path ${_source_file} ABSOLUTE)
|
||||
set(_preprocessed_asm_file ${CMAKE_CURRENT_BINARY_DIR}/asm/${_source_file_base_name}_${_target}.asm)
|
||||
get_source_file_property(_defines_semicolon_list ${_source_file_full_path} COMPILE_DEFINITIONS)
|
||||
unset(_source_file_defines)
|
||||
foreach(_define ${_defines_semicolon_list})
|
||||
if(NOT ${_define} STREQUAL "NOTFOUND")
|
||||
list(APPEND _source_file_defines -D${_define})
|
||||
endif()
|
||||
endforeach()
|
||||
add_custom_command(
|
||||
OUTPUT ${_preprocessed_asm_file}
|
||||
COMMAND cl /nologo /X /I${REACTOS_SOURCE_DIR}/sdk/include/asm /I${REACTOS_BINARY_DIR}/sdk/include/asm ${_directory_includes} ${_source_file_defines} ${_directory_defines} /D__ASM__ /D_USE_ML /EP /c ${_source_file_full_path} > ${_preprocessed_asm_file}
|
||||
DEPENDS ${_source_file_full_path})
|
||||
list(APPEND ${_target} ${_preprocessed_asm_file})
|
||||
endif()
|
||||
endforeach()
|
||||
endmacro()
|
||||
|
||||
|
@ -27,6 +27,7 @@ target_link_libraries(obj2bin PRIVATE host_includes)
|
||||
add_host_tool(spec2def spec2def/spec2def.c)
|
||||
add_host_tool(utf16le utf16le/utf16le.cpp)
|
||||
|
||||
add_subdirectory(asmpp)
|
||||
add_subdirectory(cabman)
|
||||
add_subdirectory(fatten)
|
||||
add_subdirectory(hhpcomp)
|
||||
|
8
sdk/tools/asmpp/CMakeLists.txt
Normal file
8
sdk/tools/asmpp/CMakeLists.txt
Normal file
@ -0,0 +1,8 @@
|
||||
|
||||
# asmpp host tool: converts ML style assembly to GAS compatible syntax.
#
# The source file is passed directly instead of through
# list(APPEND SOURCE ...): SOURCE is a plain variable, so appending to it would
# also pick up whatever value the parent directory scope left in it.
add_host_tool(asmpp asmpp.cpp)
target_link_libraries(asmpp PRIVATE host_includes)

# tokenizer.hpp in this directory uses C++11 constructs (range-based for,
# alias declarations). Require the standard so an unsupported compiler fails at
# configure time instead of silently building with an older dialect.
set_target_properties(asmpp PROPERTIES
    CXX_STANDARD 11
    CXX_STANDARD_REQUIRED ON)
|
1208
sdk/tools/asmpp/asmpp.cpp
Normal file
1208
sdk/tools/asmpp/asmpp.cpp
Normal file
File diff suppressed because it is too large
Load Diff
31
sdk/tools/asmpp/asmpp.sln
Normal file
31
sdk/tools/asmpp/asmpp.sln
Normal file
@ -0,0 +1,31 @@
|
||||
|
||||
Microsoft Visual Studio Solution File, Format Version 12.00
|
||||
# Visual Studio Version 16
|
||||
VisualStudioVersion = 16.0.32510.428
|
||||
MinimumVisualStudioVersion = 10.0.40219.1
|
||||
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "asmpp", "asmpp.vcxproj", "{A1F7C9EE-4F70-43CD-A0BE-85D137B80583}"
|
||||
EndProject
|
||||
Global
|
||||
GlobalSection(SolutionConfigurationPlatforms) = preSolution
|
||||
Debug|x64 = Debug|x64
|
||||
Debug|x86 = Debug|x86
|
||||
Release|x64 = Release|x64
|
||||
Release|x86 = Release|x86
|
||||
EndGlobalSection
|
||||
GlobalSection(ProjectConfigurationPlatforms) = postSolution
|
||||
{A1F7C9EE-4F70-43CD-A0BE-85D137B80583}.Debug|x64.ActiveCfg = Debug|x64
|
||||
{A1F7C9EE-4F70-43CD-A0BE-85D137B80583}.Debug|x64.Build.0 = Debug|x64
|
||||
{A1F7C9EE-4F70-43CD-A0BE-85D137B80583}.Debug|x86.ActiveCfg = Debug|Win32
|
||||
{A1F7C9EE-4F70-43CD-A0BE-85D137B80583}.Debug|x86.Build.0 = Debug|Win32
|
||||
{A1F7C9EE-4F70-43CD-A0BE-85D137B80583}.Release|x64.ActiveCfg = Release|x64
|
||||
{A1F7C9EE-4F70-43CD-A0BE-85D137B80583}.Release|x64.Build.0 = Release|x64
|
||||
{A1F7C9EE-4F70-43CD-A0BE-85D137B80583}.Release|x86.ActiveCfg = Release|Win32
|
||||
{A1F7C9EE-4F70-43CD-A0BE-85D137B80583}.Release|x86.Build.0 = Release|Win32
|
||||
EndGlobalSection
|
||||
GlobalSection(SolutionProperties) = preSolution
|
||||
HideSolutionNode = FALSE
|
||||
EndGlobalSection
|
||||
GlobalSection(ExtensibilityGlobals) = postSolution
|
||||
SolutionGuid = {16936D9E-6E98-4126-8918-03218BC19061}
|
||||
EndGlobalSection
|
||||
EndGlobal
|
151
sdk/tools/asmpp/asmpp.vcxproj
Normal file
151
sdk/tools/asmpp/asmpp.vcxproj
Normal file
@ -0,0 +1,151 @@
|
||||
<?xml version="1.0" encoding="utf-8"?>
|
||||
<Project DefaultTargets="Build" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
|
||||
<ItemGroup Label="ProjectConfigurations">
|
||||
<ProjectConfiguration Include="Debug|Win32">
|
||||
<Configuration>Debug</Configuration>
|
||||
<Platform>Win32</Platform>
|
||||
</ProjectConfiguration>
|
||||
<ProjectConfiguration Include="Release|Win32">
|
||||
<Configuration>Release</Configuration>
|
||||
<Platform>Win32</Platform>
|
||||
</ProjectConfiguration>
|
||||
<ProjectConfiguration Include="Debug|x64">
|
||||
<Configuration>Debug</Configuration>
|
||||
<Platform>x64</Platform>
|
||||
</ProjectConfiguration>
|
||||
<ProjectConfiguration Include="Release|x64">
|
||||
<Configuration>Release</Configuration>
|
||||
<Platform>x64</Platform>
|
||||
</ProjectConfiguration>
|
||||
</ItemGroup>
|
||||
<PropertyGroup Label="Globals">
|
||||
<VCProjectVersion>16.0</VCProjectVersion>
|
||||
<Keyword>Win32Proj</Keyword>
|
||||
<ProjectGuid>{a1f7c9ee-4f70-43cd-a0be-85d137b80583}</ProjectGuid>
|
||||
<RootNamespace>asmpp2</RootNamespace>
|
||||
<WindowsTargetPlatformVersion>10.0</WindowsTargetPlatformVersion>
|
||||
</PropertyGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
|
||||
<ConfigurationType>Application</ConfigurationType>
|
||||
<UseDebugLibraries>true</UseDebugLibraries>
|
||||
<PlatformToolset>v142</PlatformToolset>
|
||||
<CharacterSet>Unicode</CharacterSet>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
|
||||
<ConfigurationType>Application</ConfigurationType>
|
||||
<UseDebugLibraries>false</UseDebugLibraries>
|
||||
<PlatformToolset>v142</PlatformToolset>
|
||||
<WholeProgramOptimization>true</WholeProgramOptimization>
|
||||
<CharacterSet>Unicode</CharacterSet>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
|
||||
<ConfigurationType>Application</ConfigurationType>
|
||||
<UseDebugLibraries>true</UseDebugLibraries>
|
||||
<PlatformToolset>v142</PlatformToolset>
|
||||
<CharacterSet>Unicode</CharacterSet>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
|
||||
<ConfigurationType>Application</ConfigurationType>
|
||||
<UseDebugLibraries>false</UseDebugLibraries>
|
||||
<PlatformToolset>v142</PlatformToolset>
|
||||
<WholeProgramOptimization>true</WholeProgramOptimization>
|
||||
<CharacterSet>Unicode</CharacterSet>
|
||||
</PropertyGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
|
||||
<ImportGroup Label="ExtensionSettings">
|
||||
</ImportGroup>
|
||||
<ImportGroup Label="Shared">
|
||||
</ImportGroup>
|
||||
<ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
|
||||
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
|
||||
</ImportGroup>
|
||||
<ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
|
||||
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
|
||||
</ImportGroup>
|
||||
<ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
|
||||
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
|
||||
</ImportGroup>
|
||||
<ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
|
||||
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
|
||||
</ImportGroup>
|
||||
<PropertyGroup Label="UserMacros" />
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
|
||||
<LinkIncremental>true</LinkIncremental>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
|
||||
<LinkIncremental>false</LinkIncremental>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
|
||||
<LinkIncremental>true</LinkIncremental>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
|
||||
<LinkIncremental>false</LinkIncremental>
|
||||
</PropertyGroup>
|
||||
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
|
||||
<ClCompile>
|
||||
<WarningLevel>Level3</WarningLevel>
|
||||
<SDLCheck>true</SDLCheck>
|
||||
<PreprocessorDefinitions>WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||
<ConformanceMode>true</ConformanceMode>
|
||||
</ClCompile>
|
||||
<Link>
|
||||
<SubSystem>Console</SubSystem>
|
||||
<GenerateDebugInformation>true</GenerateDebugInformation>
|
||||
</Link>
|
||||
</ItemDefinitionGroup>
|
||||
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
|
||||
<ClCompile>
|
||||
<WarningLevel>Level3</WarningLevel>
|
||||
<FunctionLevelLinking>true</FunctionLevelLinking>
|
||||
<IntrinsicFunctions>true</IntrinsicFunctions>
|
||||
<SDLCheck>true</SDLCheck>
|
||||
<PreprocessorDefinitions>WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||
<ConformanceMode>true</ConformanceMode>
|
||||
</ClCompile>
|
||||
<Link>
|
||||
<SubSystem>Console</SubSystem>
|
||||
<EnableCOMDATFolding>true</EnableCOMDATFolding>
|
||||
<OptimizeReferences>true</OptimizeReferences>
|
||||
<GenerateDebugInformation>true</GenerateDebugInformation>
|
||||
</Link>
|
||||
</ItemDefinitionGroup>
|
||||
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
|
||||
<ClCompile>
|
||||
<WarningLevel>Level3</WarningLevel>
|
||||
<SDLCheck>true</SDLCheck>
|
||||
<PreprocessorDefinitions>_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||
<ConformanceMode>true</ConformanceMode>
|
||||
<LanguageStandard>stdcpp17</LanguageStandard>
|
||||
</ClCompile>
|
||||
<Link>
|
||||
<SubSystem>Console</SubSystem>
|
||||
<GenerateDebugInformation>true</GenerateDebugInformation>
|
||||
</Link>
|
||||
</ItemDefinitionGroup>
|
||||
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
|
||||
<ClCompile>
|
||||
<WarningLevel>Level3</WarningLevel>
|
||||
<FunctionLevelLinking>true</FunctionLevelLinking>
|
||||
<IntrinsicFunctions>true</IntrinsicFunctions>
|
||||
<SDLCheck>true</SDLCheck>
|
||||
<PreprocessorDefinitions>NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||
<ConformanceMode>true</ConformanceMode>
|
||||
</ClCompile>
|
||||
<Link>
|
||||
<SubSystem>Console</SubSystem>
|
||||
<EnableCOMDATFolding>true</EnableCOMDATFolding>
|
||||
<OptimizeReferences>true</OptimizeReferences>
|
||||
<GenerateDebugInformation>true</GenerateDebugInformation>
|
||||
</Link>
|
||||
</ItemDefinitionGroup>
|
||||
<ItemGroup>
|
||||
<ClCompile Include="asmpp.cpp" />
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<ClInclude Include="tokenizer.hpp" />
|
||||
</ItemGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
||||
<ImportGroup Label="ExtensionTargets">
|
||||
</ImportGroup>
|
||||
</Project>
|
249
sdk/tools/asmpp/tokenizer.hpp
Normal file
249
sdk/tools/asmpp/tokenizer.hpp
Normal file
@ -0,0 +1,249 @@
|
||||
/*
|
||||
* PROJECT: ReactOS host tools
|
||||
* LICENSE: MIT (https://spdx.org/licenses/MIT)
|
||||
* PURPOSE: Tokenizer class implementation
|
||||
* COPYRIGHT: Copyright 2021 Timo Kreuzer <timo.kreuzer@reactos.org>
|
||||
*/
|
||||
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include <fstream>
|
||||
#include <regex>
|
||||
#include <ctime>
|
||||
|
||||
// Uncomment this for easier debugging
|
||||
#if 0
|
||||
#define throw __debugbreak(); throw
|
||||
#endif
|
||||
|
||||
extern time_t search_time;
|
||||
|
||||
// Describes one kind of token: an integer type id paired with the regular
// expression text that recognizes it.
struct TOKEN_DEF
{
    int Type;                   // Type id reported for tokens matching RegExString
    std::string RegExString;    // Regular expression source text for this token
};
|
||||
|
||||
// A lightweight view of one token: a (position, length) window into a text
// that is held by reference, plus the token's type id. The referenced string
// must stay alive for as long as the Token is used.
class Token
{
    const std::string& m_text;  // Text the token points into (not owned)
    unsigned int m_pos;         // Offset of the token inside m_text
    unsigned int m_len;         // Number of characters the token covers
#if _DEBUG
    std::string m_dbgstr;       // Copy of the token text, filled in the constructor
#endif
    int m_type;                 // Type id given at construction

public:

    // Creates a token covering text[pos, pos + len) with the given type.
    // pos and len are stored as unsigned int.
    Token(const std::string& text, size_t pos, size_t len, int type)
        : m_text(text),
          m_pos(static_cast<unsigned int>(pos)),
          m_len(static_cast<unsigned int>(len)),
          m_type(type)
    {
#if _DEBUG
        m_dbgstr = str();
#endif
    }

    // Returns a copy of the characters covered by this token.
    std::string str() const
    {
        return std::string(m_text, m_pos, m_len);
    }

    // Returns the token's type id.
    int type() const
    {
        return m_type;
    }
};
|
||||
|
||||
struct Tokenizer
|
||||
{
|
||||
const std::vector<TOKEN_DEF> &m_tokendefs;
|
||||
const std::regex m_re;
|
||||
|
||||
typedef int myint;
|
||||
|
||||
static
|
||||
unsigned int
|
||||
count_captures(const std::string& exp)
|
||||
{
|
||||
bool in_char_group = false;
|
||||
unsigned int count = 0;
|
||||
|
||||
for (size_t i = 0; i < exp.size(); i++)
|
||||
{
|
||||
char c = exp[i];
|
||||
|
||||
// Skip escaped characters
|
||||
if (c == '\\')
|
||||
{
|
||||
i++;
|
||||
continue;
|
||||
}
|
||||
|
||||
if (in_char_group)
|
||||
{
|
||||
if (c == ']')
|
||||
{
|
||||
in_char_group = false;
|
||||
}
|
||||
continue;
|
||||
}
|
||||
|
||||
if (c == '[')
|
||||
{
|
||||
in_char_group = true;
|
||||
continue;
|
||||
}
|
||||
|
||||
if (c == '(')
|
||||
{
|
||||
if (exp[i + 1] != '?')
|
||||
{
|
||||
count++;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return count;
|
||||
}
|
||||
|
||||
static
|
||||
std::regex
|
||||
CompileMultiRegex(const std::vector<TOKEN_DEF> &tokendefs)
|
||||
{
|
||||
std::string combinedString;
|
||||
|
||||
if (tokendefs.size() == 0)
|
||||
{
|
||||
return std::regex();
|
||||
}
|
||||
|
||||
// Validate all token definitions
|
||||
for (auto def : tokendefs)
|
||||
{
|
||||
size_t found = -1;
|
||||
|
||||
// Count capture groups
|
||||
unsigned int count = count_captures(def.RegExString);
|
||||
if (count != 1)
|
||||
{
|
||||
throw "invalid count!\n";
|
||||
}
|
||||
}
|
||||
|
||||
// Combine all expressions into one (one capture group for each)
|
||||
combinedString = "(?:" + tokendefs[0].RegExString + ")";
|
||||
for (size_t i = 1; i < tokendefs.size(); i++)
|
||||
{
|
||||
combinedString += "|(?:" + tokendefs[i].RegExString + ")";
|
||||
}
|
||||
|
||||
return std::regex(combinedString, std::regex_constants::icase);
|
||||
}
|
||||
|
||||
public:
|
||||
|
||||
struct TOKEN_REF
|
||||
{
|
||||
unsigned int pos;
|
||||
unsigned int len;
|
||||
int type;
|
||||
};
|
||||
|
||||
Tokenizer(std::vector<TOKEN_DEF> &tokendefs)
|
||||
: m_tokendefs(tokendefs),
|
||||
m_re(CompileMultiRegex(tokendefs))
|
||||
{
|
||||
}
|
||||
|
||||
TOKEN_REF match(std::smatch &matches, const std::string& str) const
|
||||
{
|
||||
return match(matches, str, 0);
|
||||
}
|
||||
|
||||
TOKEN_REF match(std::smatch &matches, const std::string &str, size_t startpos) const
|
||||
{
|
||||
const std::string::const_iterator first = str.cbegin() + startpos;
|
||||
const std::string::const_iterator last = str.cend();
|
||||
|
||||
// If we reached the end, there is nothing more to do
|
||||
if (first == last)
|
||||
{
|
||||
return TOKEN_REF{ static_cast<unsigned int>(startpos), 0, -1 };
|
||||
}
|
||||
|
||||
time_t start_time = time(NULL);
|
||||
|
||||
// Try to find a match
|
||||
if (!std::regex_search(first, last, matches, m_re))
|
||||
{
|
||||
throw "Failed to match\n";
|
||||
}
|
||||
|
||||
search_time += time(NULL) - start_time;
|
||||
|
||||
// Validate that it's at the start of the string
|
||||
if (matches.prefix().matched)
|
||||
{
|
||||
throw "Failed to match at current position!\n";
|
||||
}
|
||||
|
||||
// We have a match, check which one it is
|
||||
for (size_t i = 1; i < matches.size(); i++)
|
||||
{
|
||||
if (matches[i].matched)
|
||||
{
|
||||
unsigned int len = static_cast<unsigned int>(matches.length(i));
|
||||
int type = m_tokendefs[i - 1].Type;
|
||||
return TOKEN_REF{ static_cast<unsigned int>(startpos), len, type};
|
||||
}
|
||||
}
|
||||
|
||||
// We should never get here
|
||||
throw "Something went wrong!\n";
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
class TokenList
|
||||
{
|
||||
using TOKEN_REF = typename Tokenizer::TOKEN_REF;
|
||||
|
||||
const Tokenizer& m_tokenizer;
|
||||
const std::string& m_text;
|
||||
std::vector<TOKEN_REF> m_tokens;
|
||||
|
||||
public:
|
||||
|
||||
TokenList(const Tokenizer& tokenizer, const std::string& text)
|
||||
: m_tokenizer(tokenizer),
|
||||
m_text(text)
|
||||
{
|
||||
size_t startpos = 0;
|
||||
size_t len = m_text.size();
|
||||
std::smatch matches;
|
||||
|
||||
m_tokens.reserve(len / 5);
|
||||
|
||||
while (startpos < len)
|
||||
{
|
||||
TOKEN_REF tref = m_tokenizer.match(matches, m_text, startpos);
|
||||
m_tokens.push_back(tref);
|
||||
startpos += tref.len;
|
||||
};
|
||||
}
|
||||
|
||||
size_t size() const
|
||||
{
|
||||
return m_tokens.size();
|
||||
}
|
||||
|
||||
Token operator[](size_t n) const
|
||||
{
|
||||
return Token(m_text, m_tokens[n].pos, m_tokens[n].len, m_tokens[n].type);
|
||||
}
|
||||
|
||||
};
|
Loading…
Reference in New Issue
Block a user