gccrs: libgrust: Add format_parser library

Compile libformat_parser and link to it.

gcc/rust/ChangeLog:

	* Make-lang.in: Compile libformat_parser.
	* ast/rust-fmt.cc: New FFI definitions.
	* ast/rust-fmt.h: Likewise.
	* expand/rust-macro-builtins.cc (MacroBuiltin::format_args_handler): Call
	into libformat_parser.
	* expand/rust-macro-builtins.h: Define format_args!() handler proper.

libgrust/ChangeLog:

	* libformat_parser/Cargo.lock: New file.
	* libformat_parser/Cargo.toml: New file.
	* libformat_parser/generic_format_parser/Cargo.toml: New file.
	* libformat_parser/generic_format_parser/src/lib.rs: New file.
	* libformat_parser/src/bin.rs: New file.
	* libformat_parser/src/lib.rs: New file.
This commit is contained in:
Arthur Cohen 2024-04-23 13:38:58 +02:00
parent 473feb033d
commit 6fef4d6ffc
11 changed files with 1371 additions and 175 deletions

View File

@ -54,6 +54,8 @@ GCCRS_D_OBJS = \
rust/rustspec.o \
$(END)
LIBS += -ldl -lpthread
gccrs$(exeext): $(GCCRS_D_OBJS) $(EXTRA_GCC_OBJS) libcommon-target.a $(LIBDEPS)
+$(LINKER) $(ALL_LINKERFLAGS) $(LDFLAGS) -o $@ \
$(GCCRS_D_OBJS) $(EXTRA_GCC_OBJS) libcommon-target.a \
@ -218,7 +220,7 @@ RUST_LIBDEPS = $(LIBDEPS) $(LIBPROC_MACRO_INTERNAL)
crab1$(exeext): $(RUST_ALL_OBJS) attribs.o $(BACKEND) $(RUST_LIBDEPS) $(rust.prev)
@$(call LINK_PROGRESS,$(INDEX.rust),start)
+$(LLINKER) $(ALL_LINKERFLAGS) $(LDFLAGS) -o $@ \
$(RUST_ALL_OBJS) attribs.o $(BACKEND) $(LIBS) $(LIBPROC_MACRO_INTERNAL) $(BACKENDLIBS)
$(RUST_ALL_OBJS) attribs.o $(BACKEND) $(LIBS) $(LIBPROC_MACRO_INTERNAL) rust/libformat_parser.a $(BACKENDLIBS)
@$(call LINK_PROGRESS,$(INDEX.rust),end)
# Build hooks.
@ -406,6 +408,13 @@ rust/%.o: rust/lex/%.cc
$(COMPILE) $(RUST_CXXFLAGS) $(RUST_INCLUDES) $<
$(POSTCOMPILE)
# Catch-all pattern rule for .toml targets: the recipe only echoes the target
# name.
# NOTE(review): presumably here so that make accepts the Cargo.toml
# prerequisite of rust/libformat_parser.a - confirm it is actually needed.
%.toml:
echo $@
# Build the Rust format-string parser as a static archive with cargo and copy
# it to rust/libformat_parser.a, which the crab1 link line references.
# The prerequisites list the manifest and sources of both the FFI crate and
# its generic_format_parser workspace member, so that editing either one
# rebuilds the archive.
rust/libformat_parser.a: $(srcdir)/../libgrust/libformat_parser/Cargo.toml \
  $(wildcard $(srcdir)/../libgrust/libformat_parser/src/*.rs) \
  $(srcdir)/../libgrust/libformat_parser/generic_format_parser/Cargo.toml \
  $(wildcard $(srcdir)/../libgrust/libformat_parser/generic_format_parser/src/*.rs)
cargo build --manifest-path $(srcdir)/../libgrust/libformat_parser/Cargo.toml --release # FIXME: Not always release, right?
cp $(srcdir)/../libgrust/libformat_parser/target/release/liblibformat_parser.a $@
# build all rust/parse files in rust folder, add cross-folder includes
rust/%.o: rust/parse/%.cc
$(COMPILE) $(RUST_CXXFLAGS) $(RUST_INCLUDES) $<

View File

@ -19,78 +19,23 @@
#include "rust-fmt.h"
namespace Rust {
tl::expected<Fmt, Fmt::Error>
Fmt::parse_fmt_string (Fmt::Input input)
namespace Fmt {
Pieces
Pieces::collect (const std::string &to_parse)
{
return Fmt ();
}
tl::expected<Fmt::Result<tl::optional<Fmt::Format>>, Fmt::Error>
Fmt::maybe_format (Fmt::Input input)
{
tl::optional<Fmt::Format> none = tl::nullopt;
return Fmt::Result (input, none);
}
tl::expected<Fmt::Result<Fmt::Format>, Fmt::Error>
Fmt::format (Input input)
{
return Fmt::Result (input, Format ());
}
tl::expected<Fmt::Result<Fmt::Argument>, Fmt::Error>
Fmt::argument (Input input)
{
return Fmt::Result (input, Argument ());
}
tl::expected<Fmt::Result<Fmt::FormatSpec>, Fmt::Error>
Fmt::format_spec (Input input)
{
return Fmt::Result (input, FormatSpec ());
}
tl::expected<Fmt::Result<Fmt::Fill>, Fmt::Error>
Fmt::fill (Input input)
{
return Fmt::Result (input, Fill ());
}
tl::expected<Fmt::Result<Fmt::Align>, Fmt::Error>
Fmt::align (Input input)
{
switch (input[0])
{
case '<':
return Fmt::Result (input.substr (1), Align::Left);
case '^':
return Fmt::Result (input.substr (1), Align::Top);
case '>':
return Fmt::Result (input.substr (1), Align::Right);
default:
// TODO: Store the character here
// TODO: Can we have proper error locations?
// TODO: Maybe we should use a Rust::Literal string instead of a string
return tl::make_unexpected (Error::Align);
}
}
tl::expected<Fmt::Result<Fmt::Sign>, Fmt::Error>
Fmt::sign (Input input)
{
switch (input[0])
{
case '+':
return Fmt::Result (input.substr (1), Sign::Plus);
case '-':
return Fmt::Result (input.substr (1), Sign::Minus);
default:
// TODO: Store the character here
// TODO: Can we have proper error locations?
// TODO: Maybe we should use a Rust::Literal string instead of a string
return tl::make_unexpected (Error::Sign);
}
auto piece_slice = collect_pieces (to_parse.c_str ());
rust_debug ("[ARTHUR] %p, %lu", (void *) piece_slice.ptr, piece_slice.len);
// this performs multiple copies, can we avoid them maybe?
auto pieces
= std::vector (piece_slice.ptr, piece_slice.ptr + piece_slice.len);
rust_debug ("[ARTHUR] %p, %lu", (void *) pieces.data (), pieces.size ());
return Pieces{};
}
} // namespace Fmt
} // namespace Rust

View File

@ -19,115 +19,134 @@
#ifndef RUST_FMT_H
#define RUST_FMT_H
#include "expected.h"
#include "optional.h"
#include "rust-ast.h"
#include "rust-diagnostics.h"
#include "rust-system.h"
namespace Rust {
namespace Fmt {
/**
* This class implements the parsing of Rust format strings according to the
* grammar here: https://doc.rust-lang.org/std/fmt/index.html#syntax
*/
// TODO: Are there features that are only present in specific Rust editions?
class Fmt
struct RustHamster
{
public:
// TODO: Keep location information
// TODO: Switch to a Rust::AST::Literal here
using Input = std::string;
enum class Error
{
Align,
Sign,
};
template <typename T> class Result
{
public:
explicit Result (Input remaining_input, T result)
: remaining_input (remaining_input), result (result)
{}
private:
Input remaining_input;
T result;
};
// FIXME: Do not use an owned string here
static tl::expected<Fmt, Fmt::Error> parse_fmt_string (Input input);
private:
// the parse functions should return the remaining input as well as the
// expected node let's look at nom
// TODO: no string view :( use an owned string for now?
template <typename T> struct ParseResult
{
tl::expected<Result<T>, Error> inner;
ParseResult (tl::expected<Result<T>, Error> inner) : inner (inner) {}
ParseResult operator= (tl::expected<Result<T>, Error> inner)
{
return ParseResult (inner);
}
Input remaining_input () { return inner->remaining_input; }
T value () { return inner->value; }
};
struct Format
{
};
struct Argument
{
enum struct Kind
{
Integer,
Identifier,
} kind;
int integer;
Identifier identifier;
};
struct FormatSpec
{
};
struct Fill
{
char to_fill;
};
enum class Align
{
Left,
Top,
Right
};
enum class Sign
{
Plus,
Minus
};
// let's do one function per rule in the BNF
static tl::expected<Result<std::string>, Error> text (Input input);
static tl::expected<Result<tl::optional<Format>>, Error>
maybe_format (Input input);
static tl::expected<Result<Format>, Error> format (Input input);
static tl::expected<Result<Argument>, Error> argument (Input input);
static tl::expected<Result<FormatSpec>, Error> format_spec (Input input);
static tl::expected<Result<Fill>, Error> fill (Input input);
static tl::expected<Result<Align>, Error> align (Input input);
static tl::expected<Result<Sign>, Error> sign (Input input);
// hehe
};
// Type of the `*_span` members of FormatSpec and of Argument::inner_span.
// Empty placeholder for now: it has no fields.
struct InnerSpan
{
};
// A width/precision count: `kind` names the alternative and `data` carries
// its payload.
// NOTE(review): not used yet - the `Count` members of FormatSpec are still
// commented out.
// NOTE(review): `std::pair` is not trivially default-constructible, so this
// anonymous union (and therefore Count) likely cannot be default-constructed
// as written - confirm before putting this type to use.
struct Count
{
enum class Kind
{
Is,
IsName,
IsParam,
IsStar,
Implied
} kind;
union
{
// Payload for Kind::Is.
size_t is;
// Payload for Kind::IsName.
std::pair<RustHamster, InnerSpan> is_name;
// Payload for Kind::IsParam.
size_t is_param;
// Payload for Kind::IsStar.
size_t is_star;
// Kind::Implied has no payload member.
} data;
};
// Payload type of FormatSpec::debug_hex (the `x` or `X` flag).
// Empty placeholder for now: it has no fields.
struct DebugHex
{
};
// Payload type of FormatSpec::sign (the `+` or `-` flag).
// Empty placeholder for now: it has no fields.
struct Sign
{
};
// Type of FormatSpec::align.
// Empty placeholder for now: it has no fields.
struct Alignment
{
};
// Payload type of a Piece whose kind is Piece::Kind::String.
struct RustString
{
// TODO: Empty placeholder for now: it has no fields.
};
// Type of Argument::position.
// Empty placeholder for now: it has no fields.
struct Position
{
};
// Formatting options attached to an Argument (see Argument::format).
// The `precision` and `width` counts are not declared yet; only their spans
// are.
struct FormatSpec
{
/// Optionally specified character to fill alignment with.
tl::optional<char /* FIXME: This is a Rust char, not a C++ char - use an uint32_t instead? */> fill;
/// Span of the optionally specified fill character.
tl::optional<InnerSpan> fill_span;
/// Optionally specified alignment.
// NOTE(review): declared as a plain Alignment rather than
// tl::optional<Alignment> - confirm how "not specified" is represented.
Alignment align;
/// The `+` or `-` flag.
tl::optional<Sign> sign;
/// The `#` flag.
bool alternate;
/// The `0` flag.
bool zero_pad;
/// The `x` or `X` flag. (Only for `Debug`.)
tl::optional<DebugHex> debug_hex;
/// The integer precision to use.
// Count <'a> precision;
/// The span of the precision formatting flag (for diagnostics).
tl::optional<InnerSpan> precision_span;
/// The string width requested for the resulting format.
// Count <'a> width;
/// The span of the width formatting flag (for diagnostics).
tl::optional<InnerSpan> width_span;
/// The descriptor string representing the name of the format desired for
/// this argument, this can be empty or any number of characters, although
/// it is required to be one word.
RustHamster ty;
// &'a str ty;
/// The span of the descriptor string (for diagnostics).
tl::optional<InnerSpan> ty_span;
};
// Payload of a Piece whose kind is Piece::Kind::NextArgument (reached through
// Piece::data.next_argument).
struct Argument
{
// Position of the argument; Position is still an empty placeholder.
Position position;
// Span associated with the argument; InnerSpan is still an empty placeholder.
InnerSpan inner_span;
// Formatting options for this argument.
FormatSpec format;
};
// One element of a parsed format string: either a string (Kind::String) or an
// argument (Kind::NextArgument).
// NOTE(review): `kind` presumably selects the active member of `data`;
// nothing in this header enforces that.
// NOTE(review): PieceSlice hands out pointers to values produced on the Rust
// side - confirm that the Rust `Piece` type really has this layout before
// reading through them.
struct Piece
{
enum class Kind
{
String,
NextArgument
} kind;
union
{
RustString string;
Argument *next_argument;
} data;
};
// Pointer + length pair returned by collect_pieces ().
// C++ view of the `#[repr(C)] PieceSlice` struct defined in
// libgrust/libformat_parser/src/lib.rs (fields `base_ptr` and `len`).
struct PieceSlice
{
// Address of the first piece.
Piece *ptr;
// Number of pieces starting at `ptr`.
size_t len;
};
// Implemented in Rust: `#[no_mangle] extern "C" fn collect_pieces` in
// libgrust/libformat_parser/src/lib.rs. The argument is read on the Rust side
// as a NUL-terminated C string.
extern "C" {
PieceSlice
collect_pieces (const char *);
}
// C++ entry point to the format-string parser, called from the
// format_args!() builtin macro handler.
struct Pieces
{
// Passes `to_parse` to collect_pieces () and returns a Pieces value.
static Pieces collect (const std::string &to_parse);
};
} // namespace Fmt
} // namespace Rust
#endif // ! RUST_FMT_H

View File

@ -30,6 +30,7 @@
#include "rust-parse.h"
#include "rust-session-manager.h"
#include "rust-attribute-values.h"
#include "rust-fmt.h"
namespace Rust {
@ -89,8 +90,8 @@ std::unordered_map<std::string, AST::MacroTranscriberFunc>
{"env", MacroBuiltin::env_handler},
{"cfg", MacroBuiltin::cfg_handler},
{"include", MacroBuiltin::include_handler},
{"format_args", MacroBuiltin::format_args_handler},
/* Unimplemented macro builtins */
{"format_args", MacroBuiltin::sorry},
{"option_env", MacroBuiltin::sorry},
{"format_args_nl", MacroBuiltin::sorry},
{"concat_idents", MacroBuiltin::sorry},
@ -942,6 +943,15 @@ MacroBuiltin::stringify_handler (location_t invoc_locus,
return AST::Fragment ({node}, std::move (token));
}
// Handler for the builtin `format_args!()` macro.
// Work in progress: neither `invoc_locus` nor `invoc` is read. The handler
// runs the format-string parser on a hard-coded sample string, discards the
// result and expands to an empty fragment.
tl::optional<AST::Fragment>
MacroBuiltin::format_args_handler (location_t invoc_locus,
AST::MacroInvocData &invoc)
{
Fmt::Pieces::collect ("heyo this {is} what I {} want to {3}, {parse}");
return AST::Fragment::create_empty ();
}
tl::optional<AST::Fragment>
MacroBuiltin::sorry (location_t invoc_locus, AST::MacroInvocData &invoc)
{

View File

@ -157,6 +157,9 @@ public:
static tl::optional<AST::Fragment> line_handler (location_t invoc_locus,
AST::MacroInvocData &invoc);
static tl::optional<AST::Fragment>
format_args_handler (location_t invoc_locus, AST::MacroInvocData &invoc);
static tl::optional<AST::Fragment> sorry (location_t invoc_locus,
AST::MacroInvocData &invoc);

30
libgrust/libformat_parser/Cargo.lock generated Normal file
View File

@ -0,0 +1,30 @@
# This file is automatically @generated by Cargo.
# It is not intended for manual editing.
version = 3
[[package]]
name = "generic_format_parser"
version = "0.1.0"
dependencies = [
"unicode-xid",
]
[[package]]
name = "libc"
version = "0.2.152"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "13e3bf6590cbc649f4d1a3eefc9d5d6eb746f5200ffb04e5e142700b8faa56e7"
[[package]]
name = "libformat_parser"
version = "0.1.0"
dependencies = [
"generic_format_parser",
"libc",
]
[[package]]
name = "unicode-xid"
version = "0.2.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f962df74c8c05a667b5ee8bcf162993134c104e96440b663c8daa176dc772d8c"

View File

@ -0,0 +1,21 @@
# FFI wrapper crate around generic_format_parser.
[package]
name = "libformat_parser"
version = "0.1.0"
edition = "2021"
# generic_format_parser lives in a sub-directory and is a member of this
# workspace.
[workspace]
members = [
"generic_format_parser",
]
[dependencies]
# Provides `c_char` for the extern "C" entry point in src/lib.rs.
libc = "0.2"
generic_format_parser = { path = "generic_format_parser" }
[lib]
# `staticlib` produces the .a archive (liblibformat_parser.a); `rlib` lets
# src/bin.rs use the crate as a regular Rust dependency.
# `crate-type` is the documented spelling of this key; `crate_type` is a
# deprecated alias.
crate-type = ["staticlib", "rlib"]
# Small command-line driver for trying the parser by hand.
[[bin]]
name = "format_parser_test"
path = "src/bin.rs"

View File

@ -0,0 +1,9 @@
# Parser crate used by libformat_parser through a path dependency.
[package]
name = "generic_format_parser"
version = "0.1.0"
edition = "2021"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies]
# NOTE(review): presumably needed for identifier checks inside the parser -
# confirm against src/lib.rs.
unicode-xid = "0.2.0"

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,7 @@
use libformat_parser::rust;
/// Manual test driver: parses the format string given as the first
/// command-line argument and dumps the resulting pieces with `dbg!`.
///
/// When no argument is given, prints a usage message to stderr and exits with
/// status 1 instead of panicking on an `unwrap` of `None`.
fn main() {
    let input = match std::env::args().nth(1) {
        Some(input) => input,
        None => {
            eprintln!("usage: format_parser_test <format-string>");
            std::process::exit(1);
        }
    };

    dbg!(rust::collect_pieces(input.as_str()));
}

View File

@ -0,0 +1,41 @@
//! FFI interface for `rustc_format_parser`
// what's the plan? Have a function return something that can be constructed into a vector?
// or an iterator?
use std::ffi::CStr;
// TODO: Use rustc's version here #3
use generic_format_parser::Piece;
// FIXME: Rename?
pub mod rust {
    use generic_format_parser::{ParseMode, Parser, Piece};

    /// Runs the format-string parser over `input` in [`ParseMode::Format`]
    /// and gathers every piece it yields into a `Vec`, in order.
    ///
    /// The returned pieces carry the lifetime of `input`, so they cannot
    /// outlive it.
    pub fn collect_pieces(input: &str) -> Vec<Piece<'_>> {
        // NOTE(review): the `None, None, true` arguments are forwarded as-is;
        // their meaning is defined by `Parser::new` in generic_format_parser.
        Parser::new(input, None, None, true, ParseMode::Format)
            .into_iter()
            .collect()
    }
}
/// Pointer/length pair returned across the FFI boundary by the
/// `extern "C"` function `collect_pieces`.
///
/// Marked `#[repr(C)]` because it is returned by value to C++; the matching
/// C++ declaration is `struct PieceSlice { Piece *ptr; size_t len; }` in
/// `gcc/rust/ast/rust-fmt.h`.
#[repr(C)]
pub struct PieceSlice {
/// Address of the first piece.
base_ptr: *const Piece<'static /* FIXME: That's wrong */>,
/// Number of pieces starting at `base_ptr`.
len: usize,
}
#[no_mangle]
pub extern "C" fn collect_pieces(input: *const libc::c_char) -> PieceSlice {
// FIXME: Add comment
let str = unsafe { CStr::from_ptr(input) };
// FIXME: No unwrap
let pieces = rust::collect_pieces(str.to_str().unwrap());
PieceSlice {
base_ptr: pieces.as_ptr(),
len: pieces.len(),
}
}