Parser combinators
Monads
This Parser
system is monadic. A
Monad
is a type for which we have implemented a special
composition rule, known as bind
(aliased to
>>
operator), as well as a method to bring values
into the monad, pure
. One example is the Maybe
monad.
module MonadMaybe
using ..Monads
import ..Monads: bind, pure
struct Maybe{T} <: Monad
::Union{Some{T}, Nothing}
valueend
bind(x::Maybe{T}, f::F) where {T, F} =
=== nothing ? nothing : f(something(x.value))
x.value
pure(::Type{Maybe}, value::T) where {T} =
Maybe{T}(Some(value))
end
module Monads
export Monad, bind, pure, fmap, starmap, value_type
abstract type Monad end
function bind end
function pure end
function fmap end
Base.:>>(m::M, f::F) where {M <: Monad, F} = bind(m, f)
pure(::Type{M}) where {M <: Monad} = (x -> pure(M, x))
fmap(::Type{M}, f::F) where {M <: Monad, F} = pure(M) ∘ f
fmap(f::F, m::M) where {M <: Monad, F} = m >> fmap(M, f)
starmap(::Type{M}, f::F) where {F, M <: Monad} = pure(M) ∘ splat(f)
end
Parsers
@testset "Parsing" begin
using AOC2024.Parsing
<<parsing-spec>>
end
A Parser
is a Monad
. The parse
function takes a Parser
and a string, returning a tuple of
a result and another string (the rest of the text that needs to be
parsed). We have a generic FnParser
that should cover all
bases using closures. However, we’ll have some specialized parsers that
are both inspectable and should allow optimizations.
module Parsing
using ..Monads
import ..Monads: bind, pure
import Base: parse
export Parser, pure, bind, result_type, result
abstract type Parser <: Monad end
# function parse end
value_type(::Type{Tuple{V, S}}) where {S <: AbstractString, V} = V
value_type(::Type{Union{A, B}}) where {A, B} = Union{value_type(A), value_type(B)}
value_type(::Type{Any}) = Any
function result_type(::Type{P}) where {P <: Parser}
= Base.return_types(parse, [P, String])
t
if length(t) == 1
value_type(t[1])
else
Any
end
end
result_type(::P) where {P <: Parser} = result_type(P)
result(x::Tuple{T, U}) where {T, U} = x[1]
mutable struct FnParser{F} <: Parser
::F
fnend
function parse(p::FnParser{F}, s::S) where {F, S <: AbstractString}
return p.fn(s)
end
const item_p = function(inp::S) where {S <: AbstractString}
isempty(inp) && throw(Fail("End of input"))
1], inp[2:end])
(inp[end |> FnParser
export item_p
<<parsing>>
end
The pure
parser, doesn’t consume input, but returns a
given value.
export pure_p
struct PureParser{T} <: Parser
::T
valueend
function parse(p::PureParser{T}, s::S) where {T, S <: AbstractString}
return (p.value, s)
end
function pure(::Type{P}, value::T) where {P <: Parser, T}
return PureParser{T}(value)
end
pure_p(value::T) where {T} = pure(Parser, value)
Base.convert(::Type{PureParser{Nothing}}, ::Nothing) = pure(nothing)
@test parse(pure_p(3), "abc") == (3, "abc")
@test parse(pure_p(nothing), "abc") == (nothing, "abc")
Parser Monad
A Monad
should have pure
and
bind
implemented.
function bind(p::P, f::F) where {P <: Parser, F}
function (inp::S) where {S <: AbstractString}
= parse(p, inp)
(x, next_inp) parse(f(x), next_inp)
end |> FnParser
end
@test parse(pure_p(1) >> (x -> pure_p(x + 1)), "abc") == (2, "abc")
Alternatives
abstract type Fail <: Exception end
struct FailMsg <: Fail
::String
msgend
struct Expected <: Fail
::AbstractString
what::AbstractString
gotend
struct ChoiceFail <: Fail
::Vector{Fail}
failsChoiceFail(f1::ChoiceFail, f2::ChoiceFail) = new([f1.fails; f2.fails])
ChoiceFail(f1::Fail, f2::ChoiceFail) = new([f1, f2.fails...])
ChoiceFail(f1::ChoiceFail, f2::Fail) = new([f1.fails..., f2])
ChoiceFail(f1::Fail, f2::Fail) = new([f1, f2])
end
struct Choice{A, B} <: Parser
::A
a::B
bend
function parse(p::Choice{A, B}, s::S) where {S <: AbstractString, A <: Parser, B <: Parser}
tryparse(p.a, s)
catch e1
tryparse(p.b, s)
catch e2throw(ChoiceFail(e1, e2))
end
end
end
Base.:|(p1::A, p2::B) where {A <: Parser, B <: Parser} =
Choice{A, B}(p1, p2)
struct ChoiceMany{P} <: Parser
::Vector{P}
psend
function parse(p::ChoiceMany{P}, s::S) where {P <: Parser, S <: AbstractString}
for q in p.ps
try
= parse(q, s)
(r, s) return (r, s)
catch e
continue
end
end
throw(Fail())
end
optional(p::P) where {P <: Parser} = p | nothing
Base.:~(p::P) where {P <: Parser} = optional(p)
@test parse(many(token(integer_p | match_p("abc"))), "123 abc 4 5 abc def") ==
123, "abc", 4, 5, "abc"], "def") ([
Combinators
Base.:>>>(a::A, b::B) where {A <: Parser, B <: Parser} = a >> (_ -> b)
Base.skip(p::P) where {P <: Parser} = v -> (p >>> pure_p(v))
struct SequenceParser{P} <: Parser
::P
psend
sequence(ps...) = SequenceParser(ps)
export sequence
@generated function parse(p::SequenceParser{P}, s::S) where {P, S <: AbstractString}
= length(P.types)
n :(begin
$((:(($(Symbol("a$i")), s) = parse(p.ps[$i], s)) for i = 1:n)...)
return (($((Symbol("a$i") for i = 1:n)...),), s)
end)
end
function many(p::P) where {P <: Parser}
= result_type(P)
RT function (s::S) where {S <: AbstractString}
= RT[]
result while true
try
= parse(p, s)
(x, s) push!(result, x)
catch
return (result, s)
end
end
end |> FnParser
end
sep_by_p(p::A, sep::B) where {A <: Parser, B <: Parser} =
sequence(p, many(sep >>> p)) >> starmap(Parser, (h, t) -> pushfirst!(t, h))
export many, sep_by_p
String literals and regexes
export match_p
struct RegexParser <: Parser
::Regex
reend
match_p(re::Regex) = RegexParser(re)
Base.convert(::Type{RegexParser}, re::Regex) = RegexParser(re)
function parse(p::RegexParser, s::S) where {S <: AbstractString}
if !startswith(s, p.re)
throw(Expected("$(p.re)", s))
end
= match(p.re, s)
m return (m, s[length(m.match)+1:end])
end
struct LiteralParser <: Parser
::String
litend
match_p(s::AbstractString) = LiteralParser(s)
function parse(p::LiteralParser, s::S) where {S <: AbstractString}
if !startswith(s, p.lit)
throw(Expected("$(p.lit)", s))
end
return (p.lit, s[length(p.lit)+1:end])
end
parse(p::Parser) = function(s::AbstractString)
parse(p, s)
end
Convenience parsers
token(p::A, space::B = RegexParser(r"\s*")) where {A <: Parser, B <: Parser} =
>> skip(space)
p
= fmap(x -> parse(Int, x.match), match_p(r"-?[1-9][0-9]*|0"))
integer_p
= token(integer_p)
integer
export token, integer_p, integer
@test parse(match_p("hello"), "hellogoodbye") == ("hello", "goodbye")
= match_p("a") >>> match_p("b")
p @test parse(p, "abc") == ("b", "c")
= sequence(match_p("a"), match_p("b"))
p @test parse(p, "abc") == (("a", "b"), "c")
#p = sequence(n=match_p("a"), m=match_p("b"))
#@test parse(p, "abc") == (Dict(:n => "a", :m => "b"), "c")
@test parse(integer_p, "123abc") == (123, "abc")
= many(integer)
p @test parse(p, "1 2 3 4 56 7 abc") == ([1, 2, 3, 4, 56, 7], "abc")
Many somethings
I implemented this parser but ended up not needing it.
union_types(u) = (u,)
union_types(u::Union) = (u.a, union_types(u.b)...)
function many_somethings(p::P) where {P <: Parser}
= Set(union_types(result_type(P)))
types @assert Nothing ∈ types
= collect(setdiff(types, [Nothing]))
value_types @assert all(value_types .<: Some)
if length(value_types) == 1
= value_types[1].types[1]
RT else
= [t.types[1] for t in value_types]
ts = Union{ts...}
RT end
function (s::S) where {S <: AbstractString}
= RT[]
result while true
try
= parse(p, s)
(x, s) === nothing && continue
x push!(result, something(x))
catch
return (result, s)
end
end
end |> FnParser
end