Parser (Polyglot)¶
In [ ]:
#!import ../../lib/fsharp/Notebooks.dib
#!import ../../lib/fsharp/Testing.dib
In [ ]:
#!import ../../lib/fsharp/Common.fs
In [ ]:
open Common
TextInput¶
In [ ]:
type Position =
{
line : int
column : int
}
In [ ]:
let initialPos = { line = 0; column = 0 }
In [ ]:
let inline incrCol (pos : Position) =
{ pos with column = pos.column + 1 }
In [ ]:
let inline incrLine pos =
{ line = pos.line + 1; column = 0 }
In [ ]:
type InputState =
{
lines : string[]
position : Position
}
In [ ]:
let inline fromStr str =
{
lines =
if str |> String.IsNullOrEmpty
then [||]
else str |> SpiralSm.split_string [| "\r\n"; "\n" |]
position = initialPos
}
In [ ]:
//// test
fromStr "" |> _assertEqual {
lines = [||]
position = { line = 0; column = 0 }
}
{ lines = [||] position = { line = 0 column = 0 } }
In [ ]:
//// test
fromStr "Hello \n World" |> _assertEqual {
lines = [| "Hello "; " World" |]
position = { line = 0; column = 0 }
}
{ lines = [|"Hello "; " World"|] position = { line = 0 column = 0 } }
In [ ]:
let inline currentLine inputState =
let linePos = inputState.position.line
if linePos < inputState.lines.Length
then inputState.lines.[linePos]
else "end of file"
In [ ]:
let inline nextChar input =
let linePos = input.position.line
let colPos = input.position.column
if linePos >= input.lines.Length
then input, None
else
let currentLine = currentLine input
if colPos < currentLine.Length then
let char = currentLine.[colPos]
let newPos = incrCol input.position
let newState = { input with position = newPos }
newState, Some char
else
let char = '\n'
let newPos = incrLine input.position
let newState = { input with position = newPos }
newState, Some char
In [ ]:
//// test
let newInput, charOpt = fromStr "Hello World" |> nextChar
newInput |> _assertEqual {
lines = [| "Hello World" |]
position = { line = 0; column = 1 }
}
charOpt |> _assertEqual (Some 'H')
{ lines = [|"Hello World"|] position = { line = 0 column = 1 } } Some 'H'
In [ ]:
//// test
let newInput, charOpt = fromStr "Hello\n\nWorld" |> nextChar
newInput |> _assertEqual {
lines = [| "Hello"; ""; "World" |]
position = { line = 0; column = 1 }
}
charOpt |> _assertEqual (Some 'H')
{ lines = [|"Hello"; ""; "World"|] position = { line = 0 column = 1 } } Some 'H'
Parser¶
In [ ]:
type Input = InputState
type ParserLabel = string
type ParserError = string
type ParserPosition =
{
currentLine : string
line : int
column : int
}
type ParseResult<'a> =
| Success of 'a
| Failure of ParserLabel * ParserError * ParserPosition
type Parser<'a> =
{
label : ParserLabel
parseFn : Input -> ParseResult<'a * Input>
}
In [ ]:
let inline printResult result =
match result with
| Success (value, input) ->
printfn $"%A{value}"
| Failure (label, error, parserPos) ->
let errorLine = parserPos.currentLine
let colPos = parserPos.column
let linePos = parserPos.line
let failureCaret = $"{' ' |> string |> String.replicate colPos}^{error}"
printfn $"Line:%i{linePos} Col:%i{colPos} Error parsing %s{label}\n%s{errorLine}\n%s{failureCaret}"
In [ ]:
let inline runOnInput parser input =
parser.parseFn input
In [ ]:
let inline run parser inputStr =
runOnInput parser (fromStr inputStr)
In [ ]:
let inline parserPositionFromInputState (inputState : Input) =
{
currentLine = currentLine inputState
line = inputState.position.line
column = inputState.position.column
}
In [ ]:
let inline getLabel parser =
parser.label
In [ ]:
let inline setLabel parser newLabel =
{
label = newLabel
parseFn = fun input ->
match parser.parseFn input with
| Success s -> Success s
| Failure (oldLabel, err, pos) -> Failure (newLabel, err, pos)
}
In [ ]:
let (<?>) = setLabel
In [ ]:
let inline satisfy predicate label =
{
label = label
parseFn = fun input ->
let remainingInput, charOpt = nextChar input
match charOpt with
| None ->
let err = "No more input"
let pos = parserPositionFromInputState input
Failure (label, err, pos)
| Some first ->
if predicate first
then Success (first, remainingInput)
else
let err = $"Unexpected '%c{first}'"
let pos = parserPositionFromInputState input
Failure (label, err, pos)
}
In [ ]:
//// test
let input = fromStr "Hello"
let parser = satisfy (fun c -> c = 'H') "H"
runOnInput parser input |> _assertEqual (
Success (
'H',
{
lines = [| "Hello" |]
position = { line = 0; column = 1 }
}
)
)
Success ('H', { lines = [|"Hello"|] position = { line = 0 column = 1 } })
In [ ]:
//// test
let input = fromStr "World"
let parser = satisfy (fun c -> c = 'H') "H"
runOnInput parser input |> _assertEqual (
Failure (
"H",
"Unexpected 'W'",
{
currentLine = "World"
line = 0
column = 0
}
)
)
Failure ("H", "Unexpected 'W'", { currentLine = "World" line = 0 column = 0 })
In [ ]:
let inline bindP f p =
{
label = "unknown"
parseFn = fun input ->
match runOnInput p input with
| Failure (label, err, pos) -> Failure (label, err, pos)
| Success (value1, remainingInput) -> runOnInput (f value1) remainingInput
}
In [ ]:
let inline (>>=) p f = bindP f p
In [ ]:
//// test
let input = fromStr "Hello"
let parser = satisfy (fun c -> c = 'H') "H"
let parser2 = parser >>= fun c -> satisfy (fun c -> c = 'e') "e"
runOnInput parser2 input |> _assertEqual (
Success (
'e',
{
lines = [| "Hello" |]
position = { line = 0; column = 2 }
}
)
)
Success ('e', { lines = [|"Hello"|] position = { line = 0 column = 2 } })
In [ ]:
//// test
let input = fromStr "World"
let parser = satisfy (fun c -> c = 'W') "W"
let parser2 = parser >>= fun c -> satisfy (fun c -> c = 'e') "e"
runOnInput parser2 input |> _assertEqual (
Failure (
"e",
"Unexpected 'o'",
{
currentLine = "World"
line = 0
column = 1
}
)
)
Failure ("e", "Unexpected 'o'", { currentLine = "World" line = 0 column = 1 })
In [ ]:
let inline returnP x =
{
label = $"%A{x}"
parseFn = fun input -> Success (x, input)
}
In [ ]:
//// test
let input = fromStr "Hello"
let parser = returnP "Hello"
runOnInput parser input |> _assertEqual (
Success (
"Hello",
{
lines = [| "Hello" |]
position = { line = 0; column = 0 }
}
)
)
Success ("Hello", { lines = [|"Hello"|] position = { line = 0 column = 0 } })
In [ ]:
let inline mapP f =
bindP (f >> returnP)
In [ ]:
let (<!>) = mapP
In [ ]:
let inline (|>>) x f = f <!> x
In [ ]:
//// test
let input = fromStr "Hello"
let parser = satisfy (fun c -> c = 'H') "H"
let parser2 = parser |>> string
runOnInput parser2 input |> _assertEqual (
Success (
"H",
{
lines = [| "Hello" |]
position = { line = 0; column = 1 }
}
)
)
Success ("H", { lines = [|"Hello"|] position = { line = 0 column = 1 } })
In [ ]:
let inline applyP fP xP =
fP >>=
fun f ->
xP >>=
fun x ->
returnP (f x)
In [ ]:
let (<*>) = applyP
In [ ]:
let inline lift2 f xP yP =
returnP f <*> xP <*> yP
In [ ]:
//// test
let input = fromStr "Hello"
let parser = satisfy (fun c -> c = 'H') "H"
let parser2 = satisfy (fun c -> c = 'e') "e"
let parser3 = lift2 (fun c1 c2 -> string c1 + string c2) parser parser2
runOnInput parser3 input |> _assertEqual (
Success (
"He",
{
lines = [| "Hello" |]
position = { line = 0; column = 2 }
}
)
)
Success ("He", { lines = [|"Hello"|] position = { line = 0 column = 2 } })
In [ ]:
let inline andThen p1 p2 =
p1 >>=
fun p1Result ->
p2 >>=
fun p2Result ->
returnP (p1Result, p2Result)
<?> $"{getLabel p1} andThen {getLabel p2}"
In [ ]:
let (.>>.) = andThen
In [ ]:
//// test
let input = fromStr "Hello"
let parser = satisfy (fun c -> c = 'H') "H"
let parser2 = satisfy (fun c -> c = 'e') "e"
let parser3 = parser .>>. parser2
runOnInput parser3 input |> _assertEqual (
Success (
('H', 'e'),
{
lines = [| "Hello" |]
position = { line = 0; column = 2 }
}
)
)
Success (('H', 'e'), { lines = [|"Hello"|] position = { line = 0 column = 2 } })
In [ ]:
let inline orElse p1 p2 =
{
label = $"{getLabel p1} orElse {getLabel p2}"
parseFn = fun input ->
match runOnInput p1 input with
| Success _ as result -> result
| Failure _ -> runOnInput p2 input
}
In [ ]:
let (<|>) = orElse
In [ ]:
//// test
let input = fromStr "hello"
let parser = satisfy (fun c -> c = 'H') "H"
let parser2 = satisfy (fun c -> c = 'h') "h"
let parser3 = parser <|> parser2
runOnInput parser3 input |> _assertEqual (
Success (
'h',
{
lines = [| "hello" |]
position = { line = 0; column = 1 }
}
)
)
Success ('h', { lines = [|"hello"|] position = { line = 0 column = 1 } })
In [ ]:
let inline choice listOfParsers =
listOfParsers |> List.reduce (<|>)
In [ ]:
//// test
let input = fromStr "hello"
let parser = satisfy (fun c -> c = 'H') "H"
let parser2 = satisfy (fun c -> c = 'h') "h"
let parser3 = choice [ parser; parser2 ]
runOnInput parser3 input |> _assertEqual (
Success (
'h',
{
lines = [| "hello" |]
position = { line = 0; column = 1 }
}
)
)
Success ('h', { lines = [|"hello"|] position = { line = 0 column = 1 } })
In [ ]:
let rec sequence parserList =
match parserList with
| [] -> returnP []
| head :: tail -> (lift2 cons) head (sequence tail)
In [ ]:
//// test
let input = fromStr "Hello"
let parser = satisfy (fun c -> c = 'H') "H"
let parser2 = satisfy (fun c -> c = 'e') "e"
let parser3 = sequence [ parser; parser2 ]
runOnInput parser3 input |> _assertEqual (
Success (
[ 'H'; 'e' ],
{
lines = [| "Hello" |]
position = { line = 0; column = 2 }
}
)
)
Success (['H'; 'e'], { lines = [|"Hello"|] position = { line = 0 column = 2 } })
In [ ]:
let rec parseZeroOrMore parser input =
match runOnInput parser input with
| Failure (_, _, _) ->
[], input
| Success (firstValue, inputAfterFirstParse) ->
let subsequentValues, remainingInput = parseZeroOrMore parser inputAfterFirstParse
firstValue :: subsequentValues, remainingInput
In [ ]:
let inline many parser =
{
label = $"many {getLabel parser}"
parseFn = fun input -> Success (parseZeroOrMore parser input)
}
In [ ]:
//// test
let input = fromStr "hello"
let parser = satisfy (fun c -> c = 'H') "H"
let parser2 = many parser
runOnInput parser2 input |> _assertEqual (
Success (
[],
{
lines = [| "hello" |]
position = { line = 0; column = 0 }
}
)
)
Success ([], { lines = [|"hello"|] position = { line = 0 column = 0 } })
In [ ]:
let inline many1 p =
p >>=
fun head ->
many p >>=
fun tail ->
returnP (head :: tail)
<?> $"many1 {getLabel p}"
In [ ]:
//// test
let input = fromStr "hello"
let parser = satisfy (fun c -> c = 'H') "H"
let parser2 = many1 parser
runOnInput parser2 input |> _assertEqual (
Failure (
"many1 H",
"Unexpected 'h'",
{
currentLine = "hello"
line = 0
column = 0
}
)
)
Failure ("many1 H", "Unexpected 'h'", { currentLine = "hello" line = 0 column = 0 })
In [ ]:
let inline opt p =
let some = p |>> Some
let none = returnP None
(some <|> none)
<?> $"opt {getLabel p}"
In [ ]:
//// test
let input = fromStr "hello"
let parser = satisfy (fun c -> c = 'H') "H"
let parser2 = opt parser
runOnInput parser2 input |> _assertEqual (
Success (
None,
{
lines = [| "hello" |]
position = { line = 0; column = 0 }
}
)
)
Success (None, { lines = [|"hello"|] position = { line = 0 column = 0 } })
In [ ]:
let inline (.>>) p1 p2 =
p1 .>>. p2
|> mapP fst
In [ ]:
let inline (>>.) p1 p2 =
p1 .>>. p2
|> mapP snd
In [ ]:
let inline between p1 p2 p3 =
p1 >>. p2 .>> p3
In [ ]:
//// test
let input = fromStr "[Hello]"
let parser =
between
(satisfy (fun c -> c = '[') "[")
(many (satisfy (fun c -> [ 'a' .. 'z' ] @ [ 'A' .. 'Z' ] |> List.contains c) "letter"))
(satisfy (fun c -> c = ']') "]")
runOnInput parser input |> _assertEqual (
Success (
[ 'H'; 'e'; 'l'; 'l'; 'o' ],
{
lines = [| "[Hello]" |]
position = { line = 0; column = 7 }
}
)
)
Success (['H'; 'e'; 'l'; 'l'; 'o'], { lines = [|"[Hello]"|] position = { line = 0 column = 7 } })
In [ ]:
let inline sepBy1 p sep =
let sepThenP = sep >>. p
p .>>. many sepThenP
|>> fun (p, pList) -> p :: pList
In [ ]:
let inline sepBy p sep =
sepBy1 p sep <|> returnP []
In [ ]:
//// test
let input = fromStr "Hello,World"
let parser = sepBy (many (satisfy (fun c -> c <> ',') "not comma")) (satisfy (fun c -> c = ',') "comma")
runOnInput parser input |> _assertEqual (
Success (
[ [ 'H'; 'e'; 'l'; 'l'; 'o' ]; [ 'W'; 'o'; 'r'; 'l'; 'd'; '\n' ] ],
{
lines = [| "Hello,World" |]
position = { line = 1; column = 0 }
}
)
)
Success ([['H'; 'e'; 'l'; 'l'; 'o']; ['W'; 'o'; 'r'; 'l'; 'd'; '\010']], { lines = [|"Hello,World"|] position = { line = 1 column = 0 } })
In [ ]:
let inline pchar charToMatch =
satisfy ((=) charToMatch) $"%c{charToMatch}"
In [ ]:
let inline anyOf listOfChars =
listOfChars
|> List.map pchar
|> choice
<?> $"anyOf %A{listOfChars}"
In [ ]:
//// test
let input = fromStr "Hello"
let parser = anyOf [ 'H'; 'e'; 'l'; 'o' ] |> many
runOnInput parser input |> _assertEqual (
Success (
[ 'H'; 'e'; 'l'; 'l'; 'o' ],
{
lines = [| "Hello" |]
position = { line = 0; column = 5 }
}
)
)
Success (['H'; 'e'; 'l'; 'l'; 'o'], { lines = [|"Hello"|] position = { line = 0 column = 5 } })
In [ ]:
let inline charListToStr charList =
charList |> List.toArray |> String
In [ ]:
let inline manyChars cp =
many cp
|>> charListToStr
In [ ]:
let inline manyChars1 cp =
many1 cp
|>> charListToStr
In [ ]:
//// test
let input = fromStr "Hello"
let parser = manyChars1 (anyOf [ 'H'; 'e'; 'l'; 'o' ])
runOnInput parser input |> _assertEqual (
Success (
"Hello",
{
lines = [| "Hello" |]
position = { line = 0; column = 5 }
}
)
)
Success ("Hello", { lines = [|"Hello"|] position = { line = 0 column = 5 } })
In [ ]:
let inline pstring str =
str
|> List.ofSeq
|> List.map pchar
|> sequence
|> mapP charListToStr
<?> str
In [ ]:
//// test
let input = fromStr "Hello"
let parser = pstring "Hello"
runOnInput parser input |> _assertEqual (
Success (
"Hello",
{
lines = [| "Hello" |]
position = { line = 0; column = 5 }
}
)
)
Success ("Hello", { lines = [|"Hello"|] position = { line = 0 column = 5 } })
In [ ]:
let whitespaceChar =
satisfy Char.IsWhiteSpace "whitespace"
In [ ]:
let spaces = many whitespaceChar
In [ ]:
let spaces1 = many1 whitespaceChar
In [ ]:
//// test
let input = fromStr " Hello"
let parser = spaces1 .>>. pstring "Hello"
runOnInput parser input |> _assertEqual (
Success (
([ ' '; ' ' ], "Hello"),
{
lines = [| " Hello" |]
position = { line = 0; column = 7 }
}
)
)
Success (([' '; ' '], "Hello"), { lines = [|" Hello"|] position = { line = 0 column = 7 } })
In [ ]:
let digitChar =
satisfy Char.IsDigit "digit"
In [ ]:
//// test
let input = fromStr "Hello"
let parser = digitChar
runOnInput parser input |> _assertEqual (
Failure (
"digit",
"Unexpected 'H'",
{
currentLine = "Hello"
line = 0
column = 0
}
)
)
Failure ("digit", "Unexpected 'H'", { currentLine = "Hello" line = 0 column = 0 })
In [ ]:
let pint =
let inline resultToInt (sign, digits) =
let i = int digits
match sign with
| Some ch -> -i
| None -> i
let digits = manyChars1 digitChar
opt (pchar '-') .>>. digits
|> mapP resultToInt
<?> "integer"
In [ ]:
//// test
run pint "-123"
|> _assertEqual (
Success (
-123,
{
lines = [| "-123" |]
position = { line = 0; column = 4 }
}
)
)
Success (-123, { lines = [|"-123"|] position = { line = 0 column = 4 } })
In [ ]:
let pfloat =
let inline resultToFloat (((sign, digits1), point), digits2) =
let fl = float $"{digits1}.{digits2}"
match sign with
| Some ch -> -fl
| None -> fl
let digits = manyChars1 digitChar
opt (pchar '-') .>>. digits .>>. pchar '.' .>>. digits
|> mapP resultToFloat
<?> "float"
In [ ]:
//// test
run pfloat "-123.45"
|> _assertEqual (
Success (
-123.45,
{
lines = [| "-123.45" |]
position = { line = 0; column = 7 }
}
)
)
Success (-123.45, { lines = [|"-123.45"|] position = { line = 0 column = 7 } })
In [ ]:
let inline createParserForwardedToRef<'a> () =
let mutable parserRef : Parser<'a> =
{
label = "unknown"
parseFn = fun _ -> failwith "unfixed forwarded parser"
}
let wrapperParser =
{ parserRef with
parseFn = fun input -> runOnInput parserRef input
}
wrapperParser, (fun v -> parserRef <- v)
In [ ]:
let inline (>>%) p x =
p
|>> fun _ -> x