Parser (Polyglot)¶

In [ ]:
#!import ../../lib/fsharp/Notebooks.dib
#!import ../../lib/fsharp/Testing.dib
In [ ]:
#!import ../../lib/fsharp/Common.fs
In [ ]:
open Common

TextInput¶

In [ ]:
type Position =
    {
        line : int
        column : int
    }
In [ ]:
let initialPos = { line = 0; column = 0 }
In [ ]:
let inline incrCol (pos : Position) =
    { pos with column = pos.column + 1 }
In [ ]:
let inline incrLine pos =
    { line = pos.line + 1; column = 0 }
In [ ]:
type InputState =
    {
        lines : string[]
        position : Position
    }
In [ ]:
let inline fromStr str =
    {
        lines =
            if str |> String.IsNullOrEmpty
            then [||]
            else str |> SpiralSm.split_string [| "\r\n"; "\n" |]
        position = initialPos
    }
In [ ]:
//// test

fromStr "" |> _assertEqual {
    lines = [||]
    position = { line = 0; column = 0 }
}
{ lines = [||]
  position = { line = 0
               column = 0 } }

In [ ]:
//// test

fromStr "Hello \n World" |> _assertEqual {
    lines = [| "Hello "; " World" |]
    position = { line = 0; column = 0 }
}
{ lines = [|"Hello "; " World"|]
  position = { line = 0
               column = 0 } }

In [ ]:
let inline currentLine inputState =
    let linePos = inputState.position.line
    if linePos < inputState.lines.Length
    then inputState.lines.[linePos]
    else "end of file"
In [ ]:
let inline nextChar input =
    let linePos = input.position.line
    let colPos = input.position.column

    if linePos >= input.lines.Length
    then input, None
    else
        let currentLine = currentLine input
        if colPos < currentLine.Length then
            let char = currentLine.[colPos]
            let newPos = incrCol input.position
            let newState = { input with position = newPos }
            newState, Some char
        else
            let char = '\n'
            let newPos = incrLine input.position
            let newState = { input with position = newPos }
            newState, Some char
In [ ]:
//// test

let newInput, charOpt = fromStr "Hello World" |> nextChar

newInput |> _assertEqual {
    lines = [| "Hello World" |]
    position = { line = 0; column = 1 }
}
charOpt |> _assertEqual (Some 'H')
{ lines = [|"Hello World"|]
  position = { line = 0
               column = 1 } }

Some 'H'

In [ ]:
//// test

let newInput, charOpt = fromStr "Hello\n\nWorld" |> nextChar

newInput |> _assertEqual {
    lines = [| "Hello"; ""; "World" |]
    position = { line = 0; column = 1 }
}
charOpt |> _assertEqual (Some 'H')
{ lines = [|"Hello"; ""; "World"|]
  position = { line = 0
               column = 1 } }

Some 'H'

Parser¶

In [ ]:
type Input = InputState
type ParserLabel = string
type ParserError = string

type ParserPosition =
    {
        currentLine : string
        line : int
        column : int
    }

type ParseResult<'a> =
    | Success of 'a
    | Failure of ParserLabel * ParserError * ParserPosition

type Parser<'a> =
    {
        label : ParserLabel
        parseFn : Input -> ParseResult<'a * Input>
    }
In [ ]:
let inline printResult result =
    match result with
    | Success (value, input) ->
        printfn $"%A{value}"
    | Failure (label, error, parserPos) ->
        let errorLine = parserPos.currentLine
        let colPos = parserPos.column
        let linePos = parserPos.line
        let failureCaret = $"{' ' |> string |> String.replicate colPos}^{error}"
        printfn $"Line:%i{linePos} Col:%i{colPos} Error parsing %s{label}\n%s{errorLine}\n%s{failureCaret}"
In [ ]:
let inline runOnInput parser input =
    parser.parseFn input
In [ ]:
let inline run parser inputStr =
    runOnInput parser (fromStr inputStr)
In [ ]:
let inline parserPositionFromInputState (inputState : Input) =
    {
        currentLine = currentLine inputState
        line = inputState.position.line
        column = inputState.position.column
    }
In [ ]:
let inline getLabel parser =
    parser.label
In [ ]:
let inline setLabel parser newLabel =
    {
        label = newLabel
        parseFn = fun input ->
            match parser.parseFn input with
            | Success s -> Success s
            | Failure (oldLabel, err, pos) -> Failure (newLabel, err, pos)
    }
In [ ]:
let (<?>) = setLabel
In [ ]:
let inline satisfy predicate label =
    {
        label = label
        parseFn = fun input ->
            let remainingInput, charOpt = nextChar input
            match charOpt with
            | None ->
                let err = "No more input"
                let pos = parserPositionFromInputState input
                Failure (label, err, pos)
            | Some first ->
                if predicate first
                then Success (first, remainingInput)
                else
                    let err = $"Unexpected '%c{first}'"
                    let pos = parserPositionFromInputState input
                    Failure (label, err, pos)
    }
In [ ]:
//// test

let input = fromStr "Hello"
let parser = satisfy (fun c -> c = 'H') "H"
runOnInput parser input |> _assertEqual (
    Success (
        'H',
        {
            lines = [| "Hello" |]
            position = { line = 0; column = 1 }
        }
    )
)
Success ('H', { lines = [|"Hello"|]
                position = { line = 0
                             column = 1 } })

In [ ]:
//// test

let input = fromStr "World"
let parser = satisfy (fun c -> c = 'H') "H"
runOnInput parser input |> _assertEqual (
    Failure (
        "H",
        "Unexpected 'W'",
        {
            currentLine = "World"
            line = 0
            column = 0
        }
    )
)
Failure ("H", "Unexpected 'W'", { currentLine = "World"
                                  line = 0
                                  column = 0 })

In [ ]:
let inline bindP f p =
    {
        label = "unknown"
        parseFn = fun input ->
            match runOnInput p input with
            | Failure (label, err, pos) -> Failure (label, err, pos)
            | Success (value1, remainingInput) -> runOnInput (f value1) remainingInput
    }
In [ ]:
let inline (>>=) p f = bindP f p
In [ ]:
//// test

let input = fromStr "Hello"
let parser = satisfy (fun c -> c = 'H') "H"
let parser2 = parser >>= fun c -> satisfy (fun c -> c = 'e') "e"
runOnInput parser2 input |> _assertEqual (
    Success (
        'e',
        {
            lines = [| "Hello" |]
            position = { line = 0; column = 2 }
        }
    )
)
Success ('e', { lines = [|"Hello"|]
                position = { line = 0
                             column = 2 } })

In [ ]:
//// test

let input = fromStr "World"
let parser = satisfy (fun c -> c = 'W') "W"
let parser2 = parser >>= fun c -> satisfy (fun c -> c = 'e') "e"
runOnInput parser2 input |> _assertEqual (
    Failure (
        "e",
        "Unexpected 'o'",
        {
            currentLine = "World"
            line = 0
            column = 1
        }
    )
)
Failure ("e", "Unexpected 'o'", { currentLine = "World"
                                  line = 0
                                  column = 1 })

In [ ]:
let inline returnP x =
    {
        label = $"%A{x}"
        parseFn = fun input -> Success (x, input)
    }
In [ ]:
//// test

let input = fromStr "Hello"
let parser = returnP "Hello"
runOnInput parser input |> _assertEqual (
    Success (
        "Hello",
        {
            lines = [| "Hello" |]
            position = { line = 0; column = 0 }
        }
    )
)
Success ("Hello", { lines = [|"Hello"|]
                    position = { line = 0
                                 column = 0 } })

In [ ]:
let inline mapP f =
    bindP (f >> returnP)
In [ ]:
let (<!>) = mapP
In [ ]:
let inline (|>>) x f = f <!> x
In [ ]:
//// test

let input = fromStr "Hello"
let parser = satisfy (fun c -> c = 'H') "H"
let parser2 = parser |>> string
runOnInput parser2 input |> _assertEqual (
    Success (
        "H",
        {
            lines = [| "Hello" |]
            position = { line = 0; column = 1 }
        }
    )
)
Success ("H", { lines = [|"Hello"|]
                position = { line = 0
                             column = 1 } })

In [ ]:
let inline applyP fP xP =
    fP >>=
        fun f ->
            xP >>=
                fun x ->
                    returnP (f x)
In [ ]:
let (<*>) = applyP
In [ ]:
let inline lift2 f xP yP =
    returnP f <*> xP <*> yP
In [ ]:
//// test

let input = fromStr "Hello"
let parser = satisfy (fun c -> c = 'H') "H"
let parser2 = satisfy (fun c -> c = 'e') "e"
let parser3 = lift2 (fun c1 c2 -> string c1 + string c2) parser parser2
runOnInput parser3 input |> _assertEqual (
    Success (
        "He",
        {
            lines = [| "Hello" |]
            position = { line = 0; column = 2 }
        }
    )
)
Success ("He", { lines = [|"Hello"|]
                 position = { line = 0
                              column = 2 } })

In [ ]:
let inline andThen p1 p2 =
    p1 >>=
        fun p1Result ->
            p2 >>=
                fun p2Result ->
                    returnP (p1Result, p2Result)
    <?> $"{getLabel p1} andThen {getLabel p2}"
In [ ]:
let (.>>.) = andThen
In [ ]:
//// test

let input = fromStr "Hello"
let parser = satisfy (fun c -> c = 'H') "H"
let parser2 = satisfy (fun c -> c = 'e') "e"
let parser3 = parser .>>. parser2
runOnInput parser3 input |> _assertEqual (
    Success (
        ('H', 'e'),
        {
            lines = [| "Hello" |]
            position = { line = 0; column = 2 }
        }
    )
)
Success (('H', 'e'), { lines = [|"Hello"|]
                       position = { line = 0
                                    column = 2 } })

In [ ]:
let inline orElse p1 p2 =
    {
        label = $"{getLabel p1} orElse {getLabel p2}"
        parseFn = fun input ->
            match runOnInput p1 input with
            | Success _ as result -> result
            | Failure _ -> runOnInput p2 input
    }
In [ ]:
let (<|>) = orElse
In [ ]:
//// test

let input = fromStr "hello"
let parser = satisfy (fun c -> c = 'H') "H"
let parser2 = satisfy (fun c -> c = 'h') "h"
let parser3 = parser <|> parser2
runOnInput parser3 input |> _assertEqual (
    Success (
        'h',
        {
            lines = [| "hello" |]
            position = { line = 0; column = 1 }
        }
    )
)
Success ('h', { lines = [|"hello"|]
                position = { line = 0
                             column = 1 } })

In [ ]:
let inline choice listOfParsers =
    listOfParsers |> List.reduce (<|>)
In [ ]:
//// test

let input = fromStr "hello"
let parser = satisfy (fun c -> c = 'H') "H"
let parser2 = satisfy (fun c -> c = 'h') "h"
let parser3 = choice [ parser; parser2 ]
runOnInput parser3 input |> _assertEqual (
    Success (
        'h',
        {
            lines = [| "hello" |]
            position = { line = 0; column = 1 }
        }
    )
)
Success ('h', { lines = [|"hello"|]
                position = { line = 0
                             column = 1 } })

In [ ]:
let rec sequence parserList =
    match parserList with
    | [] -> returnP []
    | head :: tail -> (lift2 cons) head (sequence tail)
In [ ]:
//// test

let input = fromStr "Hello"
let parser = satisfy (fun c -> c = 'H') "H"
let parser2 = satisfy (fun c -> c = 'e') "e"
let parser3 = sequence [ parser; parser2 ]
runOnInput parser3 input |> _assertEqual (
    Success (
        [ 'H'; 'e' ],
        {
            lines = [| "Hello" |]
            position = { line = 0; column = 2 }
        }
    )
)
Success (['H'; 'e'], { lines = [|"Hello"|]
                       position = { line = 0
                                    column = 2 } })

In [ ]:
let rec parseZeroOrMore parser input =
    match runOnInput parser input with
    | Failure (_, _, _) ->
        [], input
    | Success (firstValue, inputAfterFirstParse) ->
        let subsequentValues, remainingInput = parseZeroOrMore parser inputAfterFirstParse
        firstValue :: subsequentValues, remainingInput
In [ ]:
let inline many parser =
    {
        label = $"many {getLabel parser}"
        parseFn = fun input -> Success (parseZeroOrMore parser input)
    }
In [ ]:
//// test

let input = fromStr "hello"
let parser = satisfy (fun c -> c = 'H') "H"
let parser2 = many parser
runOnInput parser2 input |> _assertEqual (
    Success (
        [],
        {
            lines = [| "hello" |]
            position = { line = 0; column = 0 }
        }
    )
)
Success ([], { lines = [|"hello"|]
               position = { line = 0
                            column = 0 } })

In [ ]:
let inline many1 p =
    p >>=
        fun head ->
            many p >>=
                fun tail ->
                    returnP (head :: tail)
    <?> $"many1 {getLabel p}"
In [ ]:
//// test

let input = fromStr "hello"
let parser = satisfy (fun c -> c = 'H') "H"
let parser2 = many1 parser
runOnInput parser2 input |> _assertEqual (
    Failure (
        "many1 H",
        "Unexpected 'h'",
        {
            currentLine = "hello"
            line = 0
            column = 0
        }
    )
)
Failure ("many1 H", "Unexpected 'h'", { currentLine = "hello"
                                        line = 0
                                        column = 0 })

In [ ]:
let inline opt p =
    let some = p |>> Some
    let none = returnP None
    (some <|> none)
    <?> $"opt {getLabel p}"
In [ ]:
//// test

let input = fromStr "hello"
let parser = satisfy (fun c -> c = 'H') "H"
let parser2 = opt parser
runOnInput parser2 input |> _assertEqual (
    Success (
        None,
        {
            lines = [| "hello" |]
            position = { line = 0; column = 0 }
        }
    )
)
Success (None, { lines = [|"hello"|]
                 position = { line = 0
                              column = 0 } })

In [ ]:
let inline (.>>) p1 p2 =
    p1 .>>. p2
    |> mapP fst
In [ ]:
let inline (>>.) p1 p2 =
    p1 .>>. p2
    |> mapP snd
In [ ]:
let inline between p1 p2 p3 =
    p1 >>. p2 .>> p3
In [ ]:
//// test

let input = fromStr "[Hello]"
let parser =
    between
        (satisfy (fun c -> c = '[') "[")
        (many (satisfy (fun c -> [ 'a' .. 'z' ] @ [ 'A' .. 'Z' ] |> List.contains c) "letter"))
        (satisfy (fun c -> c = ']') "]")
runOnInput parser input |> _assertEqual (
    Success (
        [ 'H'; 'e'; 'l'; 'l'; 'o' ],
        {
            lines = [| "[Hello]" |]
            position = { line = 0; column = 7 }
        }
    )
)
Success (['H'; 'e'; 'l'; 'l'; 'o'], { lines = [|"[Hello]"|]
                                      position = { line = 0
                                                   column = 7 } })

In [ ]:
let inline sepBy1 p sep =
    let sepThenP = sep >>. p
    p .>>. many sepThenP
    |>> fun (p, pList) -> p :: pList
In [ ]:
let inline sepBy p sep =
    sepBy1 p sep <|> returnP []
In [ ]:
//// test

let input = fromStr "Hello,World"
let parser = sepBy (many (satisfy (fun c -> c <> ',') "not comma")) (satisfy (fun c -> c = ',') "comma")
runOnInput parser input |> _assertEqual (
    Success (
        [ [ 'H'; 'e'; 'l'; 'l'; 'o' ]; [ 'W'; 'o'; 'r'; 'l'; 'd'; '\n' ] ],
        {
            lines = [| "Hello,World" |]
            position = { line = 1; column = 0 }
        }
    )
)
Success ([['H'; 'e'; 'l'; 'l'; 'o']; ['W'; 'o'; 'r'; 'l'; 'd'; '\010']], { lines = [|"Hello,World"|]
                                                                           position = { line = 1
                                                                                        column = 0 } })

In [ ]:
let inline pchar charToMatch =
    satisfy ((=) charToMatch) $"%c{charToMatch}"
In [ ]:
let inline anyOf listOfChars =
    listOfChars
    |> List.map pchar
    |> choice
    <?> $"anyOf %A{listOfChars}"
In [ ]:
//// test

let input = fromStr "Hello"
let parser = anyOf [ 'H'; 'e'; 'l'; 'o' ] |> many
runOnInput parser input |> _assertEqual (
    Success (
        [ 'H'; 'e'; 'l'; 'l'; 'o' ],
        {
            lines = [| "Hello" |]
            position = { line = 0; column = 5 }
        }
    )
)
Success (['H'; 'e'; 'l'; 'l'; 'o'], { lines = [|"Hello"|]
                                      position = { line = 0
                                                   column = 5 } })

In [ ]:
let inline charListToStr charList =
    charList |> List.toArray |> String
In [ ]:
let inline manyChars cp =
    many cp
    |>> charListToStr
In [ ]:
let inline manyChars1 cp =
    many1 cp
    |>> charListToStr
In [ ]:
//// test

let input = fromStr "Hello"
let parser = manyChars1 (anyOf [ 'H'; 'e'; 'l'; 'o' ])
runOnInput parser input |> _assertEqual (
    Success (
        "Hello",
        {
            lines = [| "Hello" |]
            position = { line = 0; column = 5 }
        }
    )
)
Success ("Hello", { lines = [|"Hello"|]
                    position = { line = 0
                                 column = 5 } })

In [ ]:
let inline pstring str =
    str
    |> List.ofSeq
    |> List.map pchar
    |> sequence
    |> mapP charListToStr
    <?> str
In [ ]:
//// test

let input = fromStr "Hello"
let parser = pstring "Hello"
runOnInput parser input |> _assertEqual (
    Success (
        "Hello",
        {
            lines = [| "Hello" |]
            position = { line = 0; column = 5 }
        }
    )
)
Success ("Hello", { lines = [|"Hello"|]
                    position = { line = 0
                                 column = 5 } })

In [ ]:
let whitespaceChar =
    satisfy Char.IsWhiteSpace "whitespace"
In [ ]:
let spaces = many whitespaceChar
In [ ]:
let spaces1 = many1 whitespaceChar
In [ ]:
//// test

let input = fromStr "  Hello"
let parser = spaces1 .>>. pstring "Hello"
runOnInput parser input |> _assertEqual (
    Success (
        ([ ' '; ' ' ], "Hello"),
        {
            lines = [| "  Hello" |]
            position = { line = 0; column = 7 }
        }
    )
)
Success (([' '; ' '], "Hello"), { lines = [|"  Hello"|]
                                  position = { line = 0
                                               column = 7 } })

In [ ]:
let digitChar =
    satisfy Char.IsDigit "digit"
In [ ]:
//// test

let input = fromStr "Hello"
let parser = digitChar
runOnInput parser input |> _assertEqual (
    Failure (
        "digit",
        "Unexpected 'H'",
        {
            currentLine = "Hello"
            line = 0
            column = 0
        }
    )
)
Failure ("digit", "Unexpected 'H'", { currentLine = "Hello"
                                      line = 0
                                      column = 0 })

In [ ]:
let pint =
    let inline resultToInt (sign, digits) =
        let i = int digits
        match sign with
        | Some ch -> -i
        | None -> i

    let digits = manyChars1 digitChar

    opt (pchar '-') .>>. digits
    |> mapP resultToInt
    <?> "integer"
In [ ]:
//// test

run pint "-123"
|> _assertEqual (
    Success (
        -123,
        {
            lines = [| "-123" |]
            position = { line = 0; column = 4 }
        }
    )
)
Success (-123, { lines = [|"-123"|]
                 position = { line = 0
                              column = 4 } })

In [ ]:
let pfloat =
    let inline resultToFloat (((sign, digits1), point), digits2) =
        let fl = float $"{digits1}.{digits2}"
        match sign with
        | Some ch -> -fl
        | None -> fl

    let digits = manyChars1 digitChar

    opt (pchar '-') .>>. digits .>>. pchar '.' .>>. digits
    |> mapP resultToFloat
    <?> "float"
In [ ]:
//// test

run pfloat "-123.45"
|> _assertEqual (
    Success (
        -123.45,
        {
            lines = [| "-123.45" |]
            position = { line = 0; column = 7 }
        }
    )
)
Success (-123.45, { lines = [|"-123.45"|]
                    position = { line = 0
                                 column = 7 } })

In [ ]:
let inline createParserForwardedToRef<'a> () =
    let mutable parserRef : Parser<'a> =
        {
            label = "unknown"
            parseFn = fun _ -> failwith "unfixed forwarded parser"
        }

    let wrapperParser =
        { parserRef with
            parseFn = fun input -> runOnInput parserRef input
        }

    wrapperParser, (fun v -> parserRef <- v)
In [ ]:
let inline (>>%) p x =
    p
    |>> fun _ -> x