理解 Monads 来自实践

本主题适用于中级到高级 F#开发人员

“什么是 Monads?” 是一个常见的问题。这很容易回答, 但就像 Hitchhikers 指导银河系一样,我们意识到我们不理解答案,因为我们不知道我们要求的是什么。

许多人认为理解 Monads 的方法是练习它们。作为程序员,我们通常不关心 Liskov 的替换原则,子类型或子类的数学基础。通过使用这些想法,我们获得了他们所代表的直觉。Monads 也是如此。

为了帮助你开始使用 Monads,此示例演示了如何构建 Monadic Parser Combinator 库。这可能有助于你入门,但理解将来自编写你自己的 Monadic 库。

足够的散文,代码的时间

解析器类型:

// A Parser<'T> is a function that takes a string and position
//  and returns an optionally parsed value and a position
//  A parsed value means the position points to the character following the parsed value
//  No parsed value indicates a parse failure at the position
type Parser<'T> = Parser of (string*int -> 'T option*int)

使用 Parser 的这个定义,我们定义了一些基本的解析器函数

// Runs a parser 't' on the input string 's'
let run t s =
  let (Parser tps) = t
  tps (s, 0)

// Different ways to create parser result
let succeedWith v p = Some v, p
let failAt        p = None  , p

// The 'satisfy' parser succeeds if the character at the current position 
//  passes the 'sat' function
let satisfy sat : Parser<char> = Parser <| fun (s, p) ->
  if p < s.Length && sat s.[p] then succeedWith s.[p] (p + 1)
  else failAt p

// 'eos' succeeds if the position is beyond the last character.
//  Useful when testing if the parser have consumed the full string
let eos : Parser<unit> = Parser <| fun (s, p) ->
  if p < s.Length then failAt p
  else succeedWith () p

let anyChar       = satisfy (fun _ -> true)
let char ch       = satisfy ((=) ch)
let digit         = satisfy System.Char.IsDigit
let letter        = satisfy System.Char.IsLetter

satisfy 是一个函数,如果我们没有通过 EOS 并且当前位置的字符通过 sat 函数,则给定 sat 函数会生成一个成功的解析器。使用 satisfy,我们创建了许多有用的字符解析器。

在 FSI 中运行:

> run digit "";;
val it : char option * int = (null, 0)
> run digit "123";;
val it : char option * int = (Some '1', 1)
> run digit "hello";;
val it : char option * int = (null, 0)

我们有一些基本的解析器。我们将使用解析器组合函数将它们组合成更强大的解析器

// 'fail' is a parser that always fails
let fail<'T>      = Parser <| fun (s, p) -> failAt p
// 'return_' is a parser that always succeed with value 'v'
let return_ v     = Parser <| fun (s, p) -> succeedWith v p

// 'bind' let us combine two parser into a more complex parser
let bind t uf     = Parser <| fun (s, p) ->
  let (Parser tps) = t
  let tov, tp = tps (s, p)
  match tov with
  | None    -> None, tp
  | Some tv ->
    let u = uf tv
    let (Parser ups) = u
    ups (s, tp)

名称和签名不是任意选择的, 但我们不会深思熟虑,而是让我们看看我们如何使用 bind 将解析器组合成更复杂的解析器:

> run (bind digit (fun v -> digit)) "123";;
val it : char option * int = (Some '2', 2)
> run (bind digit (fun v -> bind digit (fun u -> return_ (v,u)))) "123";;
val it : (char * char) option * int = (Some ('1', '2'), 2)
> run (bind digit (fun v -> bind digit (fun u -> return_ (v,u)))) "1";;
val it : (char * char) option * int = (null, 1)

这告诉我们的是 bind 允许我们将两个解析器组合成一个更复杂的解析器。由于 bind 的结果是一个解析器,反过来可以再次组合。

> run (bind digit (fun v -> bind digit (fun w -> bind digit (fun u -> return_ (v,w,u))))) "123";;
val it : (char * char * char) option * int = (Some ('1', '2', '3'), 3)

虽然我们将定义辅助函数来简化语法,但 bind 将是我们组合解析器的基本方法。

可以简化语法的一个方面是计算表达式 。它们很容易定义:

type ParserBuilder() =
  member x.Bind       (t, uf) = bind      t   uf
  member x.Return     v       = return_   v
  member x.ReturnFrom t       = t

// 'parser' enables us to combine parsers using 'parser { ... }' syntax
let parser = ParserBuilder()

FSI

let p = parser {
          let! v = digit
          let! u = digit
          return v,u
        }
run p "123"
val p : Parser<char * char> = Parser <fun:bind@49-1>
val it : (char * char) option * int = (Some ('1', '2'), 2)

这相当于:

> let p = bind digit (fun v -> bind digit (fun u -> return_ (v,u)))
run p "123";;
val p : Parser<char * char> = Parser <fun:bind@49-1>
val it : (char * char) option * int = (Some ('1', '2'), 2)

我们将要使用的另一个基本的解析器组合器是 orElse

// 'orElse' creates a parser that runs parser 't' first, if that is successful
//  the result is returned otherwise the result of parser 'u' is returned
let orElse t u    = Parser <| fun (s, p) ->
  let (Parser tps) = t
  let tov, tp = tps (s, p)
  match tov with
  | None    -> 
    let (Parser ups) = u
    ups (s, p)
  | Some tv -> succeedWith tv tp

这允许我们像这样定义 letterOrDigit

> let letterOrDigit = orElse letter digit;;
val letterOrDigit : Parser<char> = Parser <fun:orElse@70-1>
> run letterOrDigit "123";;
val it : char option * int = (Some '1', 1)
> run letterOrDigit "hello";;
val it : char option * int = (Some 'h', 1)
> run letterOrDigit "!!!";;
val it : char option * int = (null, 0)

关于 Infix 运算符的说明

对 FP 的一个共同关注点是使用不常用的中缀运算符,如 >>=>=><- 等。然而,大多数人并不关心使用+-*/%,这些都是众所周知的运算符,用于组成值。然而,FP 中的一个重要部分不仅是组成值,还包括函数。对于中级 FP 开发者来说,中缀运算符 >>=>=><- 是众所周知的,应该具有特定的签名和语义。

对于我们到目前为止定义的函数,我们将定义以下用于组合解析器的中缀运算符:

let (>>=)   t   uf  = bind t uf
let (<|>)   t   u   = orElse t u

所以 >>= 意味着 bind<|> 意味着 orElse

这允许我们组合解析器更简洁:

let letterOrDigit = letter <|> digit
let p = digit >>= fun v -> digit >>= fun u -> return_ (v,u)

为了定义一些高级解析器组合器,它们将允许我们解析更复杂的表达式,我们定义了一些更简单的解析器组合器:

// 'map' runs parser 't' and maps the result using 'm'
let map m t       = t >>= (m >> return_)
let (>>!) t m     = map m t
let (>>%) t v     = t >>! (fun _ -> v)

// 'opt' takes a parser 't' and creates a parser that always succeed but
//  if parser 't' fails the new parser will produce the value 'None'
let opt t         = (t >>! Some) <|> (return_ None)

// 'pair' runs parser 't' and 'u' and returns a pair of 't' and 'u' results
let pair t u      = 
  parser {
    let! tv = t
    let! tu = u
    return tv, tu
  }

我们已经准备好定义 manysepBy,它们在应用输入解析器时会更加高级,直到它们失败。然后 manysepBy 返回聚合结果:

// 'many' applies parser 't' until it fails and returns all successful
//  parser results as a list
let many t =
  let ot = opt t
  let rec loop vs = ot >>= function Some v -> loop (v::vs) | None -> return_ (List.rev vs)
  loop []

// 'sepBy' applies parser 't' separated by 'sep'. 
//  The values are reduced with the function 'sep' returns
let sepBy t sep     =
  let ots = opt (pair sep t)
  let rec loop v = ots >>= function Some (s, n) -> loop (s v n) | None -> return_ v
  t >>= loop

创建一个简单的表达式解析器

使用我们创建的工具,我们现在可以为像 1+2*3 这样的简单表达式定义解析器

我们从底部开始,为整数 pint 定义一个解析器

// 'pint' parses an integer
let pint = 
  let f s v = 10*s + int v - int '0'
  parser {
    let! digits = many digit
    return! 
      match digits with
      | [] -> fail
      | vs -> return_ (List.fold f 0 vs)
  }

我们尝试尽可能多地解析数字,结果是 char list。如果列表为空,我们 fail,否则我们将字符折叠成整数。

在 FSI 中测试 pint

> run pint "123";;
val it : int option * int = (Some 123, 3)

另外,我们需要解析用于组合整数值的不同类型的运算符:

// operator parsers, note that the parser result is the operator function 
let padd      = char '+' >>% (+)
let psubtract = char '-' >>% (-)
let pmultiply = char '*' >>% (*)
let pdivide   = char '/' >>% (/)
let pmodulus  = char '%' >>% (%)

FSI:

> run padd "+";;
val it : (int -> int -> int) option * int = (Some <fun:padd@121-1>, 1)

把它们绑定在一起:

// 'pmullike' parsers integers separated by operators with same precedence as multiply
let pmullike  = sepBy pint (pmultiply <|> pdivide <|> pmodulus)
// 'paddlike' parsers sub expressions separated by operators with same precedence as add
let paddlike  = sepBy pmullike (padd <|> psubtract)
// 'pexpr' is the full expression
let pexpr     =
  parser {
    let! v = paddlike
    let! _ = eos      // To make sure the full string is consumed
    return v
  }

在 FSI 中运行它:

> run pexpr "2+123*2-3";;
val it : int option * int = (Some 245, 9)

结论

通过定义 Parser<'T>return_bind 并确保它们遵守 monadic 定律, 我们构建了一个简单但强大的 Monadic Parser Combinator 框架。

Monads 和 Parsers 一起使用,因为 Parsers 在解析器状态下执行。Monads 允许我们在隐藏解析器状态的同时组合解析器,从而减少混乱并提高可组合性。

我们创建的框架很慢并且不会产生任何错误消息,这是为了保持代码简洁。 FParsec 提供可接受的性能以及出色的错误消息。

然而,仅凭一个例子无法理解 Monads。一个人必须练习 Monads。

以下是你可以尝试实施的 Monads 的一些示例,以达到你的理解:

  1. State Monad - 允许隐式环境状态被隐式携带
  2. Tracer Monad - 允许隐式携带跟踪状态。State Monad 的变种
  3. Turtle Monad - 用于创建 Turtle(Logos) 程序的 Monad。State Monad 的变种
  4. 继续 Monad - 一个协程 Monad。这方面的一个例子是 F#中的 async

要学习的最好方法是在你熟悉的域中提出 Monads 的应用程序。对我来说,这是解析器。

完整源代码:

// A Parser<'T> is a function that takes a string and position
//  and returns an optionally parsed value and a position
//  A parsed value means the position points to the character following the parsed value
//  No parsed value indicates a parse failure at the position
type Parser<'T> = Parser of (string*int -> 'T option*int)

// Runs a parser 't' on the input string 's'
let run t s =
  let (Parser tps) = t
  tps (s, 0)

// Different ways to create parser result
let succeedWith v p = Some v, p
let failAt        p = None  , p

// The 'satisfy' parser succeeds if the character at the current position 
//  passes the 'sat' function
let satisfy sat : Parser<char> = Parser <| fun (s, p) ->
  if p < s.Length && sat s.[p] then succeedWith s.[p] (p + 1)
  else failAt p

// 'eos' succeeds if the position is beyond the last character.
//  Useful when testing if the parser have consumed the full string
let eos : Parser<unit> = Parser <| fun (s, p) ->
  if p < s.Length then failAt p
  else succeedWith () p

let anyChar       = satisfy (fun _ -> true)
let char ch       = satisfy ((=) ch)
let digit         = satisfy System.Char.IsDigit
let letter        = satisfy System.Char.IsLetter

// 'fail' is a parser that always fails
let fail<'T>      = Parser <| fun (s, p) -> failAt p
// 'return_' is a parser that always succeed with value 'v'
let return_ v     = Parser <| fun (s, p) -> succeedWith v p

// 'bind' let us combine two parser into a more complex parser
let bind t uf     = Parser <| fun (s, p) ->
  let (Parser tps) = t
  let tov, tp = tps (s, p)
  match tov with
  | None    -> None, tp
  | Some tv ->
    let u = uf tv
    let (Parser ups) = u
    ups (s, tp)

type ParserBuilder() =
  member x.Bind       (t, uf) = bind      t   uf
  member x.Return     v       = return_   v
  member x.ReturnFrom t       = t

// 'parser' enables us to combine parsers using 'parser { ... }' syntax
let parser = ParserBuilder()

// 'orElse' creates a parser that runs parser 't' first, if that is successful
//  the result is returned otherwise the result of parser 'u' is returned
let orElse t u    = Parser <| fun (s, p) ->
  let (Parser tps) = t
  let tov, tp = tps (s, p)
  match tov with
  | None    -> 
    let (Parser ups) = u
    ups (s, p)
  | Some tv -> succeedWith tv tp

let (>>=) t uf    = bind t uf
let (<|>) t u     = orElse t u

// 'map' runs parser 't' and maps the result using 'm'
let map m t       = t >>= (m >> return_)
let (>>!) t m     = map m t
let (>>%) t v     = t >>! (fun _ -> v)

// 'opt' takes a parser 't' and creates a parser that always succeed but
//  if parser 't' fails the new parser will produce the value 'None'
let opt t         = (t >>! Some) <|> (return_ None)

// 'pair' runs parser 't' and 'u' and returns a pair of 't' and 'u' results
let pair t u      = 
  parser {
    let! tv = t
    let! tu = u
    return tv, tu
  }

// 'many' applies parser 't' until it fails and returns all successful
//  parser results as a list
let many t =
  let ot = opt t
  let rec loop vs = ot >>= function Some v -> loop (v::vs) | None -> return_ (List.rev vs)
  loop []

// 'sepBy' applies parser 't' separated by 'sep'. 
//  The values are reduced with the function 'sep' returns
let sepBy t sep     =
  let ots = opt (pair sep t)
  let rec loop v = ots >>= function Some (s, n) -> loop (s v n) | None -> return_ v
  t >>= loop

// A simplistic integer expression parser

// 'pint' parses an integer
let pint = 
  let f s v = 10*s + int v - int '0'
  parser {
    let! digits = many digit
    return! 
      match digits with
      | [] -> fail
      | vs -> return_ (List.fold f 0 vs)
  }

// operator parsers, note that the parser result is the operator function 
let padd      = char '+' >>% (+)
let psubtract = char '-' >>% (-)
let pmultiply = char '*' >>% (*)
let pdivide   = char '/' >>% (/)
let pmodulus  = char '%' >>% (%)

// 'pmullike' parsers integers separated by operators with same precedence as multiply
let pmullike  = sepBy pint (pmultiply <|> pdivide <|> pmodulus)
// 'paddlike' parsers sub expressions separated by operators with same precedence as add
let paddlike  = sepBy pmullike (padd <|> psubtract)
// 'pexpr' is the full expression
let pexpr     =
  parser {
    let! v = paddlike
    let! _ = eos      // To make sure the full string is consumed
    return v
  }