CPS 343/543 Lecture notes: Introduction to ML
Coverage: [EMLP] and [TLML]
Key language concepts in ML
- like Scheme, a statically scoped, functional
language with some imperative features,
but with a Pascal-like syntax
- pattern-directed invocation (pattern matching,
pattern-action rule oriented style of programming; as already seen
in Haskell)
- higher-order functions (map, o, foldl,
foldr)
- currying
- strong typing (ML is a strongly-typed programming language)
- type inference (even functions have types!)
- expressive type systems
(ML and Haskell have arguably the best in all of programming languages)
- a useful general-purpose programming language, it incorporates
- functional features from LISP,
- rule-based programming and pattern matching from PROLOG,
- Pascal-like syntax, and
- data abstraction from Smalltalk and C++
Core ML
- simple expressions
- primitive types: integer, real, boolean, character, string
- homogeneous lists
- list operators: :: (cons), @ (append)
- implode and explode
- functions are fun and have types
- higher-order functions:
functions that take functions as arguments (e.g.,
map, o, foldl, foldr)
- type system and structures, signatures, and functors
- : operator associates a type with a value or expression (e.g.,
1 : int)
Primitive types in ML
(* each expression below is evaluated at the top level; the comment that
   follows it shows the interpreter's reply, which binds the result to it *)
3;
(* val it = 3 : int *)
3.3;
(* val it = 3.3 : real *)
true;
(* val it = true : bool *)
"hello world";
(* val it = "hello world" : string *)
(* a character literal is a one-character string prefixed with # *)
#"a";
(* val it = #"a" : char *)
Essentials
- character conversions: ord and chr functions
- string concatenation: ^ (hat) operator
(e.g., "hello " ^ "world")
- basic arithmetic: +, -, *,
/ (for reals), div (for ints), mod, and
~ for unary negative
- comparison operators: =,
<, >, <=, >=,
and <>
to compare ints, reals, characters, or strings
with one exception: reals may not be compared
using = or <>
- boolean operators: orelse, andalso (do
not confuse with and below), and not;
orelse and andalso use lazy evaluation
- if-then-else expressions: there is no if without an else, why?
- converting an infix operator to prefix (use op):
7 + 2; = (op +) (7, 2);
7 - 2; = (op -) (7, 2);
- comments: (* this is a comment *)
- = vs. val x =:
latter is not really an assignment in the variable sense
- running an ML program
- use "program.sml" (from within the system),
- $ sml < reverse.sml # from the command line, or
- $ sml reverse.sml # from the command line
- use the EOF character (on UNIX systems <ctrl-d>) to
quit the interpreter
Lists
- lists are homogeneous (e.g., [1,2,3,4])
- tuples are heterogeneous (see below)
- nil and [] (the empty list) have the same
semantics
- can have lists of tuples, but each tuple in the list must
have the same type
- :: is the cons operator (and associates right-to-left)
- takes a head (element) and a tail (list)
- x::xs is a list of at least one element
- x::nil is a list of exactly one element; same as [x]
- x::y::excess is a list of at least two elements
- x::y::nil is a list of exactly two elements
- hd (for head) and tl (for tail) functions (analogs of
car and cdr, respectively)
- @ is the append operator
- takes two lists
- also inefficient as in Scheme
Tuples
- can be thought of as a
heterogeneous list or struct
- idea from relational databases
- a two-element tuple is called a pair
- a three-element tuple is called a triple (e.g.,
(1, "Larry", 3.76)
val it = (1,"Larry",3.76) : int * string * real
)
- accessing elements of a tuple: #1, #2,
..., #n (e.g., #2((1,"Larry",3.76)) returns
"Larry")
Functions
Some user-defined functions
(* simple functions
pattern-directed invocation
pattern matching
like cases in Scheme *)
(* square : int -> int
   multiplies its argument by itself *)
val square = fn n => n * n;
(* fact : int -> int
   factorial; the literal pattern 0 selects the base case and every other
   argument falls through to the recursive clause *)
val rec fact = fn 0 => 1
                | n => n * fact (n - 1);
(* sumlist : int list -> int
   adds up the elements of a list; the int literal 0 returned for the
   empty list is what causes ML to use the type "int list -> int" *)
fun sumlist numbers =
  case numbers of
    nil => 0
  | first :: rest => first + sumlist rest;
(* fib : int -> int
   Fibonacci numbers with fib 0 = fib 1 = 1; every other argument is the
   sum of the two preceding values *)
fun fib n =
  case n of
    0 => 1
  | 1 => 1
  | _ => fib (n - 1) + fib (n - 2);
(* gcd : int * int -> int
   Euclid's algorithm written twice; the second definition rebinds the
   name gcd and computes the same result as the first *)
(* clausal form: a literal 0 in the second position selects the base case *)
val rec gcd = fn (a, 0) => a
               | (a, b) => gcd (b, a mod b);
(* conditional form: the base case is tested explicitly *)
fun gcd (u, v) = if v <> 0 then gcd (v, u mod v) else u;
(* with pattern-directed invocation:
   the empty list reverses to itself; otherwise the head is appended
   after the reversed tail *)
val rec reverse = fn nil => nil
                   | first :: rest => reverse rest @ [first];
(* without pattern-directed invocation need a hd and tl,
   or an if-then-else and hd and tl;
   hd and tl are the analogs of car and cdr, respectively.
   Here only the empty list is recognised by a pattern; a non-empty
   list is taken apart with hd and tl *)
fun reverse nil = nil
  | reverse whole =
      let
        val first = hd whole
        val rest = tl whole
      in
        reverse rest @ [first]
      end;
(* reverse : 'a list -> 'a list
   no patterns at all: null tests for the empty list, and hd and tl take
   a non-empty list apart.
   null is used rather than the comparison L = nil because an equality
   test would restrict reverse to lists of equality types (so it could
   not be applied to a real list, for example) *)
fun reverse (L) =
  if null L then nil
  else reverse (tl (L)) @ [hd (L)];
(* demonstration of as:
   L names the whole argument while x::xs takes it apart.
   The pattern x::xs never matches the empty list, so nil needs its own
   clause; without it the recursion reaches the end of the list and
   fails with a Match exception.  A one-element list is its own
   reverse, so it is returned unchanged through the name L *)
fun reverse nil = nil
  | reverse (L as x::xs) =
      if null xs then L else reverse (xs) @ [x];
(* ref. [EMLP] pp. 84-88;
   use difference lists technique:
   rev1 moves the elements of its first argument, one at a time, onto the
   front of its second argument, so no append is needed *)
fun rev1 (pending, acc) =
  case pending of
    nil => acc
  | x :: rest => rev1 (rest, x :: acc);
(* reverse : 'a list -> 'a list -- starts rev1 with an empty result *)
fun reverse (L) = rev1 (L, nil);
(* member : ''a * ''a list -> bool
   true exactly when some element of the list is equal to e;
   the search stops at the first match *)
fun member (e, lst) = List.exists (fn candidate => candidate = e) lst;
(* insertineach : 'a * 'a list list -> 'a list list
   puts item on the front of every list in the second argument *)
fun insertineach (item, lists) = map (fn l => item :: l) lists;
(* powerset : 'a list -> 'a list list
   notice how use of "let" prevents re-computation:
   the powerset of the tail is built once and used twice, first with the
   head element inserted into each subset and then as it is *)
fun powerset lst =
  case lst of
    nil => [nil]
  | first :: rest =>
      let
        val without = powerset rest
      in
        insertineach (first, without) @ without
      end;
Mergesort
(* ref. [EMLP] section 3.4 *)
(* split : 'a list -> 'a list * 'a list
   deals the elements alternately into two lists; the last element of an
   odd-length list goes to the second list.
   Binding the recursive result with let calls split on the remainder
   once; selecting #1 and #2 from two separate calls would repeat it *)
fun split lst =
  case lst of
    nil => (nil, nil)
  | [only] => (nil, [only])
  | a :: b :: rest =>
      let
        val (firsts, seconds) = split rest
      in
        (a :: firsts, b :: seconds)
      end;
(* merge : int list * int list -> int list
   combines two lists by repeatedly taking the smaller head;
   when the heads are equal the one from the second list is taken first *)
fun merge (xs, ys) =
  case (xs, ys) of
    (_, nil) => xs
  | (nil, _) => ys
  | (x :: xtail, y :: ytail) =>
      if x < y then x :: merge (xtail, ys)
      else y :: merge (xs, ytail);
(* mergesort : int list -> int list
   lists of length 0 or 1 are returned as they are; longer lists are
   split in two, each half is sorted recursively, and the sorted halves
   are merged *)
fun mergesort nil = nil
  | mergesort [single] = [single]
  | mergesort whole =
      let
        val (front, back) = split whole
      in
        merge (mergesort front, mergesort back)
      end;
the < comparison operator in
the merge function causes ML
to use the type int list * int list -> int list
for merge
Mapping
(* ourmap : ('a -> 'b) * 'a list -> 'b list
   applies f to each element, front to back, and collects the results
   in the same order *)
fun ourmap (f, lst) =
  case lst of
    nil => nil
  | first :: rest => f first :: ourmap (f, rest);
(* square : int -> int *)
val square = fn x => x * x;
(* ourmap receives the function and the list together as one tuple *)
ourmap (square, [1, 2, 3, 4, 5, 6]);
(* squarelist : int list -> int list
   the built-in map is curried: the function comes first, then the list *)
fun squarelist lon = map square lon;
squarelist [1, 2, 3, 4, 5, 6];
vs. val squarelist = map square;
Functional composition
ML's composition operator o
(* in f o g the function on the right is applied first and the function
   on the left is applied to its result *)
fun add3 (x) = x+3;
fun mult2 (x) = x*2;
(* add 3, then double: (3 + 3) * 2 *)
val add3_then_mult2 = mult2 o add3;
(* double, then add 3: (3 * 2) + 3 *)
val mult2_then_add3 = add3 o mult2;
add3_then_mult2 (3);
(* val it = 12 : int *)
mult2_then_add3 (3);
(* val it = 9 : int *)
(* the same composition with o used in prefix position on a pair *)
val add3_then_mult2 = (op o) (mult2, add3);
(* op converts an infix operator to a prefix operator *)
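(* be careful: an asterisk immediately followed by a right parenthesis is
   read as the end of a comment, so leave a space after the asterisk *)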
(op *) (4, 5); (* doesn't work *)
(op * ) (4, 5); (* now it works, phew! *)
Fooooolding lists
(* foldl and foldr take a prefix binary function,
a base value of recursion, and a list, in that order *)
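(* the standard ML foldl seems to have the wrong type!
foldl : ('a * 'b -> 'b) -> 'b -> 'a list -> 'b
the function being folded receives (element, accumulator) rather than
(accumulator, element), so foldl f b [x1,...,xn] computes
f (xn, ... f (x2, f (x1, b)))
(compare foldr, where -(1,-(2,-(3,-(4,0)))) = ~2) *)
(* foldl works through the list from the left (e.g.,
+(4,+(3,+(2,+(1,0)))) = 10)
think of foldl as using the accumulator approach *)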
foldl (op +) 0 [1,2,3,4];
(* foldl hands the function (element, accumulator), so this computes
   -(4,-(3,-(2,-(1,0)))) = 2
   and not -(-(-(-(0,1),2),3),4) = ~10 *)
foldl (op -) 0 [1,2,3,4];
(* val it = 2 : int *)
(* foldr : ('a * 'b -> 'b) -> 'b -> 'a list -> 'b
foldr associates from the right (e.g.,
(1 :: (2 :: (3 :: []))))
(1 - (2 - (3 - (4 - 0)))) = ~2 *)
foldr (op -) 0 [1,2,3,4];
(* val it = ~2 : int *)
(* foldr is curried, so leaving off the list argument yields a function
   that still expects the list *)
val sumlist = foldr (op +) 0;
Putting it all together: higher-order functions
- now we use these concepts to define our own implode function
(* explode turns a string into a list of its characters and implode
   turns such a list back into a string, so applying one after the other
   gives back the original string *)
implode (explode ("apple"));
(* val it = "apple" : string *)
val exploded = explode ("apple");
(* val exploded = [#"a",#"p",#"p",#"l",#"e"] : char list *)
val apple = implode ([#"a",#"p",#"p",#"l",#"e"]);
(* val apple = "apple" : string *)
(* first attempt at implementing our own version of implode;
   this definition is a deliberate counter-example and is rejected by
   ML, which is what the question that follows it asks about *)
fun combine (x) = foldr (op ^) #"" x;
why will this not work?
(* str converts from char to string *)
(* ourimplode : char list -> string
   turns every character into a one-character string and then joins the
   strings with ^, starting from the empty string *)
fun ourimplode chars = foldr (op ^) "" (map str chars); (* this works *)
- converting a string representing an integer to an integer
(* example: 123 = (3+0) + (2*10) + (1*100) *)
(* char2int : char * int -> int
   moves the running value v one decimal place to the left and adds the
   digit that the character c stands for *)
fun char2int (c, v) = 10 * v + (ord c - ord #"0");
(* val char2int = fn : char * int -> int *)
(* foldr consumes the last character first:
   char2int (#"1", char2int (#"2", char2int (#"3", 0))) *)
foldr char2int 0 (explode ("123"));
(* val it = 321 : int *)
(* foldl consumes the first character first:
   char2int (#"3", char2int (#"2", char2int (#"1", 0))) *)
foldl char2int 0 (explode ("123"));
(* val it = 123 : int *)
(* string2int : string -> int
   explode the string, then fold char2int over the characters from the left *)
val string2int = foldl char2int 0 o explode;
Anonymous or literal functions
(* an anonymous function applied directly to its argument;
   a top-level expression has to be terminated by a semicolon before the
   next declaration can begin *)
(fn (n) => n+1) (5);
(* val it = 6 : int *)
(* addtwo : int -> int -- the named counterpart of fn n => n+2 *)
fun addtwo (n) = n + 2;
(* val addtwo = fn : int -> int *)
map addtwo [1,2,3];
(* val it = [3,4,5] : int list *)
(* the same mapping without first giving the function a name *)
map (fn n => n+2) [1,2,3];
(* val it = [3,4,5] : int list *)
why use an anonymous function? see definition of string2int below
see [EMLP] §5.1.3 (pp. 129-130) for more information
converting a string representing an integer to an integer
(* helper : char * int -> int
   folds one more digit character into the running total: the total is
   multiplied by 10 and the numeric value of the digit is added.
   The character is the first component of the argument pair and the
   running total is the second *)
fun helper (initChar, oursum) = ord (initChar) - ord (#"0") + 10*oursum;
(* char * int -> int *)
(* string2int : string -> int
   folds helper over the characters of x from the left, starting at 0 *)
fun string2int (x) = foldl helper 0 (explode(x));
(* string2int : string -> int
   the per-digit step is written as an anonymous function because it is
   needed only here: the running total is shifted one decimal place and
   the value of the next digit character is added *)
fun string2int (x) =
  let
    val digits = explode x
  in
    foldl (fn (digit, total) => 10 * total + (ord digit - ord #"0")) 0 digits
  end;
Exceptions
(* raised by power when it is given a negative exponent *)
exception NegativeInt;
(* power : int -> int -> int
   power e b raises the base b to the exponent e (exponent first).
   An exponent of 0 yields 1 for every base; a negative exponent has no
   integer result and raises NegativeInt.  The test must be e < 0: with
   e > 0 every positive exponent would be rejected and a negative one
   would recurse without end *)
fun power 0 b = 1
  | power e b = if e < 0 then raise NegativeInt else b * power (e-1) b;
power 3 ~2;
(* val it = ~8 : int *)
power ~3 2;
(*
uncaught exception NegativeInt
*)
Types
- type introduces a new name for an existing type (e.g.,
type Point = (int * int);
-- can be parameterized (like a template in C++)
type ('a, 'b) Mapping = ('a * 'b) list;
-- recursive types not permitted
type Tree = (int * Tree list)
)
- datatype introduces a new type (e.g.,
-- a variant record or a union of structs
-- comparable to define-datatype
datatype Bool = True | False;
datatype Colors = Red | Green | Blue | Orange | Yellow;
-- can be parameterized (like a template in C++)
datatype 'a Student = New | Id of 'a;
-- can be recursive
datatype Natural = Zero | Succ of Natural;
datatype IntTree = Leaf of int | Node of IntTree * int * IntTree;
-- can be parameterized and recursive
datatype 'a List = Nil | Cons of 'a * ('a List);
)
ML's type system
ML has a very powerful type system. A type system is
a language's support for creating new types.
See [EMLP] Chapter 6 for more information.
(* like typedef in C: each declaration gives a new name to an existing type *)
type id = int;
type name = string;
type age = int;
type gender = char;
type rate = real;
(* an employee record as a five-element tuple of the synonyms above *)
type employee = (id * name * gender * age * rate);
(* the same tuple with real written in place of its synonym rate;
   this declaration rebinds the name employee *)
type employee = (id * name * gender * age * real);
(* the type annotation after the colon labels the tuple as an employee *)
val lucia = (1, "Lucia", #"f", 46, 45.56): employee;
val lewis = (2, "Lewis", #"m", 64, 7.25): employee;
(* a company is a list of employees *)
type company = employee list;
val udcps = [lucia, lewis]: company;
(* a point is a pair of reals and a rectangle is a tuple of four points *)
type point = (real * real);
type rectangle = (point * point * point * point);
(* like a template in C++: the synonym takes two type parameters and
   stands for a list of (domain, range) pairs *)
type ('domain_type, 'range_type) mapping = ('domain_type * 'range_type) list;
val emp_mapping = [(1, "Lucia"), (2, "Larry")]: (int, string) mapping;
(* NOTE(review): this binds the name floor, which presumably hides the
   built-in floor function for the rest of the session -- confirm *)
val floor = [(2.1, 2), (2.2, 2)]: (real, int) mapping;
(* an enumeration: seven constructors, none of which carries a value *)
datatype daysofourlives = Sun | Mon | Tue | Wed | Thu | Fri | Sat;
(* onholiday : daysofourlives -> bool
   true for Sun and Sat, false for every other day *)
fun onholiday (day) =
  case day of
    Sun => true
  | Sat => true
  | _ => false;
onholiday (Mon);
onholiday (Sat);
(* like the define-datatype construct from [EOPL]:
   a tree is either Empty or a Node made of a left subtree, an int,
   and a right subtree *)
datatype bintreeofints = Empty | Node of bintreeofints * int * bintreeofints;
(* a sample tree whose root holds 6 *)
val ourbintreeofints =
  Node (Node (Node (Empty, 1, Empty), 7, Node (Empty, 2, Empty)),
        6,
        Node (Node (Empty, 3, Empty),
              8,
              Node (Node (Empty, 5, Empty), 4, Node (Empty, 10, Empty))));
(* if inorder returns a sorted list,
   then its parameter is a binary search tree *)
(* inorder : bintreeofints -> int list -- left subtree, node, right subtree *)
fun inorder (Empty) = nil
  | inorder (Node (l, v, r)) = inorder l @ (v :: inorder r);
(* preorder : bintreeofints -> int list -- node, left subtree, right subtree *)
fun preorder (Empty) = nil
  | preorder (Node (l, v, r)) = v :: (preorder l @ preorder r);
(* postorder : bintreeofints -> int list -- left subtree, right subtree, node *)
fun postorder (Empty) = nil
  | postorder (Node (l, v, r)) = List.concat [postorder l, postorder r, [v]];
inorder (ourbintreeofints);
preorder (ourbintreeofints);
postorder (ourbintreeofints);
(* parameterized datatype: 'd is the type of the value kept in each node *)
datatype 'd bintree = Empty2 | Node2 of 'd bintree * 'd * 'd bintree;
(* inorder : 'd bintree -> 'd list -- left subtree, node, right subtree *)
fun inorder tree =
  case tree of
    Empty2 => nil
  | Node2 (l, v, r) => List.concat [inorder l, [v], inorder r];
(* preorder : 'd bintree -> 'd list -- node, left subtree, right subtree *)
fun preorder tree =
  case tree of
    Empty2 => nil
  | Node2 (l, v, r) => List.concat [[v], preorder l, preorder r];
(* postorder : 'd bintree -> 'd list -- left subtree, right subtree, node *)
fun postorder tree =
  case tree of
    Empty2 => nil
  | Node2 (l, v, r) => List.concat [postorder l, postorder r, [v]];
(* a sample tree of strings whose root holds "cat" *)
val ourbintree =
  Node2 (Node2 (Node2 (Empty2, "the", Empty2), "type", Node2 (Empty2, "is", Empty2)),
         "cat",
         Node2 (Node2 (Empty2, "called", Empty2), "bintree", Node2 (Empty2, "and", Empty2)));
inorder (ourbintree);
preorder (ourbintree);
postorder (ourbintree);
ML structures
- a collection of datatypes and functions:
like a class from object-oriented programming
- use of pre-defined ML structures: TextIO,
Char, String, List, Math
- to prevent over-riding of functions use fully-qualified name
- see [EMLP] Chapter 8 for more information.
File I/O
- among the impure features of ML; these have side-effects
- use of "print" command
- only prints strings
- use Int.toString, Real.toString, and so on,
for proper casting
- use ^ (hat) for string concatenation
- option datatype has two values (NONE and SOME)
- use isSome(x) to determine whether an option variable holds a value (SOME) or not (NONE)
- use valOf(x) to extract value of an option variable
- string option list != string list
without pattern matching
reading from files, tokenizing
writing to files
codes
(* open a file for reading and close it again when done *)
val ourinstream = TextIO.openIn ("input.txt");
TextIO.closeIn (ourinstream);
(* the output stream is bound to a name so that it can be closed after
   writing; closing it flushes any buffered output and releases the file *)
val ouroutstream = TextIO.openOut ("output.txt");
TextIO.output (ouroutstream, "hello universe");
TextIO.closeOut (ouroutstream);
(* open brings the names of TextIO into scope, so openIn, inputLine and
   closeIn can be used without the TextIO. prefix *)
open TextIO;
val ourin = openIn ("input.txt");
(* val ourin = - : instream *)
(* each call to inputLine returns the next line, newline included,
   wrapped in SOME; once the input is used up it returns NONE *)
val line = inputLine (ourin);
(* val line = SOME "the quick brown fox ran slowly.\n" : string option *)
isSome (line);
(* val it = true : bool *)
val line = inputLine (ourin);
(* val line = SOME "kewl\n" : string option *)
isSome (line);
(* val it = true : bool *)
val line = inputLine (ourin);
(* val line = SOME "cool\n" : string option *)
isSome (line);
(* val it = true : bool *)
val line = inputLine (ourin);
(* val line = SOME "cooler\n" : string option *)
isSome (line);
(* val it = true : bool *)
val line = inputLine (ourin);
(* val line = SOME "cooler in even in the winter\n" : string option *)
isSome (line);
(* val it = true : bool *)
val line = inputLine (ourin);
(* val line = NONE : string option *)
isSome (line);
(* val it = false : bool *)
(* the stream is closed once the end of the input has been reached *)
closeIn (ourin);
(* val it = () : unit *)
Reference types
- like those in C
- ! in ML is analogous to * in C (it dereferences a reference)
- these run contrary to the spirit of functional programming
(* ref creates a reference cell, ! reads the value held in a cell,
   and := replaces that value *)
val x = ref 0;
(* val x = ref 0 : int ref *)
x;
(* val it = ref 0 : int ref *)
(* read the current contents, add 1, and store the result back;
   the assignment itself evaluates to unit *)
x := !x + 1;
(* val it = () : unit *)
!x;
(* val it = 1 : int *)
x;
(* val it = ref 1 : int ref *)
(* a reference can hold a value of any type, here a string *)
val name_ptr = ref "hello";
(* val name_ptr = ref "hello": string ref *)
!name_ptr;
(* val it = "hello" : string *)
(* the same steps again, starting from 1 *)
val x = ref 1;
(* val x = ref 1 : int ref *)
!x;
(* val it = 1 : int *)
x := !x + 1;
(* val it = () : unit *)
x;
(* val it = ref 2 : int ref *)
!x;
(* val it = 2 : int *)
(* here x is an ordinary int, so y is a cell that holds the value 4 *)
val x = 4;
(* val x = 4 : int *)
val y = ref x;
(* val y = ref 4 : int ref *)
!y;
(* val it = 4 : int *)
(* here x is itself a reference, so y is a reference to a reference *)
val x = ref 1;
(* val x = ref 1 : int ref *)
val y = ref x;
(* val y = ref (ref 1) : int ref ref *)
(* the update is made through x ... *)
x := !x + 1;
!x;
(* val it = 2 : int *)
(* ... and is visible through y, because y holds the cell x rather than
   a copy of the value that was in it *)
!y;
(* val it = ref 2 : int ref *)
(* two dereferences are needed to get from y to the int *)
!(!y);
(* val it = 2 : int *)
References
| [EMLP] |
J.D. Ullman.
Elements of ML Programming.
Prentice Hall, Upper Saddle River, NJ, Second edition, 1997. |
| [TLML] |
M. Felleisen and D.P. Friedman.
The Little MLer.
MIT Press, Cambridge, MA, 1997.
|
|