(* ================================================================== Cours 4 : Programmation fonctionnelle pour la science des donnees Partie 1 : Regression polynomiale ================================================================== *) #directory "/home/thiry/Bureau/ML" #load "graphiques.cmo" open Graphiques (* ================================================================== Introduction ================================================================== * On a des points (x_i, y_i) et on cherche les coefficients (a_0, ..., a_d) du polynome P(x) = a_0 + a_1 x + ... + a_d x^d qui minimise l'erreur quadratique : E = sum_i (P(x_i) - y_i)^2 La solution est donnee par les equations normales : (X^T X) beta = X^T y ou X est la matrice de Vandermonde (X_{i,j} = x_i^j). ================================================================== *) (* ------------------------------------------------------------------ 1. Operations matricielles (type mat = float list list) ------------------------------------------------------------------ *) type mat = float list list type vect = float list let transposer : mat -> mat = function | [] -> [] | [] :: _ -> [] | lignes -> List.init (List.length (List.hd lignes)) (fun j -> List.map (fun ligne -> List.nth ligne j) lignes) let mul_mat : mat -> mat -> mat = fun a b -> let b' = transposer b in List.map (fun ligne_a -> List.map (fun col_b -> List.fold_left2 (fun s x y -> s +. x *. y) 0. ligne_a col_b ) b' ) a let mul_mat_vect : mat -> vect -> vect = fun m v -> List.map (fun ligne -> List.fold_left2 (fun s x y -> s +. x *. y) 0. ligne v) m let map_mat f = List.map (List.map f) let add_mat a b = List.map2 (List.map2 (+.)) a b let scale_mat s = map_mat (fun x -> s *. x) (* Vecteurs *) let dot u v = List.fold_left2 (fun s x y -> s +. x *. y) 0. u v let add_vect u v = List.map2 (+.) u v let scale_vect s = List.map (fun x -> s *. x) (* ------------------------------------------------------------------ 2. Resolution de systeme lineaire : methode de Gauss avec pivot ------------------------------------------------------------------ *) (* Pivot de Gauss : transforme [A | b] en une matrice triangulaire *) let pivot_gauss (a : mat) (b : vect) : mat * vect = let n = List.length a in let a = Array.of_list (List.map Array.of_list a) in let b = Array.of_list b in for k = 0 to n - 2 do (* Recherche du pivot maximal *) let max_i = ref k in let max_v = ref (abs_float a.(k).(k)) in for i = k + 1 to n - 1 do let v = abs_float a.(i).(k) in if v > !max_v then (max_v := v; max_i := i) done; (* Echange des lignes si necessaire *) if !max_i <> k then begin let tmp = a.(k) in a.(k) <- a.(!max_i); a.(!max_i) <- tmp; let tmp = b.(k) in b.(k) <- b.(!max_i); b.(!max_i) <- tmp end; (* Elimination *) for i = k + 1 to n - 1 do let factor = a.(i).(k) /. a.(k).(k) in for j = k to n - 1 do a.(i).(j) <- a.(i).(j) -. factor *. a.(k).(j) done; b.(i) <- b.(i) -. factor *. b.(k) done done; let a = Array.to_list (Array.map Array.to_list a) in let b = Array.to_list b in (a, b) (* Substitution arriere pour resoudre le systeme triangulaire *) let substitution_arriere (a : mat) (b : vect) : vect = let n = List.length a in let x = Array.make n 0. in for i = n - 1 downto 0 do let ligne = List.nth a i in let somme = ref 0. in for j = i + 1 to n - 1 do somme := !somme +. (List.nth ligne j) *. x.(j) done; x.(i) <- (List.nth b i -. !somme) /. (List.nth ligne i) done; Array.to_list x (* Resout le systeme A x = b *) let resous_systeme a b = let (a_tri, b_tri) = pivot_gauss a b in substitution_arriere a_tri b_tri (* ------------------------------------------------------------------ 3. Construction de la matrice de Vandermonde ------------------------------------------------------------------ *) let vandermonde (xs : float list) (degre : int) : mat = List.map (fun x -> List.init (degre + 1) (fun j -> x ** float_of_int j) ) xs (* ------------------------------------------------------------------ 4. Regression polynomiale ------------------------------------------------------------------ *) let regression_poly (xs : float list) (ys : float list) (degre : int) : vect = let x = vandermonde xs degre in let xt = transposer x in let xtx = mul_mat xt x in let xty = mul_mat_vect xt ys in resous_systeme xtx xty (* ------------------------------------------------------------------ 5. Evaluation du polynome ------------------------------------------------------------------ *) let evalue_poly coeffs x = List.fold_left (fun (acc, xp) c -> (acc +. c *. xp, xp *. x) ) (0., 1.) coeffs |> fst let erreur_quadratique xs ys coeffs = let n = float_of_int (List.length ys) in List.fold_left2 (fun s x y -> let p = evalue_poly coeffs x in s +. (p -. y) ** 2. ) 0. xs ys /. n let (>>=) m f = List.concat (List.map f m) let return x = [x] (* --- Exemple 1 : regression lineaire (degre 1) --- *) (* On genere des donnees bruitees autour de y = 3x + 2 *) let genere_donnees_lineaires n = List.init n (fun i -> let x = float_of_int i /. 10. in let y = 3. *. x +. 2. +. (Random.float 0.4 -. 0.2) in (x, y) ) let () = Random.self_init (); Printf.printf "=== Regression polynomiale ===\n\n" (* --- Exemple 1 : regression lineaire (degre 1) --- *) (* On genere des donnees bruitees autour de y = 3x + 2 *) let genere_donnees_lineaires n = List.init n (fun i -> let x = float_of_int i /. 10. in let y = 3. *. x +. 2. +. (Random.float 0.4 -. 0.2) in (x, y) ) (* --- Exemple 2 : regression quadratique (degre 2) --- *) (* y = 0.5 x^2 - 2x + 3 *) let genere_donnees_quadratiques n = List.init n (fun i -> let x = float_of_int i /. 5. -. 2. in let y = 0.5 *. x ** 2. -. 2. *. x +. 3. +. (Random.float 0.6 -. 0.3) in (x, y) ) (* Donnees pour les exemples *) let donnees_lineaires = genere_donnees_lineaires 20 let donnees_quad = genere_donnees_quadratiques 30 let xs_lineaires = List.map fst donnees_lineaires let ys_lineaires = List.map snd donnees_lineaires let xs_quad = List.map fst donnees_quad let ys_quad = List.map snd donnees_quad (* --- Exemple 1 : regression lineaire (degre 1) --- *) let () = Printf.printf "--- Exemple 1 : Regression lineaire (degre 1) ---\n"; let coeffs = regression_poly xs_lineaires ys_lineaires 1 in Printf.printf "Coefficients (a0 + a1*x) :\n"; List.iteri (fun i c -> Printf.printf " a%d = %.6f\n" i c) coeffs; Printf.printf "Erreur quadratique : %.6f\n" (erreur_quadratique xs_lineaires ys_lineaires coeffs); Printf.printf "\n x y_reel y_pred\n"; List.iter (fun (x, y) -> Printf.printf " %.2f %.2f %.4f\n" x y (evalue_poly coeffs x) ) donnees_lineaires (* --- Exemple 2 : regression quadratique (degre 2) --- *) let () = Printf.printf "\n--- Exemple 2 : Regression quadratique (degre 2) ---\n"; let coeffs = regression_poly xs_quad ys_quad 2 in Printf.printf "Coefficients (a0 + a1*x + a2*x^2) :\n"; List.iteri (fun i c -> Printf.printf " a%d = %.6f\n" i c) coeffs; Printf.printf "Erreur quadratique : %.6f\n" (erreur_quadratique xs_quad ys_quad coeffs) (* --- Exemple 3 : comparaison de modeles (degres 1 a 10) --- *) let () = Printf.printf "\n--- Exemple 3 : Comparaison de modeles ---\n"; let degres = [1; 2; 3; 5; 10] in List.iter (fun d -> let c = regression_poly xs_quad ys_quad d in Printf.printf "Degre %2d : err = %.6f\n" d (erreur_quadratique xs_quad ys_quad c) ) degres (* --- Exemple 4 : fonction quelconque --- *) (* Approximation de sin(x) sur [0, pi] par un polynome de degre 5 *) let () = Printf.printf "\n--- Exemple 4 : Approximation de sin(x) ---\n"; let n = 50 in let xs = List.init n (fun i -> float_of_int i *. Float.pi /. float_of_int (n - 1)) in let ys = List.map sin xs in let coeffs = regression_poly xs ys 5 in Printf.printf "Coefficients du polynome approchant sin(x) sur [0, pi] :\n"; List.iteri (fun i c -> Printf.printf " a%d = %.6f\n" i c) coeffs; Printf.printf "\n x sin(x) approx erreur\n"; List.iter (fun x -> let exact = sin x in let approx = evalue_poly coeffs x in Printf.printf " %.2f %.4f %.4f %+.4f\n" x exact approx (approx -. exact) ) (List.filteri (fun i _ -> i mod 10 = 0) xs) (* --- Exemple 5 : Regression avec la monade Liste (tous les degres) --- *) let tous_les_modeles xs ys degres = degres >>= fun d -> let c = regression_poly xs ys d in let err = erreur_quadratique xs ys c in return (d, err, c) let () = Printf.printf "\n--- Exemple 5 : Exploration avec la monade Liste ---\n"; let degres = [1; 2; 3; 4; 5; 6; 7; 8; 9; 10] in let modeles = tous_les_modeles xs_quad ys_quad degres in Printf.printf "Tous les modeles et leur erreur :\n"; List.iter (fun (d, err, _) -> Printf.printf " degre %2d : erreur = %.6f\n" d err ) modeles; (* Meilleur modele : erreur minimale *) match List.sort (fun (_, e1, _) (_, e2, _) -> compare e1 e2) modeles with | [] -> () | (d, err, coeffs) :: _ -> Printf.printf "\nMeilleur modele : degre %d (erreur = %.6f)\n" d err (* ------------------------------------------------------------------ Exercices pour le cours ------------------------------------------------------------------ * 1. Ajouter une colonne de 1 a la matrice pour le terme constant (deja fait via Vandermonde). 2. Regression ponderee : ajouter des poids aux points (WLS). 3. Regularisation (ridge) : (X^T X + lambda*I) beta = X^T y 4. Validation croisee : separer les donnees en train/test. 5. Regression avec fonctions de base non polynomiales (exponentielles, sinusoides, RBF). 6. Comparer avec une implementation utilisant des matrices (tableaux 2D) pour de meilleures performances. *) (* ------------------------------------------------------------------ Bonus : Verification avec un exemple simple ------------------------------------------------------------------ *) let test_simple () = (* Points alignes parfaitement : y = 2x + 1 *) let xs = [0.; 1.; 2.; 3.; 4.] in let ys = [1.; 3.; 5.; 7.; 9.] in let c = regression_poly xs ys 1 in Printf.printf "\n--- Verification ---\n"; Printf.printf " y = 2x + 1 -> a0 = %.2f, a1 = %.2f\n" (List.nth c 0) (List.nth c 1); Printf.printf " (attendu : a0 = 1.0, a1 = 2.0)\n"; Printf.printf " Erreur : %.0f (doit etre 0)\n" (erreur_quadratique xs ys c) let () = test_simple () (* ------------------------------------------------------------------ Graphiques SVG ------------------------------------------------------------------ *) let () = (* Donnees quadratiques + polynome ajuste *) let coeffs_quad = regression_poly xs_quad ys_quad 2 in let pts_donnees = List.map (fun (x, y) -> (x, y)) donnees_quad in let courbe_quad = List.init 100 (fun i -> let x = -2. +. float i *. 4. /. 99. in (x, evalue_poly coeffs_quad x) ) in (* Nuage des donnees *) nuage ~fichier:"regression_donnees.svg" ~titre:"Donnees (quadratiques)" ~xlab:"x" ~ylab:"y" [pts_donnees]; (* Courbe du polynome ajuste *) courbe_entrainement ~fichier:"regression_courbe.svg" ~titre:"Polynome ajuste (degre 2)" ~xlab:"x" ~ylab:"y" [{ points = courbe_quad; legende = "P(x)" }] let () = (* Barres : comparaison des erreurs par degre *) let degres = [1; 2; 3; 4; 5; 6; 7; 8; 9; 10] in let modeles = tous_les_modeles xs_quad ys_quad degres in let etiq = List.map (fun (d, _, _) -> string_of_int d) modeles in let vals = List.map (fun (_, err, _) -> err) modeles in barres ~fichier:"regression_comparaison.svg" ~titre:"Erreur par degre du polynome" ~xlab:"Degre" ~ylab:"Erreur quadratique" etiq vals let () = (* Approximation de sin(x) *) let n = 50 in let xs_sin = List.init n (fun i -> float_of_int i *. Float.pi /. float_of_int (n - 1)) in let ys_sin = List.map sin xs_sin in let coeffs_sin = regression_poly xs_sin ys_sin 5 in let courbe_sin = List.init 100 (fun i -> let x = float i *. Float.pi /. 99. in (x, sin x) ) in let courbe_approx = List.init 100 (fun i -> let x = float i *. Float.pi /. 99. in (x, evalue_poly coeffs_sin x) ) in courbe_entrainement ~fichier:"regression_sin.svg" ~titre:"Approximation de sin(x) (degre 5)" ~xlab:"x" ~ylab:"y" [ { points = courbe_sin; legende = "sin(x)" }; { points = courbe_approx; legende = "approx" } ]