(* ==================================================================
   Cours 4 (suite) : Classification lineaire (logistic regression)
   ================================================================== *)

#directory "/home/thiry/Bureau/ML"
#load "graphiques.cmo"
open Graphiques

(* ------------------------------------------------------------------
   Principe
   ------------------------------------------------------------------ *
   On a des points (x, y) de deux couleurs differentes.
   On cherche la droite (ou le hyperplan) qui les separe au mieux :
       z = w0 + w1*x + w2*y
       prediction = sigmoid(z)   ->  0 ou 1

   La fonction sigmoid transforme un reel en probabilite entre 0 et 1 :
       sigmoid(z) = 1 / (1 + exp(-z))

   On ajuste les poids w par descente de gradient pour minimiser
   l'erreur de classification (cross-entropy).
   ------------------------------------------------------------------ *)

(* ------------------------------------------------------------------
   1. Fonctions de base
   ------------------------------------------------------------------ *)

let sigmoid z = 1. /. (1. +. exp (-.z))

(* Prediction : transforme (x,y) en probabilite d'appartenir a la classe 1 *)
let prediction (w0, w1, w2) (x, y) =
  sigmoid (w0 +. w1 *. x +. w2 *. y)

(* ------------------------------------------------------------------
   2. Fonction de cout (cross-entropy)
   ------------------------------------------------------------------ *)

let cout (w0, w1, w2) donnees =
  let n = float_of_int (List.length donnees) in
  List.fold_left (fun somme (x, y, cible) ->
    let p = prediction (w0, w1, w2) (x, y) in
    (* Cross-entropy : -[cible*log(p) + (1-cible)*log(1-p)] *)
    let eps = 1e-15 in  (* evite log(0) *)
    let p = max eps (min (1. -. eps) p) in
    somme -. (cible *. log p +. (1. -. cible) *. log (1. -. p))
  ) 0. donnees /. n

(* ------------------------------------------------------------------
   3. Descente de gradient
   ------------------------------------------------------------------ *)

let gradient (w0, w1, w2) donnees =
  let n = float_of_int (List.length donnees) in
  List.fold_left (fun (g0, g1, g2) (x, y, cible) ->
    let p = prediction (w0, w1, w2) (x, y) in
    let err = p -. cible in
    (g0 +. err, g1 +. err *. x, g2 +. err *. y)
  ) (0., 0., 0.) donnees
  |> fun (g0, g1, g2) -> (g0 /. n, g1 /. n, g2 /. n)

let descente_gradient donnees taux pas_init n_iter =
  let rec boucle w t =
    if t >= n_iter then w
    else
      let taux_decay = taux /. (1. +. 0.001 *. float_of_int t) in
      let (g0, g1, g2) = gradient w donnees in
      let (w0, w1, w2) = w in
      boucle (w0 -. taux_decay *. g0,
              w1 -. taux_decay *. g1,
              w2 -. taux_decay *. g2) (t + 1)
  in
  boucle (0., 0., 0.) 0

(* ------------------------------------------------------------------
   4. Generation de donnees
   ------------------------------------------------------------------ *)

(* Nuage de points 2D avec deux classes *)
let genere_nuage n_par_classe =
  let rec points cls n acc =
    if n <= 0 then acc
    else
      let x = Random.float 10. -. 5.
      and y = Random.float 10. -. 5. in
      let (x, y, cible) =
        if cls = 0 then
          (x -. 2., y -. 2., 0.)
        else
          (x +. 2., y +. 2., 1.)
      in
      (* Ajouter un peu de bruit *)
      let x = x +. Random.float 1. -. 0.5
      and y = y +. Random.float 1. -. 0.5 in
      points cls (n - 1) ((x, y, cible) :: acc)
  in
  points 0 n_par_classe [] @ points 1 n_par_classe []

(* ------------------------------------------------------------------
   5. Evaluation
   ------------------------------------------------------------------ *)

let exactitude w donnees =
  let bonnes = List.filter (fun (x, y, cible) ->
    let p = prediction w (x, y) in
    (p >= 0.5 && cible = 1.) || (p < 0.5 && cible = 0.)
  ) donnees in
  100. *. float_of_int (List.length bonnes) /. float_of_int (List.length donnees)

(* ------------------------------------------------------------------
   6. Affichage du nuage et de la frontiere
   ------------------------------------------------------------------ *)

(* Frontiere de decision : w0 + w1*x + w2*y = 0  ->  y = -(w0 + w1*x) / w2 *)
let affiche_nuage donnees w =
  let xs = List.map (fun (x, _, _) -> x) donnees in
  let ys = List.map (fun (_, y, _) -> y) donnees in
  let x_min = List.fold_left min max_float xs -. 1.
  and x_max = List.fold_left max ~-.max_float xs +. 1.
  and y_min = List.fold_left min max_float ys -. 1.
  and y_max = List.fold_left max ~-.max_float ys +. 1. in
  let largeur = 60 in
  let hauteur = 20 in
  for ligne = 0 to hauteur - 1 do
    let y = y_max -. float_of_int ligne *. (y_max -. y_min) /. float_of_int hauteur in
    let buf = Buffer.create largeur in
    for col = 0 to largeur - 1 do
      let x = x_min +. float_of_int col *. (x_max -. x_min) /. float_of_int largeur in
      let dans_classe1 = prediction w (x, y) >= 0.5 in
      (* Verifier s'il y a un point de donnees proche *)
      let proche = List.exists (fun (dx, dy, c) ->
        let d = sqrt ((dx -. x) ** 2. +. (dy -. y) ** 2.) in
        d < 0.3
      ) donnees in
      if proche then
        let cible = List.find (fun (dx, dy, _) ->
          sqrt ((dx -. x) ** 2. +. (dy -. y) ** 2.) < 0.3
        ) donnees |> fun (_, _, c) -> c in
        Buffer.add_char buf (if cible = 1. then '+' else '.')
      else if dans_classe1 then
        Buffer.add_char buf ' '
      else
        Buffer.add_char buf '#'
    done;
    Printf.printf "  %s\n" (Buffer.contents buf)
  done

(* ------------------------------------------------------------------
   7. Exemples
   ------------------------------------------------------------------ *)

let () =
  Random.self_init ();
  Printf.printf "=== Classification lineaire (logistic regression) ===\n\n";
  Printf.printf "  sigmoid(z) = 1 / (1 + exp(-z))\n";
  Printf.printf "  decision : w0 + w1*x + w2*y = 0\n\n";

  (* --- Exemple 1 : nuage de points --- *)
  Printf.printf "--- Exemple 1 : Nuage de 2 classes ---\n";
  let donnees = genere_nuage 30 in
  let n_classe0 = List.length (List.filter (fun (_, _, c) -> c = 0.) donnees)
  and n_classe1 = List.length (List.filter (fun (_, _, c) -> c = 1.) donnees) in
  Printf.printf "  Classe 0 (.) : %d points\n" n_classe0;
  Printf.printf "  Classe 1 (+) : %d points\n\n" n_classe1;

  (* --- Exemple 2 : Entrainement --- *)
  Printf.printf "--- Exemple 2 : Descente de gradient ---\n";
  let w = descente_gradient donnees 0.1 0. 2000 in
  let (w0, w1, w2) = w in
  Printf.printf "  Poids trouves : w0 = %.4f, w1 = %.4f, w2 = %.4f\n" w0 w1 w2;
  Printf.printf "  Frontiere : y = %.4f + %.4f * x\n"
    (-.w0 /. w2) (-.w1 /. w2);
  Printf.printf "  Exactitude : %.1f %%\n" (exactitude w donnees);

  (* --- Exemple 3 : Visualisation --- *)
  Printf.printf "\n--- Exemple 3 : Carte de decision ---\n";
  Printf.printf "  '#' = classe 0, '+' = classe 1, '.'/'+' = donnees\n\n";
  affiche_nuage donnees w;
  Printf.printf "\n  (La frontiere separe les zones # des zones blanches)\n"

(* ------------------------------------------------------------------
   Exercices pour le cours
   ------------------------------------------------------------------ *
   1. Regularisation L2 (ridge) : ajouter lambda * norme(w)^2 au cout

   2. Classification multiclasse : softmax au lieu de sigmoid

   3. Features polynomiales : ajouter x^2, xy, y^2 comme entrees
      (classifieur non lineaire sans changer le code)

   4. Early stopping : arreter si le cout n'amelioration plus

   5. Mini-batch SGD : au lieu de tout le jeu de donnees, utiliser
      des sous-ensembles aleatoires

   6. Matrice de confusion : afficher les vrais/faux positifs/negatifs
   *)

(* ------------------------------------------------------------------
   Bonus : verification avec des donnees lineairement separables
   ------------------------------------------------------------------ *)

let () =
  Printf.printf "\n=== Bonus : donnees parfaitement separables ===\n";
  let parfaites = [
    (-3., -3., 0.); (-2.5, -3.5, 0.); (-4., -2., 0.); (-3.5, -2.5, 0.);
    ( 3.,  3., 1.); ( 2.5,  3.5, 1.); ( 4.,  2., 1.); ( 3.5,  2.5, 1.);
  ] in
  let w = descente_gradient parfaites 0.1 0. 500 in
  Printf.printf "  Exactitude : %.1f %%\n" (exactitude w parfaites)

(* ------------------------------------------------------------------
   Graphiques SVG
   ------------------------------------------------------------------ *)

let () =
  (* Re-entrainement avec historique du cout *)
  let donnees = genere_nuage 30 in
  let n_iter = 2000 in
  let historique = ref [] in
  let rec boucle w t =
    if t >= n_iter then w
    else
      let taux_decay = 0.1 /. (1. +. 0.001 *. float_of_int t) in
      let (g0, g1, g2) = gradient w donnees in
      let (w0, w1, w2) = w in
      let w' = (w0 -. taux_decay *. g0, w1 -. taux_decay *. g1, w2 -. taux_decay *. g2) in
      if t mod 10 = 0 then
        historique := (float_of_int t, cout w' donnees) :: !historique;
      boucle w' (t + 1)
  in
  let w = boucle (0., 0., 0.) 0 in

  (* Courbe d'apprentissage *)
  courbe_entrainement ~fichier:"classifieur_loss.svg"
    ~titre:"Descente de gradient (cross-entropy)"
    ~xlab:"Iteration" ~ylab:"Cross-entropy"
    [{ points = List.rev !historique; legende = "Entrainement" }];

  (* Nuage des donnees separes par classe *)
  let cls0 = List.filter_map (fun (x, y, c) -> if c = 0. then Some (x, y) else None) donnees in
  let cls1 = List.filter_map (fun (x, y, c) -> if c = 1. then Some (x, y) else None) donnees in
  nuage ~fichier:"classifieur_nuage.svg"
    ~titre:"Donnees d'entrainement"
    ~xlab:"x" ~ylab:"y"
    [cls0; cls1];

  (* Frontiere de decision : w0 + w1*x + w2*y = 0 *)
  let (w0, w1, w2) = w in
  let pts_frontiere = List.init 100 (fun i ->
    let x = -5. +. float i *. 10. /. 99. in
    let y = (-.w0 -. w1 *. x) /. w2 in
    (x, y)
  ) in
  courbe_entrainement ~fichier:"classifieur_frontiere.svg"
    ~titre:"Frontiere de decision"
    ~xlab:"x" ~ylab:"y"
    [{ points = pts_frontiere; legende = "w0 + w1*x + w2*y = 0" }]