## Copyright (C) 2021 Stefano Guidoni <ilguido@users.sf.net>
## Copyright (C) 2024 Andreas Bertsatos <abertsatos@biol.uoa.gr>
## Copyright (C) 2025 Swayam Shah <swayamshah66@gmail.com>
##
## This file is part of the statistics package for GNU Octave.
##
## This program is free software; you can redistribute it and/or modify it under
## the terms of the GNU General Public License as published by the Free Software
## Foundation; either version 3 of the License, or (at your option) any later
## version.
##
## This program is distributed in the hope that it will be useful, but WITHOUT
## ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
## FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
## details.
##
## You should have received a copy of the GNU General Public License along with
## this program; if not, see <http://www.gnu.org/licenses/>.

classdef ClusterCriterion < handle
  ## -*- texinfo -*-
  ## @deftp {statistics} ClusterCriterion
  ##
  ## A clustering evaluation object.
  ##
  ## The @code{ClusterCriterion} is a superclass for clustering evaluation
  ## objects, which are created by the @code{evalclusters} function.  It is not
  ## meant to be instantiated directly.
  ##
  ## @seealso{evalclusters, CalinskiHarabaszEvaluation, DaviesBouldinEvaluation,
  ## GapEvaluation, SilhouetteEvaluation}
  ## @end deftp

  properties (GetAccess = public, SetAccess = protected)
    ## -*- texinfo -*-
    ## @deftp {ClusterCriterion} {property} ClusteringFunction
    ##
    ## Clustering algorithm
    ##
    ## A character vector or a function handle specifying the clustering
    ## algorithm used to generate the clustering solutions.  It can be empty if
    ## the clustering solutions are passed as an input matrix.  This property is
    ## read-only.
    ##
    ## @end deftp
    ClusteringFunction = "";

    ## -*- texinfo -*-
    ## @deftp {ClusterCriterion} {property} CriterionName
    ##
    ## Name of the evaluation criterion
    ##
    ## A character vector specifying the name of the criterion used to evaluate
    ## the clustering solutions.  This property is read-only.
    ##
    ## @end deftp
    CriterionName = "";

    ## -*- texinfo -*-
    ## @deftp {ClusterCriterion} {property} CriterionValues
    ##
    ## Criterion values
    ##
    ## A numeric vector containing the values generated by the evaluation
    ## criterion for each clustering solution.  This property is read-only.
    ##
    ## @end deftp
    CriterionValues = [];

    ## -*- texinfo -*-
    ## @deftp {ClusterCriterion} {property} InspectedK
    ##
    ## List of the number of clusters
    ##
    ## A numeric vector containing the list of the number of clusters evaluated.
    ## The list is sorted and holds no duplicates.  This property is read-only.
    ##
    ## @end deftp
    InspectedK = [];

    ## -*- texinfo -*-
    ## @deftp {ClusterCriterion} {property} Missing
    ##
    ## Missing values
    ##
    ## A logical row vector with one element per observation (row) of the data
    ## matrix, which is @code{true} where the observation contains at least one
    ## missing value (@code{NaN}).  This property is read-only.
    ##
    ## @end deftp
    Missing = [];

    ## -*- texinfo -*-
    ## @deftp {ClusterCriterion} {property} NumObservations
    ##
    ## Number of observations
    ##
    ## An integer specifying the number of non-missing observations in the data
    ## matrix.  This property is read-only.
    ##
    ## @end deftp
    NumObservations = 0;

    ## -*- texinfo -*-
    ## @deftp {ClusterCriterion} {property} OptimalK
    ##
    ## Optimal number of clusters
    ##
    ## An integer specifying the optimal number of clusters based on the
    ## evaluation criterion.  This property is read-only.
    ##
    ## @end deftp
    OptimalK = 0;

    ## -*- texinfo -*-
    ## @deftp {ClusterCriterion} {property} OptimalY
    ##
    ## Optimal clustering solution
    ##
    ## A numeric vector representing the clustering solution that corresponds to
    ## the optimal number of clusters.  This property is read-only.
    ##
    ## @end deftp
    OptimalY = [];

    ## -*- texinfo -*-
    ## @deftp {ClusterCriterion} {property} X
    ##
    ## Data used for clustering
    ##
    ## A numeric matrix containing the data used for clustering.  This property
    ## is read-only.
    ##
    ## @end deftp
    X = [];
  endproperties

  properties (Access = protected)
    N = 0; # number of observations (rows of X, missing ones included)
    P = 0; # number of variables (columns of X)
    ClusteringSolutions = []; # one column per inspected K, non-missing rows only
    OptimalIndex = 0; # index of the optimal K; 0 while no optimum is known
  endproperties

  methods (Access = public)

    ## -*- texinfo -*-
    ## @deftypefn {ClusterCriterion} {@var{obj} =} ClusterCriterion (@var{x}, @var{clust}, @var{KList})
    ##
    ## Create a @qcode{ClusterCriterion} object.
    ##
    ## @var{x} is the numeric data matrix, one observation per row.
    ## @var{clust} is either the name of a clustering algorithm
    ## (@qcode{"kmeans"}, @qcode{"linkage"} or @qcode{"gmdistribution"}), a
    ## function handle, or a numeric matrix of precomputed clustering solutions
    ## with as many rows as @var{x}.  @var{KList} is a vector of positive
    ## integers listing the numbers of clusters to inspect.
    ##
    ## @code{ClusterCriterion} is a superclass and is not meant to be
    ## instantiated directly.  Use @code{evalclusters} instead.
    ##
    ## @seealso{evalclusters}
    ## @end deftypefn
    function this = ClusterCriterion (x, clust, KList)
      ## parsing input data
      if (! ismatrix (x) || ! isnumeric (x))
        error ("ClusterCriterion: X must be a numeric matrix.");
      endif
      this.X = x;
      this.N = rows (this.X);
      this.P = columns (this.X);

      ## look for missing values: an observation is missing as soon as any of
      ## its variables is NaN.  The result is a logical row vector.
      this.Missing = any (isnan (x(:,:)), 2).';

      ## number of usable observations
      this.NumObservations = sum (! this.Missing);

      ## parsing the clustering algorithm
      if (ischar (clust))
        if (any (strcmpi (clust, {"kmeans", "linkage", "gmdistribution"})))
          this.ClusteringFunction = lower (clust);
        else
          error ("ClusterCriterion: unknown clustering algorithm '%s'.", clust);
        endif
      elseif (isa (clust, "function_handle"))
        this.ClusteringFunction = clust;
      elseif (ismatrix (clust))
        if (isnumeric (clust) && (ndims (clust) == 2) && ...
            (rows (clust) == this.N))
          ## precomputed solutions: keep only the rows of usable observations
          this.ClusteringFunction = "";
          this.ClusteringSolutions = clust(! this.Missing, :);
        else
          error ("ClusterCriterion: invalid matrix of clustering solutions.");
        endif
      else
        error ("ClusterCriterion: invalid argument.");
      endif

      ## parsing the list of cluster sizes to inspect
      this.InspectedK = parseKList (this, KList);
    endfunction

    ## -*- texinfo -*-
    ## @deftypefn {ClusterCriterion} {@var{obj} =} addK (@var{obj}, @var{k})
    ##
    ## Add a new list of cluster numbers to evaluate.
    ##
    ## @code{addK} adds a new list of cluster numbers, @var{k}, to the
    ## @qcode{ClusterCriterion} object.  @var{k} may be a row or a column
    ## vector of positive integers.  Results already stored for previously
    ## inspected numbers of clusters are kept, whereas the optimal solution is
    ## reset.
    ##
    ## A warning is issued, and the object is left untouched, when the object
    ## was built from a matrix of clustering solutions or when @var{k} does not
    ## add any new number of clusters.
    ##
    ## @end deftypefn
    function this = addK (this, k)

      ## if there is not a clustering function, then we are using a predefined
      ## set of clustering solutions, hence we cannot redefine the number of
      ## solutions
      if (isempty (this.ClusteringFunction))
        warning (strcat ("ClusterCriterion.addK: cannot redefine the list", ...
                         " of cluster numbers to evaluate when there is", ...
                         " not a clustering function"));
        return;
      endif

      ## otherwise go on; both lists are flattened to rows so that the
      ## concatenation works whatever the orientation of K or InspectedK
      newList = this.parseKList ([this.InspectedK(:).', k(:).']);

      ## check if the list has changed
      if (length (newList) == length (this.InspectedK))
        warning ("ClusterCriterion.addK: the list has not changed");
      else
        ## update ClusteringSolutions and CriterionValues: old results are
        ## moved to their position in the new list, new entries are zeroed
        ClusteringSolutions_tmp = zeros (this.NumObservations, ...
                                    length (newList));
        CriterionValues_tmp = zeros (length (newList), 1);
        for iter = 1 : length (this.InspectedK)
          idx = find (newList == this.InspectedK(iter));

          if (! isempty (idx))
            ClusteringSolutions_tmp(:, idx) = this.ClusteringSolutions(:, iter);
            CriterionValues_tmp(idx) = this.CriterionValues(iter);
          endif
        endfor
        this.ClusteringSolutions = ClusteringSolutions_tmp;
        this.CriterionValues = CriterionValues_tmp;

        ## reset the old results
        this.OptimalK = 0;
        this.OptimalY = [];
        this.OptimalIndex = 0;

        ## update the list of cluster numbers to evaluate
        this.InspectedK = newList;
      endif
    endfunction

    ## -*- texinfo -*-
    ## @deftypefn {ClusterCriterion} {@var{h} =} plot (@var{obj})
    ##
    ## Plot the clustering evaluation values.
    ##
    ## @code{plot} generates a plot of the criterion values against the number
    ## of clusters in the current axes.
    ##
    ## The optimal number of clusters is marked with an asterisk, provided that
    ## an optimal solution is available.
    ##
    ## The optional return value, @var{h}, is a graphics handle to the axes
    ## containing the plot.
    ##
    ## @end deftypefn
    function h = plot (this)
      yLabel = sprintf ("%s value", this.CriterionName);
      h = gca ();

      ## remember the hold state of the axes so that it can be restored
      washeld = ishold (h);
      hold (h, "on");

      plot (h, this.InspectedK, this.CriterionValues, "bo-");

      ## OptimalIndex is 0 when no optimum is known (e.g. right after addK),
      ## in which case there is nothing to mark
      if (this.OptimalIndex > 0)
        plot (h, this.OptimalK, this.CriterionValues(this.OptimalIndex), "b*");
      endif

      xlabel (h, "number of clusters");
      ylabel (h, yLabel);

      if (! washeld)
        hold (h, "off");
      endif
    endfunction

    ## -*- texinfo -*-
    ## @deftypefn {ClusterCriterion} {@var{obj} =} compact (@var{obj})
    ##
    ## Create a compact clustering evaluation object.
    ##
    ## This method is not yet implemented: it only issues a warning and returns
    ## the object unchanged.
    ##
    ## @end deftypefn
    function this = compact (this)
      warning ("ClusterCriterion.compact: this method is not yet implemented.");
    endfunction

  endmethods

  methods (Access = private)
    ## Check that KLIST is a numeric vector of positive integers and return it
    ## sorted and without duplicates; raise an error otherwise.
    function retList = parseKList (this, KList)
      ## NOTE: the positivity test must be applied to the values themselves;
      ## testing the indices returned by find() would always succeed.
      if (isnumeric (KList) && isvector (KList) && all (KList > 0) && ...
          all (floor (KList) == KList))
        retList = unique (KList);
      else
        error (strcat ("ClusterCriterion: the list of cluster sizes", ...
                       " must be an array of positive integer numbers."));
      endif
    endfunction
  endmethods
endclassdef

## Test input validation
%!error <ClusterCriterion: X must be a numeric matrix.> ...
%! ClusterCriterion ("1", "kmeans", [1:6])
%!error <ClusterCriterion: unknown clustering algorithm 'k'.> ...
%! ClusterCriterion ([1, 2, 1, 3, 2, 4, 3], "k", [1:6])
%!error <ClusterCriterion: invalid matrix of clustering solutions.> ...
%! ClusterCriterion ([1, 2, 1; 3, 2, 4], 1, [1:6])
%!error <ClusterCriterion: invalid argument.> ...
%! ClusterCriterion ([1, 2, 1; 3, 2, 4], ones (2, 2, 2), [1:6])
%!error <ClusterCriterion: the list of cluster sizes must be an array of positive integer numbers.> ...
%! ClusterCriterion ([1, 2, 1; 3, 2, 4], "kmeans", [-1, 2])
%!error <ClusterCriterion: the list of cluster sizes must be an array of positive integer numbers.> ...
%! ClusterCriterion ([1, 2, 1; 3, 2, 4], "kmeans", [0, 1, 2])
%!error <ClusterCriterion: the list of cluster sizes must be an array of positive integer numbers.> ...
%! ClusterCriterion ([1, 2, 1; 3, 2, 4], "kmeans", [1.5, 2])

## Test constructor results
%!test
%! obj = ClusterCriterion ([1, 2; NaN, 4; 5, 6], "KMeans", [3, 1, 2, 2]);
%! assert (obj.ClusteringFunction, "kmeans");
%! assert (obj.InspectedK, [1, 2, 3]);
%! assert (obj.NumObservations, 2);
%! assert (obj.Missing, [false, true, false]);
