package flare.analytics.cluster { import flare.util.Arrays; import flare.util.Property; import flare.util.Sort; import flare.vis.data.Data; import flare.vis.data.DataList; import flare.vis.data.EdgeSprite; import flare.vis.data.NodeSprite; import flare.vis.data.Tree; import flare.vis.operator.Operator; /** * Base class for clustering operators that construct a hierarchical * clustering tree. Provides methods for taking a sequence of * merge operations by a clustering algorithm and constructing a * resulting cluster tree. * *

Once a clustering has been computed, each node included in the * analysis will be annotated with both its cluster membership * using the name indicated by the clusterField property. * By default, this class will attempt to pick an optimal level of the * cluster tree at which to break up items into discrete clusters. * However, clients can invoke the labelNodes method with * a specific merge number which indicates the point at which to "cut" * the cluster tree into discrete sub-clusters. This class also annotates * nodes with a sequence number using the name indicated by the * sequenceField property. The sequence number allows items * to be sorted in a way that attempts to preserve the clustered structure. * Additionally, the clusterTree property will return a * flare.vis.data.Tree instance that can be used to * visualize the structure of the cluster tree.

*/ public class HierarchicalCluster extends Operator { /** @private */ protected var _idx:Property = Property.$("props.sequence"); /** @private */ protected var _com:Property = Property.$("props.cluster"); /** @private */ protected var _qvals:Array; /** @private */ protected var _merges:MergeEdge; /** @private */ protected var _size:int; /** @private */ protected var _tree:Tree; /** @private */ protected var _group:String = Data.NODES; /** The data group to cluster. */ public function get group():String { return _group; } public function set group(g:String):void { _group = g; } /** The property in which to store cluster indices. This property * is used to annotate nodes with the community they belong to * (indicated as an integer index). The default value * is "props.cluster". */ public function get clusterField():String { return _com.name; } public function set clusterField(f:String):void { _com = Property.$(f); } /** The property in which to store sequence indices. This property * is used to annotate nodes with their sequence index along the * computed cluster tree. This value can be used to sort the nodes * in a way that best preserves community structure. The default value * is "props.sequence". */ public function get sequenceField():String { return _idx.name; } public function set sequenceField(f:String):void { _idx = Property.$(f); } /** Computed criterion values for each merge in the cluster tree. */ public function get criteria():Array { return _qvals; } /** The cluster tree of detected community structures. The leaf nodes * correspond to each of the nodes in the input graph, and include the * same data property. Non-leaf nodes indicate merges * made by the clustering algorithm, and have data * properties that include the merge number (1 for the * first merger, 2 for the second, etc), the criterion * value computed for that merge, and the size of the * cluster rooted at that node (the number of descendants in the * cluster tree). */ public function get clusterTree():Tree { return _tree; } // -------------------------------------------------------------------- /** * Creates a new HierarchicalCluster instance. */ public function HierarchicalCluster() { super(); } /** * Labels nodes with their cluster membership, determined by * the given merge number. If the merge number is less than * zero or unprovided, the merge number that maximizes the * clustering criteria will be assumed. * @param merge the merge number at which to compute the clusters */ public function labelNodes(merge:int=-1):void { if (merge < 0) merge = Arrays.maxIndex(_qvals); var com:int, idx:int; var helper:Function = function(n:NodeSprite):void { var nn:NodeSprite; if (n.childDegree && n.data.merge > merge) { for (var i:int=0; i= r.props.size) { p.addChildEdge(new EdgeSprite(p, l)); p.addChildEdge(new EdgeSprite(p, r)); } else { p.addChildEdge(new EdgeSprite(p, r)); p.addChildEdge(new EdgeSprite(p, l)); } p.data.merge = ii + 1; p.data.criterion = _qvals[ii]; p.props.size = 2 + l.props.size + r.props.size; delete map[j]; } // build and sort array of cluster roots var roots:Array = []; for each (var n:NodeSprite in map) roots.push(n); roots.sort(Sort.$("-props.size")); // merge cluster roots into final tree, if needed if (roots.length == 1) { p = NodeSprite(roots[0]); } else { p = new NodeSprite(); p.data.merge = ii; for each (n in roots) { p.addChildEdge(new EdgeSprite(p, n)); } } tree.root = p; return tree; } } // end of class HierarchicalCluster }