"""
The SCIMES core package
"""
import numpy as np
import random
from itertools import combinations, cycle

from matplotlib import pyplot as plt

from astropy.io import fits
from astropy.table import Column

from sklearn.metrics import silhouette_score
from sklearn.manifold import spectral_embedding
# sklearn.cluster.k_means_ was a private module removed in scikit-learn 0.24;
# the public k_means function has lived in sklearn.cluster for a long time.
try:
    from sklearn.cluster import k_means
except ImportError:  # very old scikit-learn
    from sklearn.cluster.k_means_ import k_means
def mat_smooth(Mat, S2Nmat, s2nlim = 3, scalpar = None, lscal = False):

    """
    Estimate the scaling parameter and rescale
    the affinity matrix through a Gaussian kernel.

    Parameters
    -----------
    Mat: numpy array
        The affinity matrix to be rescaled.
    S2Nmat: numpy array
        Signal-to-noise ratio affinity matrix.
        If rms is np.nan S2Nmat is np.nan and
        the scaling parameter is searched between
        the 5 largest gaps.
    s2nlim: int or float
        Signal-to-noise limit above which the
        scaling parameter is calculated.
        Needed only if S2Nmat is not np.nan.
    scalpar: float
        User-defined scaling parameter.
    lscal: bool
        Rescale the matrix using a local
        scaling approach.

    Return
    -------
    NM: numpy array
        Rescaled affinity matrix.
    sigma: float or numpy array
        The estimated (squared) scaling parameter;
        a matrix when local scaling is used.
    """

    # Using estimated global scaling
    if scalpar is None and not lscal:

        # A meaningful signal-to-noise matrix exists only when rms was given
        has_s2n = not np.isnan(np.median(S2Nmat))

        if has_s2n:
            # Keep only affinities above the signal-to-noise limit
            Aff = np.unique(Mat[S2Nmat > s2nlim].ravel())[1::]
        else:
            Aff = np.unique(Mat.ravel())[1::]

        # Candidate sigmas are the midpoints between consecutive unique
        # affinities; the gaps between consecutive affinities locate
        # natural cuts of the matrix
        psigmas = ((Aff + np.roll(Aff, -1))/2)[1:-1]
        diff = (np.roll(Aff, -1) - Aff)[1:-1]

        if has_s2n:
            # Take the largest affinity gap
            sel_diff_ind = np.argmax(diff)
        else:
            # Without a noise estimate, take the lowest-affinity gap
            # among the 5 largest ones
            sel_diff_ind = np.min(np.argsort(diff)[::-1][0:5])

        sigma = psigmas[sel_diff_ind]**2

        print("-- Estimated scaling parameter: %f" % np.sqrt(sigma))

    # Using local scaling
    if scalpar is None and lscal:

        print("-- Local scaling")
        # Per-column standard deviation used as local scale;
        # sigma becomes a full matrix of pairwise scale products
        dr = np.std(Mat, axis=0)
        sigmar = np.tile(dr, (Mat.shape[0], 1))
        sigma = sigmar*sigmar.T

    # Using user-defined scaling parameter
    if scalpar:

        print("-- User defined scaling parameter: %f" % scalpar)
        sigma = scalpar**2

    # Gaussian kernel rescaling; the diagonal (self-affinity) is zeroed
    NM = np.exp(-(Mat**2)/sigma)
    np.fill_diagonal(NM, 0)

    return NM, sigma
def aff_matrix(num, trk, allleavidx, allbrcidx, brclevels, dictchildrens, props):

    """
    Generate the affinity matrices.

    Parameters
    -----------
    num: int
        Number of non isolated leaves.
    trk: int
        Dummy index of the trunk.
    allleavidx: list
        List of all leaf indexes within the dendrogram.
    allbrcidx: list
        List of all branch (parent) indexes
        within the dendrogram.
    brclevels: list
        Dendrogram levels of all branches.
    dictchildrens: dictionary
        Descendants of all branches within
        the dendrogram.
    props: list of lists
        Properties of all leaf parents and
        ancestors within the dendrogram.

    Return
    -------
    WAs: numpy array
        Clustering criteria affinity matrices.
    """

    print("- Creating affinity matrices")

    # Widx[i, j] ends up holding the dendrogram index of the deepest
    # structure containing both leaf i and leaf j; every pair starts
    # from the dummy trunk index.
    Widx = np.full((num, num), trk, dtype='int')
    WAs = np.zeros((len(props), num, num))

    brclevels = np.asarray(brclevels)
    allbrcidx = np.asarray(allbrcidx)
    allleavidx = np.asarray(allleavidx)
    leaf_pos = np.arange(len(allleavidx))

    # Sweep the branches from the trunks downward: deeper branches are
    # visited later and overwrite shallower assignments, so every leaf
    # pair retains its deepest common parent.
    for branch in allbrcidx[np.argsort(brclevels)]:
        branch_leaves = np.asarray(dictchildrens[str(branch)])
        pos = leaf_pos[np.in1d(allleavidx, branch_leaves)]
        Widx[np.ix_(pos, pos)] = branch

    Widx[np.diag_indices(num)] = 0

    # Map each structure property onto the leaf pairs through Widx.
    for j, prop in enumerate(props):
        prop = np.asarray(prop)
        Wprop = prop[Widx]
        Wprop[np.diag_indices(num)] = 0
        WAs[j, :, :] = Wprop

    return WAs
def guessk(Mat, thresh = 0.2):

    """
    Guess the number of clusters by counting
    the connected blocks in the affinity matrix.

    Parameters
    -----------
    Mat: numpy array
        The rescaled affinity matrix to guess the
        number of clusters from.
    thresh: float
        The threshold to mask the matrix and count
        the blocks.

    Return
    -------
    kguess: int
        Number of guessed clusters.
    """

    # Binarize a copy of the matrix: affinities below the threshold are
    # treated as disconnections, everything else as a connection.
    M = 1*Mat
    M[M < thresh] = 0
    M[M > 0] = 1
    np.fill_diagonal(M, 1)

    # For a block-structured matrix the row sum equals the size of the
    # connected block the corresponding leaf belongs to.
    block_sizes = M.sum(axis=1)

    # Walk the diagonal jumping block by block; every block larger
    # than a single element counts as one extra cluster.
    kguess = 1
    i = 0
    while i < len(block_sizes) - 1:
        size = int(block_sizes[i])
        if size != 1:
            kguess += 1
        i += size

    return kguess
def clust_cleaning(allleavidx, allclusters, dictpars, dictchilds, dictancests, savebranches = False):

    """
    Remove clusters that do not correspond to
    isolated dendrogram branches.

    Parameters
    -----------
    allleavidx: list
        List of all leaf indexes within the
        dendrogram.
    allclusters: numpy array
        Cluster label per leaf, as produced by k-means
        (boolean indexing below requires a numpy array,
        not a plain list).
    dictpars: dictionary
        Parents and ancestors of all leaves
        within the dendrogram.
    dictchilds: dictionary
        Children of all branches
        within the dendrogram.
    dictancests: dictionary
        Ancestors of all structures within the
        dendrogram; used only when savebranches is True.
    savebranches: bool
        Retain all isolated branches usually discarded
        by the cluster analysis.

    Return
    -------
    cores_idx: list
        The final cluster dendrogram indexes.
    """

    if savebranches == True:
        print("SAVE_BRANCHES triggered, all isolated branches will be retained")

    cores_idx = []

    # Relabel clusters to consecutive integers 0..N-1, preserving the
    # order of first appearance in allclusters.
    _, ucidx = np.unique(allclusters, return_index=True)
    uclusters = allclusters[np.sort(ucidx)]
    rallclusters = np.zeros(len(allclusters), dtype=np.int32)
    for j,ucluster in enumerate(uclusters):
        rallclusters[allclusters == ucluster] = j

    for cluster in set(rallclusters):

        # Selecting the cluster
        clust_idx = rallclusters == cluster

        # Leaves and levels in that cluster
        clust_leaves_idx = np.asarray(allleavidx)[clust_idx]

        # Collect every structure (parent or ancestor) that contains at
        # least one leaf of this cluster; the last entry of each parent
        # list (the trunk) is not a real candidate.
        all_par_list = []
        for cli in clust_leaves_idx:
            par_list = dictpars[str(cli)]
            par_list = par_list[0:-1] #The lowest, the trunk, is not considered
            all_par_list = all_par_list + par_list
        all_par_list = list(set(all_par_list))

        core_clust_num = []
        clust_core_num = []

        # For every candidate structure count:
        #  - leaves it contains that are NOT in the cluster (contamination)
        #  - leaves it shares with the cluster (coverage)
        for i in range(len(all_par_list)):

            sel_par = all_par_list[i]
            core_leaves_idx = dictchilds[str(sel_par)]

            # Leaves in the core but not in the cluster
            core_clust = list(set(core_leaves_idx) - set(clust_leaves_idx))
            core_clust_num.append(len(core_clust))

            # Leaves in the cluster but not in the core
            clust_core = list(set(clust_leaves_idx) & set(core_leaves_idx))
            clust_core_num.append(len(clust_core))

        # The selected core must not contain other leaves than
        # those of the cluster, plus it is the one with the highest
        # number of cluster leaves contained
        core_clust_num = np.asarray(core_clust_num)
        core_clust_num0 = np.where(core_clust_num == 0)

        if len(core_clust_num0[0]) > 0:

            # Keep only uncontaminated candidates.
            all_par_list = np.asarray(all_par_list)
            all_par_list0 = all_par_list[core_clust_num0]
            all_par_list0 = np.asarray(all_par_list0)

            clust_core_num = np.asarray(clust_core_num)
            clust_core_num0 = clust_core_num[core_clust_num0]
            clust_core_num0 = np.asarray(clust_core_num0)

            if savebranches == True:
                # Retain every candidate that is not nested inside
                # another candidate: sel_num counts how many of its own
                # ancestors are also candidates (<= 1 means top-level,
                # since a structure is listed among its own ancestors).
                sel_num = np.zeros(len(all_par_list0))
                for par in all_par_list0:
                    ancests = dictancests[str(par)]
                    cancests = list(set(ancests) & set(all_par_list0))
                    sel_num[all_par_list0 == par] = len(cancests)
                cores = all_par_list0[sel_num <= 1]
            else:
                # Keep only the candidate(s) covering the largest number
                # of cluster leaves.
                max_num = clust_core_num0 == max(clust_core_num0)
                cores = all_par_list0[max_num]

            cores_idx = cores_idx + cores.tolist()

        else:
            # No structure contains exclusively this cluster's leaves:
            # the cluster cannot be mapped onto an isolated branch.
            print("Unassignable cluster %i" % cluster)

    return cores_idx
def make_asgncube(dendro, asgn_idx, header, collapse = True):

    """
    Create a label cube with only the cluster (cloudster) IDs included.

    Parameters
    ----------
    dendro: 'astrodendro.dendrogram.Dendrogram' instance
        The clusterized dendrogram.
    asgn_idx: list
        Dendrogram structure indexes to paint into the cube.
    header : `fits.Header`
        The header of the output assignment cube. Should be the same
        header that the dendrogram was generated from.
    collapse : bool
        Collapsed (2D) version of the assignment cube.
        Requires matplotlib.

    Return
    -------
    asgn = 'astropy.io.fits.PrimaryHDU' instance
        Cube of labels.
    """

    data = dendro.data.squeeze()

    # Label cube: every voxel starts unassigned (-1); each selected
    # structure paints its mask with its own dendrogram index.
    labels = np.full(data.shape, -1, dtype=np.int32)
    for struct_id in asgn_idx:
        mask = dendro[struct_id].get_mask(shape=labels.shape)
        labels[mask] = struct_id

    # Wrap the labels into a FITS HDU as 16-bit integers.
    asgn = fits.PrimaryHDU(labels.astype('short'), header)

    if collapse:
        # Maximum-label projection along the spectral axis.
        asgn_map = np.amax(asgn.data, axis=0)

        plt.matshow(asgn_map, origin="lower")
        cbar = plt.colorbar()
        cbar.ax.get_yaxis().labelpad = 15
        cbar.ax.set_ylabel('Structure label', rotation=270)

    return asgn
def cloudstering(dendrogram, catalog, criteria, user_k, user_ams, user_scalpars, user_iter,
                 save_isol_leaves, save_clust_leaves, save_branches, blind, rms, s2nlim, locscal):

    """
    SCIMES main function. It collects parents/children
    of all structures within the dendrogram, and their
    properties. It calls the affinity matrix-related
    functions (for creation, rescaling, cluster counting),
    and it runs several times the actual spectral clustering
    routine by calculating every time the silhouette of the
    current configuration. Input parameters are passed by the
    SpectralCloudstering class.

    Parameters
    -----------
    dendrogram: 'astrodendro.dendrogram.Dendrogram' instance
        The dendrogram to clusterize.
    catalog: 'astropy.table.table.Table' instance
        A catalog containing all properties of the dendrogram
        structures. Generally generated with ppv_catalog module.
    criteria: list of strings
        Clustering criteria referred to the structure properties
        in the catalog (default ['volume', 'luminosity']).
    user_k: int
        The expected number of clusters; if 0
        it will be guessed automatically from the
        unsmoothed affinity matrix.
    user_ams: numpy array
        User provided affinity matrix. If this is not
        furnished it is automatically generated through the
        volume and/or luminosity criteria.
    user_scalpars: list of floats
        User-provided scaling parameters to smooth the
        affinity matrices.
    user_iter: int
        User-provided number of k-means iterations.
    save_isol_leaves: bool
        Consider the isolated leaves (without parent)
        as individual 'clusters'.
    save_clust_leaves: bool
        Consider unclustered leaves as individual
        'clusters'. This keyword will not include
        the isolated leaves without parents.
    save_branches: bool
        Retain all isolated branches usually discarded
        by the cluster analysis.
    blind: bool
        Do not show the affinity matrices.
        Matplotlib required when False.
    rms: int or float
        Noise level of the observation. Necessary to
        calculate the scaling parameter above a certain
        signal-to-noise ratio.
    s2nlim: int or float
        Signal-to-noise limit above which the
        scaling parameter is calculated. Needed
        only if rms is not np.nan.
    locscal: bool
        Smooth the affinity matrices using a local
        scaling technique.

    Return
    -------
    clusts: list
        The dendrogram branch indexes corresponding to the
        identified clusters.
    catalog: 'astropy.table.table.Table' instance
        The input catalog updated with dendrogram structure
        parent, ancestor, number of leaves, and type
        ('T', trunks or branches without parent; 'B', branches
        with parent; 'L', leaves).
    AMs: numpy array
        The affinity matrices calculated by the algorithm.
    escalpars: list
        Estimated scaling parameters for the different
        affinity matrices.
    silhoutte: float
        Silhouette of the best cluster configuration
        (np.nan when clustering was not necessary).
    """

    # Collecting all connectivity and other information into more handy lists
    all_structures_idx = np.arange(len(catalog[criteria[0]].data), dtype='int')

    all_levels = []
    brc_levels = []

    all_leav_names = []
    all_leav_idx = []

    all_brc_names = []
    all_brc_idx = []

    all_parents = []
    all_children = []

    all_struct_names = []
    all_ancestors = []

    all_struct_ancestors = []
    all_struct_parents = []
    all_struct_types = []
    nleaves = []

    trunk_brs_idx = []
    two_clust_idx = []
    mul_leav_idx = []

    s2ns = []

    for structure_idx in all_structures_idx:

        s = dendrogram[structure_idx]

        all_levels.append(s.level)
        s2ns.append(dendrogram[structure_idx].height/rms)

        all_struct_names.append(str(s.idx))
        all_struct_ancestors.append(s.ancestor.idx)
        if s.parent:
            all_struct_parents.append(s.parent.idx)
        else:
            # Root structures (trunks, isolated leaves) get parent -1
            all_struct_parents.append(-1)

        nleaves.append(len(s.sorted_leaves()))

        # Chain of ancestors from the direct parent up to the root,
        # plus the structure itself.
        ancestors = []
        anc = s.parent
        while anc != None:
            ancestors.append(anc.idx)
            anc = anc.parent
        ancestors.append(s.idx)
        all_ancestors.append(ancestors)

        # If structure is a leaf find all the parents
        if s.is_leaf and s.parent != None:

            par = s.parent
            all_leav_names.append(str(s.idx))

            parents = []
            while par != None:
                parents.append(par.idx)
                par = par.parent
            parents.append(len(catalog[criteria[0]].data)) # This is the trunk!
            all_parents.append(parents)

        # If structure is a branch find all its leaves
        if s.is_branch:

            brc_levels.append(s.level)
            all_brc_idx.append(s.idx)
            all_brc_names.append(str(s.idx))

            children = []
            for leaf in s.sorted_leaves():
                children.append(leaf.idx)
            all_children.append(children)

            # Trunk branches
            if s.parent == None:

                trunk_brs_idx.append(s.idx)
                all_leav_idx = all_leav_idx + children

                # Trunks whose children are both leaves are trivial
                # two-leaf clusters; all others enter the clustering.
                if s.children[0].is_branch or s.children[1].is_branch:
                    mul_leav_idx = mul_leav_idx + children
                else:
                    two_clust_idx.append(s.idx)
                all_struct_types.append('T')
            else:
                all_struct_types.append('B')
        else:
            all_struct_types.append('L')

    two_clust_idx = np.unique(two_clust_idx).tolist()

    dict_parents = dict(zip(all_leav_names,all_parents))
    dict_children = dict(zip(all_brc_names,all_children))
    dict_ancestors = dict(zip(all_struct_names,all_ancestors))

    all_levels.append(-1)
    all_levels = np.asarray(all_levels)

    # Retrieving needed properties from the catalog
    # and adding fake "trunk" properties
    props = []
    for crit in criteria:
        prop = catalog[crit].data.tolist()
        tprop = sum(catalog[crit].data[trunk_brs_idx])
        prop.append(tprop)
        props.append(prop)

    # The last "property" is the signal-to-noise of each structure.
    s2ns.append(1)
    props.append(s2ns)

    # Generating affinity matrices if not provided.
    # NOTE: identity check (is None) -- '==' on a numpy array would
    # produce an element-wise comparison, not a boolean.
    if user_ams is None:

        AMs = aff_matrix(len(all_leav_idx), len(catalog[criteria[0]].data),
                         all_leav_idx, all_brc_idx, brc_levels, dict_children, props)

        if blind == False:

            # Showing all affinity matrices
            for i, crit in enumerate(criteria):
                plt.matshow(AMs[i,:,:])
                plt.title('"'+crit+'" affinity matrix', fontsize = 'medium')
                plt.xlabel('leaf index')
                plt.ylabel('leaf index')
                plt.colorbar()
    else:
        AMs = user_ams

    # The last matrix is the signal-to-noise matrix, not a criterion.
    S2Nmat = AMs[-1,:,:]
    AMs = AMs[:-1,:,:]

    # Check if the affinity matrix has more than 2 elements
    # otherwise return everything as clusters ("save_all").
    if AMs.shape[1] <= 2:

        print("--- Not necessary to cluster. 'save_all' keyword triggered")
        all_leaves = []
        for leaf in dendrogram.leaves:
            all_leaves.append(leaf.idx)

        clusts = all_leaves
        # FIX: return the full 5-tuple the caller unpacks (catalog is
        # returned without the extra columns in this degenerate case).
        return clusts, catalog, AMs, [], np.nan

    # Scaling parameters: entries are None unless user-provided
    # (mat_smooth estimates a parameter for each None entry).
    scpars = np.array(user_scalpars)

    print("- Start spectral clustering")

    # Selecting the criteria and merging the matrices
    escalpars = []
    AM = np.ones(AMs[0,:,:].shape)
    for i, crit in enumerate(criteria):
        print("-- Rescaling %s matrix" % crit)
        AMc, sigma = mat_smooth(AMs[i,:,:], S2Nmat, s2nlim = s2nlim,
                                scalpar = scpars[i], lscal = locscal)
        AM = AM*AMc
        escalpars.append(sigma)

    # Making the reduced affinity matrices
    mul_leav_mat = []
    for mli in mul_leav_idx:
        mul_leav_mat.append(all_leav_idx.index(mli))

    mul_leav_mat = np.asarray(mul_leav_mat)
    rAM = AM[mul_leav_mat,:]
    rAM = rAM[:,mul_leav_mat]

    if blind == False:

        # Showing the final affinity matrix
        plt.matshow(AM)
        plt.colorbar()
        plt.title('Final Affinity Matrix')
        plt.xlabel('leaf index')
        plt.ylabel('leaf index')

    # Guessing the number of clusters
    # if not provided
    if user_k == 0:
        kg = guessk(rAM)
    else:
        kg = user_k-len(two_clust_idx)

    print("-- Guessed number of clusters = %i" % (kg+len(two_clust_idx)))

    if kg > 1:

        print("-- Number of k-means iteration: %i" % user_iter)

        # Find the best cluster number
        sils = []

        min_ks = max(2,kg-15)
        max_ks = min(kg+15,rAM.shape[0]-1)
        clust_configs = []

        for ks in range(min_ks,max_ks):

            try:
                evecs = spectral_embedding(rAM, n_components=ks,
                                           eigen_solver='arpack',
                                           random_state=222,
                                           eigen_tol=0.0, drop_first=False)
                _, all_clusters, _ = k_means(evecs, ks, random_state=222, n_init=user_iter)
                sil = silhouette_score(evecs, np.asarray(all_clusters), metric='euclidean')
                clust_configs.append(all_clusters)
            except np.linalg.LinAlgError:
                sil = 0
                # FIX: keep clust_configs aligned with sils, otherwise
                # clust_configs[np.argmax(sils)] can pick the wrong
                # configuration after a failed decomposition.
                clust_configs.append(None)

            sils.append(sil)

        # Use the best cluster number to generate clusters
        best_ks = sils.index(max(sils))+min_ks
        print("-- Best cluster number found through SILHOUETTE (%f)= %i" % (max(sils), best_ks+len(two_clust_idx)))
        silhoutte = max(sils)

        all_clusters = clust_configs[np.argmax(sils)]

    else:

        print("-- Not necessary to cluster")
        # FIX: was len(all_leaves_idx) -- an undefined name (NameError);
        # the cluster labels must have one entry per reduced-matrix leaf.
        all_clusters = np.zeros(len(mul_leav_idx), dtype = np.int32)
        # FIX: silhoutte was undefined on this path (NameError at return).
        silhoutte = np.nan

    clust_branches = clust_cleaning(mul_leav_idx, all_clusters, dict_parents, dict_children, dict_ancestors, savebranches = save_branches)
    clusts = clust_branches + two_clust_idx

    print("-- Final cluster number (after cleaning) %i" % len(clusts))

    # Calculate the silhouette after cluster cleaning
    fclusts_idx = -1*all_clusters

    i = 1
    for clust in clusts:
        i += 1
        fleavs = dendrogram[clust].sorted_leaves()

        fleavs_idx = []
        for fleav in fleavs:
            fleavs_idx.append(fleav.idx)
        fleavs_idx = np.asarray(fleavs_idx)

        # Find the position of the cluster leaves
        pos = np.where(np.in1d(mul_leav_idx,fleavs_idx))[0]
        fclusts_idx[pos] = i

    # Leaves that did not end up in any cleaned cluster keep a negative
    # label; relabel each such group above the current maximum.
    oldclusts = np.unique(fclusts_idx[fclusts_idx < 0])

    for oldclust in oldclusts:
        fclusts_idx[fclusts_idx == oldclust] = np.max(fclusts_idx)+1

    if kg > 1:
        # FIX: embed with the best cluster number; previously this reused
        # the stale loop variable 'ks' (last value of the search range),
        # which was also undefined whenever kg <= 1.
        evecs = spectral_embedding(rAM, n_components=best_ks,
                                   eigen_solver='arpack',
                                   random_state=222,
                                   eigen_tol=0.0, drop_first=False)
        sil = silhouette_score(evecs, fclusts_idx, metric='euclidean')
        print("-- Final clustering configuration silhoutte %f" % sil)

    all_struct_types = np.asarray(all_struct_types)
    all_struct_parents = np.asarray(all_struct_parents)

    # Add the isolated leaves to the cluster list, if required
    if save_isol_leaves:

        isol_leaves = all_structures_idx[(all_struct_parents == -1) & (all_struct_types == 'L')]
        clusts = clusts + list(isol_leaves)

        print("SAVE_ISOL_LEAVES triggered. Isolated leaves added.")
        print("-- Total cluster number %i" % len(clusts))

    # Add the unclustered leaves within clusters to the cluster list, if required
    if save_clust_leaves:

        isol_leaves = all_structures_idx[(all_struct_parents == -1) & (all_struct_types == 'L')]

        all_leaves = []
        for leaf in dendrogram.leaves:
            all_leaves.append(leaf.idx)

        clust_leaves = []
        for clust in clusts:
            for leaf in dendrogram[clust].sorted_leaves():
                clust_leaves.append(leaf.idx)

        unclust_leaves = list(set(all_leaves)-set(clust_leaves + list(isol_leaves)))
        clusts = clusts + unclust_leaves

        print("SAVE_CLUST_LEAVES triggered. Unclustered leaves added.")
        print("-- Total cluster number %i" % len(clusts))

    # Update the catalog with new information
    catalog['parent'] = all_struct_parents
    catalog['ancestor'] = all_struct_ancestors
    catalog['n_leaves'] = nleaves
    catalog['structure_type'] = all_struct_types

    return clusts, catalog, AMs, escalpars, silhoutte
class SpectralCloudstering(object):
    """
    Apply the spectral clustering to find the best
    cloud segmentation out from a dendrogram.

    Parameters
    -----------
    dendrogram: 'astrodendro.dendrogram.Dendrogram' instance
        The dendrogram to clusterize.
    catalog: 'astropy.table.table.Table' instance
        A catalog containing all properties of the dendrogram
        structures. Generally generated with ppv_catalog module.
    header: 'astropy.io.fits.header.Header' instance
        The header of the fits data the dendrogram was
        generated from. Necessary to obtain the assignment cubes.
    criteria: list of strings
        Clustering criteria referred to the structure properties
        in the catalog (default ['volume', 'luminosity']).
    user_k: int
        The expected number of clusters, if not provided
        it will be guessed automatically through the eigenvalues
        of the unsmoothed affinity matrix.
    user_ams: numpy array
        User provided affinity matrix. If this is not
        furnished it is automatically generated through the
        volume and/or luminosity criteria.
    user_scalpars: list of floats
        User-provided scaling parameters to smooth the
        affinity matrices.
    user_iter: int
        User-provided number of k-means iterations.
    save_isol_leaves: bool
        Consider the isolated leaves (without parent)
        as individual 'clusters'. Useful for low
        resolution data where the beam size
        corresponds to the size of a Giant
        Molecular Cloud.
    save_clust_leaves: bool
        Consider unclustered leaves as individual
        'clusters'. This keyword will not include
        the isolated leaves without parents.
    save_all_leaves: bool
        Trigger both save_isol_leaves and
        save_clust_leaves.
    save_branches: bool
        Retain all isolated branches usually discarded
        by the cluster analysis.
    save_all: bool
        Trigger all save_isol_leaves,
        save_clust_leaves, and save_branches.
    rms: int or float
        Noise level of the observation. Necessary to
        calculate the scaling parameter above a certain
        signal-to-noise ratio.
    s2nlim: int or float
        Signal-to-noise limit above which the
        scaling parameter is calculated. Needed
        only if rms is not np.nan.
    blind: bool
        Show the affinity matrices.
        Matplotlib required.
    locscaling: bool
        Smooth the affinity matrices using a local
        scaling technique.

    Attributes
    ----------
    clusters: list
        The dendrogram branch indexes corresponding to the
        identified clusters.
    catalog: 'astropy.table.table.Table' instance
        The input catalog updated with dendrogram structure
        parent, ancestor, number of leaves, and type
        ('T', trunks or branches without parent; 'B', branches
        with parent; 'L', leaves).
    affmats: numpy array
        The affinity matrices calculated by the algorithm.
    escalpars: list
        Estimated scaling parameters for the different
        affinity matrices.
    silhouette: float
        Silhouette of the best cluster configuration.
    clusters_asgn: astropy.io.fits.hdu.image.PrimaryHDU
        Assignment cube generated by the 'cluster'-type
        structures.
    trunks_asgn: astropy.io.fits.hdu.image.PrimaryHDU
        Assignment cube generated by the 'trunk'-type
        structures.
    leaves_asgn: astropy.io.fits.hdu.image.PrimaryHDU
        Assignment cube generated by the 'leaf'-type
        structures.
    """

    def __init__(self, dendrogram, catalog, header, criteria = ['volume', 'luminosity'],
                 user_k = None, user_ams = None, user_scalpars = None, user_iter = None,
                 save_isol_leaves = False, save_clust_leaves = False, save_branches = False,
                 save_all_leaves = False, save_all = False, blind = False, rms = np.nan, s2nlim = 3,
                 locscaling = False):

        self.dendrogram = dendrogram
        self.header = header

        # FIX: work on a copy. The default list is shared between calls
        # (mutable default argument) and the original code mutated it,
        # corrupting both the default and the caller's list.
        criteria = list(criteria)

        # Checking for user defined criteria existence in the catalog.
        # FIX: iterate over a snapshot -- removing items from the list
        # being iterated skips the element after each removal.
        for crit in list(criteria):
            if (crit not in catalog.colnames) & (crit != 'volume') & (crit != 'luminosity'):
                print("WARNING: %s not in the catalog, removed from the criteria list" % crit)
                criteria.remove(crit)

        if len(criteria) == 0:
            print("WARNING: criteria list empty, running on default criteria list, [volume, luminosity]")
            criteria = ['volume', 'luminosity']

        # Check for default criteria existence in the catalog
        if ('luminosity' not in catalog.colnames) and ('luminosity' in criteria):
            print("WARNING: adding luminosity = flux to the catalog.")
            catalog['luminosity'] = catalog['flux']

        if ('volume' not in catalog.colnames) and ('volume' in criteria):
            print("WARNING: adding volume = pi * radius^2 * v_rms to the catalog.")
            catalog['volume'] = np.pi*catalog['radius']**2*catalog['v_rms']

        if len(criteria) > 1:
            print("WARNING: clustering will be performed on the Aggregated matrix")

        if save_all_leaves is True:
            print("SAVE_ALL_LEAVES triggered: isolated leaves and unclustered leaves will be retained")
            save_clust_leaves, save_isol_leaves = True, True

        if save_all is True:
            print("SAVE_ALL triggered: isolated leaves, unclustered leaves, and unclustered branches will be retained")
            save_branches, save_clust_leaves, save_isol_leaves = True, True, True

        self.criteria = criteria
        self.user_k = user_k or 0
        self.user_ams = user_ams
        self.user_scalpars = user_scalpars or [None]*len(criteria)
        self.user_iter = user_iter or 1
        self.locscaling = locscaling

        self.save_isol_leaves = save_isol_leaves
        self.save_clust_leaves = save_clust_leaves
        self.save_branches = save_branches
        self.save_all_leaves = save_all_leaves
        self.save_all = save_all

        self.blind = blind
        self.rms = rms
        self.s2nlim = s2nlim

        # Default colors in case plot_connected_clusters is called
        # before showdendro
        self.colors = cycle('rgbcmyk')

        # Run the actual segmentation.
        self.clusters, self.catalog, self.affmats, self.escalpars, self.silhouette = cloudstering(self.dendrogram,
                                                                                                  catalog,
                                                                                                  self.criteria,
                                                                                                  self.user_k,
                                                                                                  self.user_ams,
                                                                                                  self.user_scalpars,
                                                                                                  self.user_iter,
                                                                                                  self.save_isol_leaves,
                                                                                                  self.save_clust_leaves,
                                                                                                  self.save_branches,
                                                                                                  self.blind,
                                                                                                  self.rms,
                                                                                                  self.s2nlim,
                                                                                                  self.locscaling)

        print("Generate assignment cubes...")

        # Automatically generate assignment cubes
        # ... of clusters
        self.clusters_asgn = make_asgncube(self.dendrogram, self.clusters, self.header, collapse = False)
        # ... of trunks
        trunks = np.where(self.catalog['structure_type'] == 'T')[0]
        self.trunks_asgn = make_asgncube(self.dendrogram, trunks, self.header, collapse = False)
        # ... of leaves
        leaves = np.where(self.catalog['structure_type'] == 'L')[0]
        self.leaves_asgn = make_asgncube(self.dendrogram, leaves, self.header, collapse = False)

    def showdendro(self, cores_idx=None, savefile=None):
        """
        Show the clustered dendrogram.
        Every color corresponds to a different cluster.

        Parameters
        -----------
        cores_idx: list
            Dendrogram indexes to highlight; defaults to the
            clusters found by the algorithm.
        savefile: str
            If given, save the figure to this path.
        """

        dendro = self.dendrogram
        if not cores_idx:
            cores_idx = self.clusters

        # Random color component generator for the cluster colors
        r = lambda: random.randint(0,255)

        p = dendro.plotter()

        fig = plt.figure(figsize=(14, 8))
        ax = fig.add_subplot(111)
        ax.set_yscale('log')

        cols = []

        # Plot the whole tree
        p.plot_tree(ax, color='black')

        for i in range(len(cores_idx)):
            col = '#%02X%02X%02X' % (r(),r(),r())
            cols.append(col)
            p.plot_tree(ax, structure=[dendro[cores_idx[i]]], color=cols[i], lw=3)

        ax.set_title("Final clustering configuration")
        ax.set_xlabel("Structure")
        ax.set_ylabel("Flux")

        self.colors = cols

        if savefile:
            fig.savefig(savefile)

    def plot_connected_clusters(self, **kwargs):
        """Plot the identified clusters over the dendrogram tree."""
        # FIX: 'plotting' is a sibling module of this package; the
        # Py2-style implicit relative import fails on Python 3.
        try:
            from .plotting import dendroplot_clusters
        except ImportError:
            from plotting import dendroplot_clusters

        return dendroplot_clusters(self.clusters, self.dendrogram, self.catalog,
                                   colors=self.colors,
                                   **kwargs)
if __name__ == '__main__':
    # FIX: the previous entry point called a non-existent run() method on a
    # constructor that requires a dendrogram, a catalog, and a header --
    # a guaranteed crash. SCIMES is a library; point users to the API instead.
    print("scimes is a library: instantiate "
          "SpectralCloudstering(dendrogram, catalog, header) from your own script.")