#!/usr/sfw/bin/python
# -*- coding: iso-8859-15 -*-
# C:\Python27\python.exe [path to PartitionDistance.py]\PartitionDistance.py ProjetInfoling2012PartitionDeReference.txt ProjetInfoling2012PartitionExemple.txt

import sys
import os
import re
import string
import time
from math import *

#------------------------------
# Partition Distance
#------------------------------
# Two partition files given as parameters:
# One element per line,
# elements in the same cluster appear on consecutive lines,
# clusters are separated by empty lines.
#------------------------------


def openPartition(filename, format):
    """Read a partition file and return it as a list of clusters.

    The file holds one element per line; elements of the same cluster are
    on consecutive lines and clusters are separated by blank lines.

    filename: path of the partition file to read.
    format:   not used by this function; kept so existing calls still work.

    Returns a list of clusters, each cluster being a non-empty list of
    element strings (line terminators removed, other characters kept).
    """
    parti = []
    cluster = []
    # "with" guarantees the file is closed even if reading fails.
    with open(filename, "r") as fd:
        for line in fd:
            # Drop the line terminator only ("\n" or "\r\n"); the rest of
            # the line is the element name and is kept untouched.
            element = line.rstrip("\r\n")
            if not element.strip():
                # A blank line is always a separator and never an element,
                # so leading or repeated blank lines cannot create bogus
                # "" / " " / "\n" elements.
                if cluster:
                    parti.append(cluster)
                    cluster = []
            else:
                cluster.append(element)
    # The last cluster is not necessarily followed by a blank line.
    if cluster:
        parti.append(cluster)
    return parti


def ClustersToDico(partition):
    """Map every element to the index of the cluster that contains it.

    partition: partition given as a list of lists (one list per cluster).

    Returns a dict {element: cluster index}. If an element appears in
    several clusters, the index of the last such cluster is kept.
    """
    parti = {}
    for index, cluster in enumerate(partition):
        for element in cluster:
            parti[element] = index
    return parti


def completePartitions(part1, part2, parti1, parti2):
    """Add to the second partition, as singletons, the elements it lacks.

    part1 and part2:   dicts associating to each element its class number.
    parti1 and parti2: lists of clusters (cluster = list of elements).

    Every element of part1 that is absent from part2 is appended to parti2
    as a new one-element cluster and registered in part2 with the index of
    that cluster. part2 and parti2 are modified in place; part1 and parti1
    are only read. Nothing is returned.
    """
    for element in part1:
        # "in" replaces dict.has_key(), which no longer exists in Python 3.
        if element not in part2:
            # The new singleton is appended last, so its class number is
            # the current number of clusters.
            part2[element] = len(parti2)
            parti2.append([element])


# part1 and part2: dico associating to each element its class number
# parti1 and parti2: lists of clusters (cluster=list of elements)
# http://en.wikipedia.org/wiki/Rand_index#Adjusted_Rand_index
# NOTE(review): only the first statement of this function is on this source
# line; the rest of its body follows on the next line.
def adjustedRand(part1,part2,parti1,parti2):
    distance=0
matrix=[] i=0 while i0: selem+=element*(element-1)/2 j+=1 #print "compar",total,len(parti1[i]) a.append(total) i+=1 #print matrix srow=0 scol=0 for element in a: if element>0: srow+=element*(element-1)/2 for element in b: if element>0: scol+=element*(element-1)/2 expected=(srow*scol)*2.0/(1.0*n*(n-1)) #print selem," - ",expected, "-",srow,"/",scol,"n",n index=(selem-expected)/(0.5*(srow+scol)-expected) return 1-index # part1 and part2: dico associating to each element its class number # parti1 and parti2: lists of clusters (cluster=list of elements) # http://en.wikipedia.org/wiki/Rand_index#Rand_index def Rand(part1,part2,parti1,parti2): aa=0 bb=0 cc=0 dd=0 elements=part1.keys() i=0 while i