unit Unit1; interface uses Windows, Messages, SysUtils, Classes, Graphics, Controls, Forms, Dialogs, StdCtrls, ComCtrls, ExtCtrls, Gauges, Grids, Math; type //============================================================================= Noeud= class private Fils : array[32..255] of Noeud; //liste de fils Lettre : integer; //lettre contenue dans ce noeud de l'arbre Entier : boolean; //flag indiquant si le mot est entier Etiquet : integer; //flag indiquant si le mot est entier Found : string; NbFound : integer; public constructor create; destructor destroy ; end; { Noeud } //arbre d'ordre 26 Arbre = class private tete : noeud ; public constructor create ; destructor destroy ;override ; procedure charger(NomFic : string) ; function rajouter_mot(LeMot : string;etiquet:integer;courant:Noeud):Noeud; procedure fromStrings(chaines:Tstrings);//chaines are like word,etiquet function Trouve(LeMot: string;courant:Noeud) : Noeud; function toString(en_cours : noeud;trouve:string) : string; function compter_terminaux(en_cours : noeud) : integer ; procedure updateNode(courant:Noeud;found:string;nbfound:integer); end ; PhyloNode= class private public Fils : Array [0..1] of PhyloNode; //liste de fils weight : real; //length of the edge leading to the node angle : real; //angle of this edge minangle : real; //minimal angle below this node maxangle : real; //maximal angle below this node size : integer; //font size of the label of the node r : integer; //red color of the label of the node g : integer; //green color of the label of the node b : integer; //blue color of the label of the node x : real; //coordinates of the node in a precomputing of the drawing //(before stretching to adapt to the window size) y : real; refnumber : integer; //number of the line to look for the nb of occurences in Frequency.lines name : string; constructor create() ; destructor destroy ; procedure draw(image:TImage); procedure computeDrawing(x:real;y:real); function toString():string; function sortLeaves(sofar:integer):integer; end; { Noeud } PhyloTree= class private public r:integer; //red color of the edges g:integer; b:integer; xmin:real; //x min among all nodes of the tree (computed during drawing precomputing) xmax:real; //... ymin:real; ymax:real; root : PhyloNode; angle:real; //general angle to add to the tree (to rotate it) constructor create() ; destructor destroy ; procedure draw(Image:Timage); //drawing of the tree using the precomputed positions procedure computeDrawing(); //precomputing of the positions of the node function toString():string; function sortLeaves(sofar:integer):integer; //computes the angles of all edges of the tree in a bottom up procedure //EqualAngle algorithm = sortLeaves + computeDrawing + draw end; { Noeud } //============================================================================= TForm1 = class(TForm) RichEdit1: TRichEdit; OuvrirTexte: TButton; OpenDialog1: TOpenDialog; Gauge1: TGauge; Button1: TButton; Edit1: TEdit; Label1: TLabel; Frequencies: TRichEdit; Button2: TButton; StringGrid1: TStringGrid; ListBox1: TListBox; ListBox2: TListBox; Nexus: TRichEdit; Antidico: TCheckBox; AntidicoWords: TRichEdit; Label2: TLabel; ColorR: TRichEdit; ColorG: TRichEdit; ColorB: TRichEdit; Font: TRichEdit; RichEdit3: TRichEdit; splitstree: TButton; OpenDialog2: TOpenDialog; OrderedTaxa: TListBox; Label3: TLabel; RadioGroup1: TRadioGroup; WindowLabel: TLabel; WindowSize: TEdit; AntiDicoWordsEng: TRichEdit; AntidicoENG: TCheckBox; Parametre: TLabel; ListBox3: TListBox; TheFreqs: TRichEdit; found: TRichEdit; Label093: TLabel; Label4: TLabel; Label5: TLabel; gardes: TLabel; Button3: TButton; Label6: TLabel; Newick: TRichEdit; ShowTree: TButton; MatDist: TStringGrid; Color: TButton; OtherFreq: TRichEdit; LangEng: TRichEdit; LangFra: TRichEdit; function closestPoint(image:Timage;node:PhyloNode;rx:integer;ry:integer):PhyloNode; function treeFromString(chaine:string):PhyloTree; function fromString(chaine:string):PhyloNode; function elague(machin:string):string; function sansAccent(machin:string):string; procedure Langue(); procedure RemplitFreq(Courant: Noeud); procedure onlyMostFreq(Courant: Noeud;sofar:string); procedure FormCreate(Sender: TObject); procedure FormDestroy(Sender: TObject); procedure OuvrirTexteClick(Sender: TObject); procedure Button1Click(Sender: TObject); procedure Button2Click(Sender: TObject); procedure ColorClick(Sender: TObject); procedure RadioGroup1Click(Sender: TObject); procedure Label5Click(Sender: TObject); procedure UPGMA(Sender: TObject); procedure TextOutAngle(Canvas: TCanvas; P: TPoint; Angle: integer; const s: string); procedure ShowTreeClick(Sender: TObject); procedure splitstreeClick(Sender: TObject); private { Déclarations privées } public { Déclarations publiques } end; var Form1: TForm1; dico1,lantidico,lantidicoENG : arbre ; forbidden,replace,replaceby:array [1..50] of string; tree: PhyloTree; nbwords: integer; lang: TRichedit; implementation uses Unit2; {$R *.DFM} function TForm1.closestPoint(image:Timage;node:PhyloNode;rx:integer;ry:integer):PhyloNode; var tempNode:phylonode; a,b:real; begin result:=node; a:=(image.width-2*form2.horiz.position)/(tree.xmax-tree.xmin); b:=(image.height-2*form2.vertic.position)/(tree.ymax-tree.ymin); if node.fils[0]<>nil then begin result:=closestPoint(image,node.fils[0],rx,ry); tempNode:=closestPoint(image,node.fils[1],rx,ry); if ( abs(a*(tempNode.x-tree.xmin)+form2.horiz.position-rx) +abs(b*(tempNode.y-tree.ymin)+form2.vertic.position-ry) ) < ( abs(a*(result.x-tree.xmin)+form2.horiz.position-rx) +abs(b*(result.y-tree.ymin)+form2.vertic.position-ry) ) then result:=tempNode; end end; procedure TForm1.TextOutAngle(Canvas: TCanvas; P: TPoint; Angle: integer; const s: string); { display the text S with angle (0 to 360°) on the canvas at P } var LogRec: TLogFont; OldFont, NewFont: HFont; w, h, Col: integer; ACos, ASin : Extended; begin with Canvas do begin PenPos:=P; if length(s)=0 then exit; GetObject(Font.Handle, SizeOf(LogRec), @LogRec); LogRec.lfEscapement := Angle*10; LogRec.lfOrientation := Angle*10; LogRec.lfOutPrecision := Out_TT_Only_Precis; NewFont := CreateFontIndirect(LogRec); OldFont := SelectObject(Canvas.Handle, NewFont); SinCos(Angle*pi/180, ASin, ACos); w:=TextWidth(s); if Brush.style=bsSolid then begin { going another delphi bug (do not paint the rectangle of textout with brush on some printer (screen ok) } h:=TextHeight(s); col:=Pen.color; Pen.color:=brush.color; Polygon([P, Point(P.x+round(h*Asin), P.y+round(h*Acos)), Point(P.x+round(w*Acos+h*Asin), P.y+round(-w*Asin+h*Acos)), Point(P.x+round(w*Acos), P.y+round(-w*Asin) ), P ]); Pen.Color:=Col; end; Windows.TextOut(Canvas.Handle, P.X, P.Y, pchar(S), length(s)); MoveTo( round(p.x+w*ACos), round(p.y-w*ASin) ); // new pen position at end of text NewFont := SelectObject(Canvas.Handle, OldFont); DeleteObject(NewFont); end; end; function TForm1.elague (machin:string):string; var caract,chaine:string; i:integer; begin i:=1; while (i<50) and (forbidden[i]<>'') do begin while pos(forbidden[i],machin)>0 do begin machin:=copy(machin,0,pos(forbidden[i],machin)-1) +' '+copy(machin,pos(forbidden[i],machin)+length(forbidden[i]),length(machin)); end; inc(i); end; while pos(' ',machin)>0 do begin machin:=copy(machin,0,pos(' ',machin)-1) +copy(machin,pos(' ',machin)+1,length(machin)); end; result:=machin end; function TForm1.sansAccent(machin:string):string; var caract,chaine:string; i:integer; begin i:=1; while (i<50) and (replace[i]<>'') do begin while pos(replace[i],machin)>0 do begin machin:=copy(machin,0,pos(replace[i],machin)-1) +replaceby[i]+copy(machin,pos(replace[i],machin)+length(replace[i]),length(machin)); end; inc(i); end; result:=machin end; constructor PhyloNode.create(); var c : integer ; begin for c:=0 to 1 do Fils[c]:=nil; weight:=0; angle:=0; name:=''; size:=10; r:=0; g:=0; b:=0; x:=50000; y:=50000; refnumber:=0; end ; constructor PhyloTree.create(); begin root:=PhyloNode.create; angle:=0; r:=200; g:=210; b:=255; xmin:=100000; ymin:=100000; xmax:=-1; ymax:=-1; end ; //Recursively computes a PhyloTree from a newick string function TForm1.treeFromString(chaine:string):PhyloTree; begin result:=PhyloTree.create; result.root:=fromString(chaine); end; //Recursively computes a tree (PhyloNode) from a newick string function TForm1.fromString(chaine:string):PhyloNode; var temp:string; continue:boolean; open,closed,i:integer; begin open:=0; closed:=0; i:=2; if chaine[1]<>'(' then begin result:=PhyloNode.create(); result.name:=chaine; for i:=1 to Stringgrid1.ColCount-1 do begin if Stringgrid1.Cells[0,i]=chaine then begin result.refnumber:=i; end; end; end else begin i:=2; continue:=true; while (i<=length(chaine)) and (continue) do begin if chaine[i]='(' then inc(open); if chaine[i]=')' then inc(closed); if chaine[i]=',' then begin if open=closed then continue:=false else inc(i); end else inc(i); end; //i is the position of the comma "in the middle" result:=PhyloNode.create(); //showmessage('fromstring '+copy(chaine,2,i-2)); result.fils[0]:=fromString(copy(chaine,2,i-2)); chaine:=copy(chaine,i+1,length(chaine)); //showmessage('fromstring '+copy(chaine,1,length(chaine)-1)); result.fils[1]:=fromString(copy(chaine,1,length(chaine)-1)); end; end; destructor PhyloTree.destroy ; begin root.destroy; inherited destroy; end; destructor PhyloNode.destroy ; var c : integer ; begin for c:=0 to 200 do if fils[c]<>nil then fils[c].destroy ; inherited destroy ; end ; function PhyloTree.toString:string; begin root.toString; end; function PhyloNode.toString:string; var c : integer ; begin if Fils[0]<>nil then result:='('+Fils[0].toString+','+Fils[1].toString+')'//+':'+floattostr(angle) else result:=name+':'+floattostr(angle); end; //Compute the angles of the edges through depth-first search function PhyloNode.sortLeaves(sofar:integer):integer; var c : integer ; isleaf:boolean; begin result:=sofar; isleaf:=true; for c:=0 to 1 do begin if Fils[c]<>nil then begin isleaf:=false; result:=Fils[c].sortLeaves(result); end; end; if isleaf then begin angle:=sofar*2*pi/(form1.richedit1.lines.count+1)+tree.angle; minangle:=angle; maxangle:=angle; weight:=1; result:=sofar+1; end else begin if fils[0].minanglefils[1].maxangle then maxangle:=fils[0].maxangle else maxangle:=fils[1].maxangle; angle:=(minangle+maxangle)/2; end; end; function PhyloTree.sortLeaves(sofar:integer):integer; begin result:=root.sortleaves(sofar); end; procedure PhyloTree.computeDrawing(); begin root.computeDrawing(50000,50000) end; { Precompute through depth-first search the positions of the nodes, assuming all edges have length 50 It will then be rescaled to be drawn. Positions of the labels are NOT computed here. x and y gives the position of the currently treated node. } procedure PhyloNode.computeDrawing(x:real;y:real); var newx,newy:integer; begin if Fils[0]<>nil then begin Fils[0].x:=Math.floor(x+50*cos(Fils[0].angle)); Fils[0].y:=Math.floor(y+50*sin(Fils[0].angle)); if self=tree.root then begin Fils[0].x:=x; Fils[0].y:=y; end; if Fils[0].xtree.xmax then tree.xmax:=Fils[0].x; if Fils[0].y>tree.ymax then tree.ymax:=Fils[0].y; Fils[0].computedrawing(Fils[0].x,Fils[0].y); end; if Fils[1]<>nil then begin Fils[1].x:=Math.floor(x+50*cos(Fils[1].angle)); Fils[1].y:=Math.floor(y+50*sin(Fils[1].angle)); if Fils[1].xtree.xmax then tree.xmax:=Fils[1].x; if Fils[1].y>tree.ymax then tree.ymax:=Fils[1].y; Fils[1].computedrawing(Fils[1].x,Fils[1].y); end; end; procedure PhyloTree.draw(image:TImage); begin image.Canvas.Rectangle(0,0,image.width,image.height); root.draw(image); end; { Compute through depth-first search the positions of the nodes, adapting the scale to fit the image given as input, using the width and height computed in the precomputing computeDrawing. This step should be fast as it is executed each time the window is rescaled or the tree is rotated. Positions of the labels ARE computed here. } procedure PhyloNode.draw(image:TImage); var a,b,newx,newy:real; langle,nombre,importance,max,min,i,level:integer; begin max:=0; min:=strtoint(copy(form1.frequencies.lines[1],pos(';',form1.frequencies.lines[1])+1,length(form1.frequencies.lines[1]))); //Compute the min and max number of occurrences of words treated, to color properly the labels later. for i:=1 to form1.Frequencies.lines.count-1 do begin importance:=strtoint(copy(form1.frequencies.lines[i],pos(';',form1.frequencies.lines[i])+1,length(form1.frequencies.lines[i]))); if importance>max then max := importance; if importancenil then begin if Fils[i].name<>'' then begin //Compute the labels: fonts and position Image.Canvas.Brush.style := BsClear; { First retrieve the nb of occurrences of the word (in the Richedit Frequencies, at the line stored in the property refnumber of the PhyloNode) } nombre:=Fils[i].refnumber; //showmessage(Fils[i].name+'-'+inttostr(Fils[i].refnumber)+'-'+copy(form1.frequencies.lines[nombre],pos(';',form1.frequencies.lines[nombre])+1,length(form1.frequencies.lines[nombre]))); importance:=strtoint(copy(form1.frequencies.lines[nombre],pos(';',form1.frequencies.lines[nombre])+1,length(form1.frequencies.lines[nombre]))); level:=Math.floor(1+9.99999*((math.log2(importance)-math.log2(min))/(math.log2(max)-math.log2(min)))); //Use Label1 and its autosize property to know the width and height of the label. form2.Label1.Caption:=Fils[i].name; form2.label1.font.color:=RGB(strtoint(form1.colorR.lines[level]),strtoint(form1.colorG.lines[level]),strtoint(form1.colorB.lines[level])); form2.label1.font.size:=8+level; //Compute the position of the upper left corner of the label newx:=(Fils[i].x-tree.xmin)*a+(form2.label1.width+4)/2*cos(angle)-form2.label1.Width/2+form2.horiz.position; newy:=(Fils[i].y-tree.ymin)*b+(form2.label1.Height+4)/2*sin(angle)-form2.label1.Height/2+form2.vertic.position; Image.Canvas.Font:=form2.label1.font; if Form2.RadioButton2.Checked then //Display the label horizontally Image.canvas.TextOut(Math.Floor(newx),Math.Floor(newy),Fils[i].name) else begin //Display the label with the same angle as the edge langle:=180-Math.floor(Fils[i].angle/pi*180); if ((langle>=90) and (langle<=180)) or ((langle>=-180) and (langle<=-90)) then langle:=langle+180; //to avoid the labels being upside down Form1.TextOutAngle(Image.canvas, Point(Math.floor(a*(Fils[i].x-tree.xmin))+form2.horiz.position,Math.floor(b*(Fils[i].y-tree.ymin)+form2.vertic.position)),langle,Fils[i].name); end; Image.Canvas.Brush.style := BsSolid; end; //draw the edge to the currently treated son: image.Canvas.MoveTo(Math.floor((x-tree.xmin)*a)+form2.horiz.position,Math.floor((y-tree.ymin)*b)+form2.vertic.position); image.Canvas.LineTo(Math.floor(a*(Fils[i].x-tree.xmin))+form2.horiz.position,Math.floor(b*(Fils[i].y-tree.ymin)+form2.vertic.position)); //recursively apply the drawing to the son: Fils[i].draw(image); end; end; end; constructor noeud.create ; var c : integer ; begin for c:=32 to 255 do fils[c]:=nil ; lettre:=0; etiquet:=0; entier:=false; found:=''; nbfound:=0; end ; destructor noeud.destroy ; var c : integer ; begin for c:=32 to 255 do if fils[c]<>nil then fils[c].destroy ; inherited destroy ; end ; constructor arbre.create ; var i : integer ; c : integer ; begin tete:=noeud.create; end ; destructor arbre.destroy ; begin tete.destroy ; inherited destroy ; end ; procedure arbre.updateNode(courant:Noeud;found:string;nbfound:integer); begin showmessage(courant.found+'-'+inttostr(courant.nbfound)); courant.found:=found; courant.nbfound:=nbfound; end; function arbre.rajouter_mot(LeMot : string;etiquet:integer;courant:Noeud):Noeud; var lettre : integer ; begin if LeMot='' then begin courant.entier:=true; courant.etiquet:=etiquet; courant.nbfound:=0; result:=courant; exit ; end; lettre:=Ord(LeMot[1]); if (courant.fils[lettre]=nil) then //il faut créer cette lettre dans l'arbre begin courant.fils[lettre]:=noeud.create ; courant.lettre:=lettre ; // la lettre est maintenant dans l'arbre end ; delete(LeMot,1,1) ; // on efface la lettre du mot puisqu'elle est déjà dans l'arbre result:=rajouter_mot(LeMot,etiquet,courant.fils[lettre]) ; // et on rajoute le reste end ; procedure arbre.charger(NomFic : string) ; var s,chemin : string ; f : textfile ; begin // chargement du dico chemin:=ExtractFilePath(Application.ExeName); {le chemin de l'appli, AVEC l'antislash final} assignFile(f,chemin+nomFic) ; reset(f) ; repeat readln(f,s) ; rajouter_mot(s,0,tete) ; // et on rajoute le mot until eof(f) ; closeFile(f) ; end ; function arbre.trouve(LeMot : string;courant:Noeud) : noeud ; var lettre : integer ; begin trouve:=nil; if LeMot='' then //--- incomplet ; tester aussi "entier" if courant.Entier then trouve:=courant else trouve:=nil else begin lettre:=Ord(LeMot[1]); delete(LeMot,1,1) ; if (lettre>31) and (lettre<256) then if (courant.fils[lettre]=nil) then trouve:=nil else begin trouve:=trouve(LeMot,courant.fils[lettre]) ; end ; end ; end ; function arbre.compter_terminaux(en_cours : noeud) : integer ; var i : integer ; total : integer ; begin // fonction valable aussi pour un arbre non équilibré total:=0 ; for i:=32 to 255 do begin if en_cours.fils[i]<>nil then inc(total,compter_terminaux(en_cours.fils[i])) ; end; if total=0 then inc(total) ; compter_terminaux:=total ; end ; { function arbre.toString(en_cours : noeud) : string ; var i:integer; chaine:string; c1,c2,c3:string; begin chaine:=''; if en_cours.Entier then c1:='1' else c1:='0'; chaine:=chaine+('['+Chr(en_cours.Lettre)+','+c1+','+inttostr(en_cours.Etiquet)+']('); for i:=32 to 255 do begin if en_cours.fils[i]<>nil then begin chaine:=chaine+toString(en_cours.fils[i]); end; end; chaine:=chaine+')'; result:=chaine; end ; } procedure arbre.fromStrings(chaines:Tstrings);//chaines are like word,etiquet var i:integer; chaine:string; c1,c2,c3:string; begin for i:=0 to chaines.Count-1 do begin rajouter_mot(copy(chaines.Strings[i],0,pos(',',chaines.Strings[i])-1), strtoint(copy(chaines.Strings[i],pos(',',chaines.Strings[i])+1,length(chaines.Strings[i]))),tete) end; end; function arbre.toString(en_cours : noeud;trouve:string) : string ; var i:integer; chaine:string; c1,c2,c3:string; begin if en_cours.Entier then begin chaine:=trouve+','+inttostr(en_cours.etiquet)+','; end; for i:=32 to 255 do begin if en_cours.fils[i]<>nil then begin //showmessage(inttostr(en_cours.etiquet)+' '+trouve+'='+trouve+chr(i)); chaine:=chaine+toString(en_cours.fils[i],trouve+chr(i)); end; end; result:=chaine; end ; //============================================================================= procedure TForm1.Remplitfreq(Courant: Noeud); var i,nombre:integer; begin if Courant.Entier then begin nombre:=courant.nbfound; thefreqs.Lines[nombre]:=inttostr(strtoint(thefreqs.lines[nombre])+1); end; for i:=32 to 255 do begin if courant.fils[i]<>nil then begin Remplitfreq(courant.fils[i]); end; end; end; procedure TForm1.OnlyMostFreq(Courant: Noeud;sofar:string); var j,i,nombre:integer; begin if Courant.Entier then begin nombre:=courant.nbfound; if (nombre>=strtoint(edit1.text)) then begin if not(((antidico.Checked) and ((sofar='') or (lantidico.Trouve(sofar,lantidico.tete)<>nil))) or ((antidicoENG.Checked) and ((sofar='') or (lantidicoENG.Trouve(sofar,lantidicoENG.tete)<>nil)))) then begin j:=0; Frequencies.lines.add(sofar+';'+inttostr(nombre)); //LOAD OTHER FREQUENCIES : { while (jsofar) do begin inc(j); end; try Frequencies.lines.add(sofar+';'+inttostr(Math.floor(strtofloat(copy(otherfreq.lines[j],pos(';',otherfreq.lines[j])+1,length(otherfreq.lines[j])))))); except frequencies.lines.add(sofar+';10'); end; } found.lines.add(courant.Found); richedit1.lines.add(sofar+','+inttostr(found.lines.count-1)); end; end; end; for i:=32 to 255 do begin if courant.fils[i]<>nil then begin OnlyMostFreq(courant.fils[i],sofar+char(i)); end; end; end; procedure TForm1.Langue(); begin Form1.ouvrirTexte.caption:=lang.lines[0]; Form1.Label1.caption:=lang.lines[1]; Form1.Label093.caption:=lang.lines[2]; Form1.Antidico.caption:=lang.lines[3]; Form1.AntidicoENG.caption:=lang.lines[4]; Form1.Button1.caption:=lang.lines[5]; Form1.gardes.caption:=lang.lines[6]; Form1.Radiogroup1.caption:=lang.lines[7]; Form1.RadioGroup1.items[0]:=lang.lines[8]; Form1.RadioGroup1.items[1]:=lang.lines[9]; Form1.RadioGroup1.items[2]:=lang.lines[10]; Form1.RadioGroup1.items[3]:=lang.lines[11]; Form1.WindowLabel.caption:=lang.lines[12]; Form1.Button2.caption:=lang.lines[13]; Form1.Richedit3.clear; Form1.Richedit3.lines.add(lang.lines[14]); Form1.splitstree.caption:=lang.lines[15]; Form1.Color.caption:=lang.lines[16]; Form1.Button3.caption:=lang.lines[17]; Form1.ShowTree.caption:=lang.lines[18]; Form1.Label3.caption:=lang.lines[19]; Form1.Label6.caption:=lang.lines[20]; Form1.Label2.caption:=lang.lines[21]; Form1.Label4.caption:=lang.lines[22]; Form1.Label5.caption:=lang.lines[23]; Form1.parametre.caption:=lang.lines[36]; end; procedure TForm1.FormCreate(Sender: TObject); var i:integer; begin lang:=LangEng; langue(); if fileexists(extractfilepath(application.exename)+'AntidicoFRA.txt') then begin antidicoWords.lines.loadfromfile(extractfilepath(application.exename)+'\AntidicoFRA.txt'); end; if fileexists(extractfilepath(application.exename)+'AntidicoENG.txt') then begin AntiDicoWordsEng.lines.loadfromfile(extractfilepath(application.exename)+'\AntidicoENG.txt'); end; dico1:=arbre.create ; forbidden[1]:=','; forbidden[2]:=';'; forbidden[3]:='.'; forbidden[4]:='('; forbidden[5]:=')'; forbidden[6]:=':'; forbidden[7]:='?'; forbidden[8]:='!'; forbidden[9]:='/'; forbidden[10]:='_'; //forbidden[11]:='-'; forbidden[11]:='_'; forbidden[12]:=''''; forbidden[13]:='`'; forbidden[14]:='’'; forbidden[15]:='…'; forbidden[16]:='»'; forbidden[17]:='«'; forbidden[18]:='"'; forbidden[19]:='['; forbidden[20]:=']'; forbidden[21]:='{'; forbidden[22]:='}'; forbidden[23]:=char(9); for i:=24 to 50 do begin forbidden[i]:=''; end; replace[1]:='é'; replaceby[1]:='e'; replace[2]:='è'; replaceby[2]:='e'; replace[3]:='ê'; replaceby[3]:='e'; replace[4]:='ë'; replaceby[4]:='e'; replace[5]:='ç'; replaceby[5]:='c'; replace[6]:='à'; replaceby[6]:='a'; replace[7]:='â'; replaceby[7]:='a'; replace[8]:='ù'; replaceby[8]:='u'; replace[9]:='ô'; replaceby[9]:='o'; replace[10]:='û'; replaceby[10]:='u'; replace[11]:='œ'; replaceby[11]:='oe'; replace[12]:='Œ'; replaceby[12]:='oe'; replace[13]:='Ê'; replaceby[13]:='e'; replace[14]:='Ë'; replaceby[14]:='e'; replace[15]:='Ç'; replaceby[15]:='c'; replace[16]:='À'; replaceby[16]:='a'; replace[17]:='Â'; replaceby[17]:='a'; replace[18]:='Ù'; replaceby[18]:='u'; replace[19]:='Ô'; replaceby[19]:='o'; replace[20]:='Û'; replaceby[20]:='u'; replace[21]:='É'; replaceby[21]:='e'; replace[22]:='È'; replaceby[22]:='e'; for i:=23 to 50 do begin replace[i]:=''; replaceby[i]:=''; end; lantidico:=arbre.create ; lantidicoENG:=arbre.create ; for i:=0 to antidicoWords.Lines.count-1 do begin lantidico.rajouter_mot(lowercase(antidicoWords.lines[i]),0,lantidico.tete); end; for i:=0 to antidicoWordsENG.Lines.count-1 do begin lantidicoENG.rajouter_mot(lowercase(antidicoWordsENG.lines[i]),0,lantidicoENG.tete); end; end; procedure TForm1.FormDestroy(Sender: TObject); begin Dico1.Destroy; end; procedure TForm1.OuvrirTexteClick(Sender: TObject); var maxfreq,size,i,j,isfound,sofar:integer; temp,chaine:string; nodefound,lenoeud:noeud; begin //otherfreq.lines.LoadFromFile('C:\Sites\PG\Blog\200803TreeCloud\ExperiencesVariees\PNAuthorsCoeff.txt'); if opendialog1.Execute then begin OuvrirTexte.Enabled:=false; richedit1.Lines.loadfromfile(opendialog1.FileName); size:=0; nbwords:=0; maxfreq:=0; gauge1.MaxValue:=richedit1.Lines.count; gauge1.MinValue:=0; for i:=0 to richedit1.lines.count-1 do begin gauge1.MaxValue:=richedit1.Lines.count+2*maxfreq; gauge1.progress:=i+1; chaine := richedit1.lines[i]+' '; while pos(' ',chaine)>0 do begin inc(nbwords); temp:=lowercase(elague(copy(chaine,0,pos(' ',chaine)-1))); nodefound:=dico1.trouve(temp,dico1.tete); if nodefound=nil then begin lenoeud:=dico1.rajouter_mot(temp,size,dico1.tete); lenoeud.nbfound:=1 ; lenoeud.found:=inttostr(nbwords)+','; inc(size); end else begin nodefound.found:=nodefound.Found+(inttostr(nbwords))+','; nodefound.nbfound:=nodefound.nbfound+1; if nodefound.nbfound>maxfreq then maxfreq:=nodefound.nbfound; end; chaine := copy(chaine,pos(' ',chaine)+1,length(chaine)); end; end; richedit1.lines.clear; thefreqs.lines.clear; thefreqs.lines.add(lang.lines[33]); for i:=0 to maxfreq do begin thefreqs.lines.add('0'); gauge1.Progress:=gauge1.progress+1; end; RemplitFreq(dico1.tete); sofar:=0; for i:=thefreqs.lines.count-1 downto 1 do begin gauge1.Progress:=gauge1.progress+1; if (sofar<100) and (sofar+strtoint(thefreqs.Lines[i])>=100) then edit1.Text:=inttostr(i); sofar:=sofar+strtoint(thefreqs.Lines[i]); thefreqs.lines[i]:=inttostr(i)+lang.lines[34]+thefreqs.lines[i]+lang.lines[35]+inttostr(i)+lang.lines[34]+inttostr(sofar); end; { edit1.Text:=inttostr(1+Math.floor(nbwords/Math.LnXP1(size-1)/100)); chaine:=dico1.toString(dico1.tete,''); while pos(',',chaine)>0 do begin temp:=copy(chaine,0,pos(',',chaine)); chaine:=copy(chaine,pos(',',chaine)+1,length(chaine)); temp:=temp+copy(chaine,0,pos(',',chaine)-1); chaine:=copy(chaine,pos(',',chaine)+1,length(chaine)); richedit1.Lines.add(temp); end; } //showmessage(inttostr(length(dico1.toString(dico1.tete)))); //showmessage(inttostr(dico1.compter_terminaux(dico1.tete))) end; end; procedure TForm1.Button1Click(Sender: TObject); var mot:string; ref,i,j,nbvirg:integer; begin i:=0; Frequencies.lines.clear; Frequencies.lines.add(' ; '); found.lines.clear;//Contains the locations where most frequent words are found. richedit1.lines.clear;//word+','+pointer to the corresponding line in "found". onlyMostFreq(dico1.tete,''); gauge1.MinValue:=0; gauge1.MaxValue:=Frequencies.Lines.count; gardes.Visible:=true; gardes.caption:=inttostr(frequencies.lines.count-1)+lang.lines[6]; { while inil))) or ((antidicoENG.Checked) and ((mot='') or (lantidicoENG.Trouve(mot,lantidicoENG.tete)<>nil))) then begin //!!found.lines[ref]:=''; richedit1.Lines.delete(i); end else begin Frequencies.lines.add( copy(richedit1.Lines[i],0,1+pos(',',richedit1.Lines[i])-2) +';'+inttostr(nbvirg) ); inc(i); end; end; end; } end; procedure TForm1.Button2Click(Sender: TObject); var minii,minjj,i,j,ii,jj,ci,cj,ref,mindist,compteur,window,n12,n21,n1,n2:integer; tempdist,distmax,totdist:real; chaine,temp,ligne:string; begin button3.enabled:=true; StringGrid1.ColCount:=richedit1.Lines.Count+1; StringGrid1.RowCount:=richedit1.Lines.Count+1; gauge1.MinValue:=0; gauge1.MaxValue:=richedit1.lines.count; distmax:=-1; for i:=0 to richedit1.lines.count-1 do begin gauge1.progress:=1+i; Stringgrid1.Cells[0,i+1]:=copy(richedit1.lines[i],0,pos(',',richedit1.lines[i])-1); Stringgrid1.Cells[i+1,0]:=Stringgrid1.Cells[0,i+1]; Stringgrid1.Cells[i+1,i+1]:='0'; ref:=strtoint(copy(richedit1.Lines[i], 1+pos(',',richedit1.Lines[i]), length(richedit1.Lines[i]))); listbox1.Items.Clear; chaine:=found.lines[ref]; n1:=0; ci:=1; temp:=''; while ci<=length(chaine) do begin if chaine[ci]<>',' then temp:=temp+chaine[ci] else begin listbox1.Items.add(temp); temp:=''; inc(n1); end; inc(ci); end; listbox3.Items:=listbox1.items; for j:=0 to richedit1.Lines.Count-1 do begin //compute min distance between i and j listbox2.clear; ref:=strtoint(copy(richedit1.Lines[j], 1+pos(',',richedit1.Lines[j]), length(richedit1.Lines[j]))); listbox2.Items.Clear; chaine:=found.lines[ref]; n2:=0; cj:=1; temp:=''; while cj<=length(chaine) do begin if chaine[cj]<>',' then temp:=temp+chaine[cj] else begin listbox2.Items.add(temp); temp:=''; inc(n2); end; inc(cj); end; if (radiogroup1.ItemIndex>=1) then begin //compute cooccurence distance between word1 and word2 window:=Math.floor(strtoint(Windowsize.Text)/2); ii:=0; jj:=0; n12:=0; while (ii0) and (n2>0) then stringgrid1.Cells[i+1,j+1]:=floattostr(Math.Log2(101)-(Math.Log2(1+max(100*n12/n1,100*n21/n2)))) else stringgrid1.Cells[i+1,j+1]:=floattostr(Math.Log2(101)); end else if radioGroup1.ItemIndex=2 then begin if (n1>0) and (n2>0) then stringgrid1.Cells[i+1,j+1]:=floattostr(1-(max(n12/n1,n21/n2))) else stringgrid1.Cells[i+1,j+1]:=floattostr(1); end else if radioGroup1.ItemIndex=3 then begin if (n12>0) and (n21>0) then begin tempdist:=(min(Math.Log2(n1/n12),Math.Log2(n2/n21)))/ //max(1/p(y|x),1/p(x|y)) (max(Math.Log2(nbwords/(n1*window*2)),Math.Log2(nbwords/(n2*window*2)))); // max(1/p(x),1/p(y)) if tempdist>=distmax then distmax:=tempdist; stringgrid1.Cells[i+1,j+1]:=floattostr(tempdist); end else stringgrid1.Cells[i+1,j+1]:='inf'; end; stringgrid1.Cells[j+1,i+1]:=stringgrid1.Cells[i+1,j+1]; end else begin //compute minimum text distance between word1 and word2 totdist:=0; listbox1.Items:=listbox3.items; for compteur:=1 to max(1,strtoint(WindowSize.text)) do begin ii:=0; minii:=0; jj:=0; minjj:=0; //showmessage(inttostr(listbox1.items.count)+' '+inttostr(listbox2.items.count)); mindist:=abs(strtoint(listbox1.items[ii])-strtoint(listbox2.items[jj])); while (ii0 do ligne:=copy(ligne,0,pos('''',ligne)-1)+' '+copy(ligne,pos('''',ligne)+1,length(ligne)); { if length(ligne)>13 then ligne:=copy(ligne,1,12); } nexus.lines.add('['+inttostr(i)+'] '''+ligne+''''); end; nexus.lines.add('; '); nexus.lines.add('END; [Taxa]'); nexus.lines.add('BEGIN Distances;'); nexus.lines.add('DIMENSIONS ntax='+inttostr(Stringgrid1.ColCount-1)+';'); nexus.lines.add('FORMAT labels=left diagonal triangle=both;'); nexus.lines.add('MATRIX'); for i:=1 to Stringgrid1.ColCount-1 do begin ligne:=sansaccent(Stringgrid1.Cells[0,i]); while pos('''',ligne)>0 do ligne:=copy(ligne,0,pos('''',ligne)-1)+' '+copy(ligne,pos('''',ligne)+1,length(ligne)); { if length(ligne)>13 then ligne:=copy(ligne,1,12); } ligne:=''''+ligne+''''; while length(ligne)<15 do ligne:=ligne+' '; ligne:='['+inttostr(i)+'] '+ligne; for j:=1 to Stringgrid1.ColCount-1 do begin chaine := StringGrid1.Cells[i,j]; while pos(',',chaine)>0 do chaine:=copy(chaine,0,pos(',',chaine)-1)+'.'+copy(chaine,pos(',',chaine)+1,length(chaine)); ligne := ligne+' '+chaine; end; nexus.lines.add(ligne); end ; nexus.lines.add(';'); nexus.lines.add('END; [Distances]'); end; procedure TForm1.ColorClick(Sender: TObject); var minii,minjj,min,max,nombre,i,j,ref,numero,importance,level:integer; chaine:string; begin If opendialog2.execute then begin max:=0; min:=strtoint(copy(frequencies.lines[1],pos(';',frequencies.lines[1])+1,length(frequencies.lines[1])));; for i:=1 to Frequencies.lines.count-1 do begin importance:=strtoint(copy(frequencies.lines[i],pos(';',frequencies.lines[i])+1,length(frequencies.lines[i]))); if importance>max then max := importance; if importance'BEGIN Trees;') do begin inc(i); end; while (i'END; [Trees]') do begin nexus.lines.Delete(i); end; nexus.lines.Delete(i); while (i'TRANSLATE') do begin inc(i); end; inc(i); //showmessage('found TRANSLATE at line '+inttostr(i)); while (i';') do begin nombre:=strtoint(copy(nexus.lines[i],1,pos(' ',nexus.lines[i])-1)); //showmessage('nombre '+inttostr(nombre)+'<'+inttostr(OrderedTaxa.Lines.Count+1)); chaine:=copy(nexus.lines[i],pos('''',nexus.lines[i])+1,length(nexus.lines[i])); chaine:=copy(chaine,0,pos('''',chaine)-1); for j:=1 to Stringgrid1.ColCount-1 do begin if sansaccent(Stringgrid1.Cells[0,j])=chaine then begin //showmessage('-'+Stringgrid1.Cells[0,j]+'-'+chaine+'-'+inttostr(j)); OrderedTaxa.items[nombre]:=inttostr(j); //showmessage(OrderedTaxa.items[nombre]); end; end; inc(i); end; while (i'VERTICES') do begin inc(i); end; inc(i); while (i';') do begin numero:=strtoint(copy(nexus.lines[i],1,pos(' ',nexus.lines[i])-1)); if OrderedTaxa.Items[numero]<>' ' then begin //Go search the frequency of the taxa nombre:=strtoint(OrderedTaxa.items[numero]); importance:=strtoint(copy(frequencies.lines[nombre],pos(';',frequencies.lines[nombre])+1,length(frequencies.lines[nombre]))); level:=Math.floor(1+9.99999*((math.log2(importance)-math.log2(min))/(math.log2(max)-math.log2(min)))); nexus.lines[i]:=copy(nexus.lines[i],0,pos('s=n',nexus.lines[i])+2)+ ' c='+colorR.lines[level]+' '+colorG.lines[level]+' '+colorB.lines[level]+ ' b='+colorR.lines[level]+' '+colorG.lines[level]+' '+colorB.lines[level]+','; end; inc(i); end; while (i'VLABELS') do begin inc(i); end; inc(i); while (i';') do begin numero:=strtoint(copy(nexus.lines[i],1,pos(' ',nexus.lines[i])-1)); if OrderedTaxa.Items[numero]<>' ' then begin //Go search the frequency of the taxa nombre:=strtoint(OrderedTaxa.items[numero]); importance:=strtoint(copy(frequencies.lines[nombre],pos(';',frequencies.lines[nombre])+1,length(frequencies.lines[nombre]))); level:=Math.floor(1+9.99999*((math.log2(importance)-math.log2(min))/(math.log2(max)-math.log2(min)))); nexus.lines[i]:=copy(nexus.lines[i],0,pos('f=''',nexus.lines[i])+1)+ Font.Lines[level]+','; end; inc(i); end; end; nexus.lines.savetofile(opendialog1.filename+'.colored.nexus'); winexec(PChar('C:\Program Files\SplitsTree\Splitstree.exe -i "'+opendialog1.filename+'.colored.nexus'+'"'),1); end; procedure TForm1.RadioGroup1Click(Sender: TObject); begin if radioGroup1.ItemIndex=0 then begin windowSize.text:=inttostr(min(3,strtoint(edit1.text))); windowLabel.visible:=false; end else begin windowSize.text:='50'; windowLabel.Visible:=true; end; end; procedure TForm1.Label5Click(Sender: TObject); begin winexec(PChar('explorer "'+lang.lines[32]+'"'),1) end; procedure TForm1.UPGMA(Sender: TObject); var trees:array of string; larbre1,larbre2:PhyloNode; mini,minj,i,j,k:integer; mind,d:real; ligne:string; begin //StringGrid1.visible:=false; { nexus.lines.loadfromfile(APplication.exename+'.txt'); stringgrid1.colcount:=nexus.lines.count; stringgrid1.rowcount:=nexus.lines.count; for i:=0 to nexus.lines.count-1 do begin j:=0; ligne:=nexus.Lines[i]; while pos(';',ligne)>0 do begin stringgrid1.cells[i,j]:=copy(ligne,0,pos(';',ligne)-1); ligne:=copy(ligne,pos(';',ligne)+1,length(ligne)); inc(j); end; end; } //showmessage('gridok'); showtree.visible:=true; Button3.enabled:=false; //UPGMA algorithm: { What is computed here is the tree directly in the newick format (as a string) It will then be transformed into a PhyloTree object The parts of the tree computed so far, during the execution of UPGMA are stored in the array trees. The algorithm here is in O(n^3) even if we could do better (O(n²)) by storing (and updating) for each line the position of the minimum distance of the line. } setlength(trees,richedit1.Lines.Count); for k:=0 to richedit1.Lines.Count-1 do begin trees[k]:=Stringgrid1.cells[0,k+1]; end; for k:=0 to richedit1.Lines.Count-2 do begin //Find minimum distance in the distance table mind:=strtofloat(Stringgrid1.Cells[1,2]); mini:=0; minj:=1; //showmessage('Etape'+inttostr(k)); for i:=0 to richedit1.Lines.Count-1-k do begin for j:=i+1 to richedit1.Lines.Count-1-k do begin d:=strtofloat(Stringgrid1.Cells[i+1,j+1]); if d