### Better definitions and illustrations for boxplots

Georg Troska <georg.troska <at> uni-dortmund.de>

2016-02-10 10:31:30 GMT

Dear root-developers, dear users,
I learnt the advantages of root when I worked for HEP-experiments in the
past. Nowerdays I'm no longer working for HEP but still like to use root
for process-control purposes of larger datasets
The people in the company I'm working for like box-plots. So I started to
implement first classes creating boxplots on top of TH2s. Since a while
the "candle" option is available for the Draw-method, but I'm not
satisfied with it. Reasons:
* The Definition of the candle could be improved (especially the length
of the whiskers and the non-existance of so called outliers
* no grouping if possible. I like to use trend-charts (some value vs the
time. When using grouping you can compare two values vs. the time, see my
example)
I would like to provide you a simplified version of the boxplot-class I'm
using. I have anther complilable version running under Linux, but this
here runs in CINT without compilation (I have tested it with root 5.32 and
5.34)
I would appreciate it very much if you could think about to implement
these features in a better way by using THistPainter (what I'm doing here
is a bit -~~poor~~- )
In fact there are several more Features one can implement into a candle
such as:
* Kinks in the candle to show the significance of the median
* Show all values in the candle (underlying scatter plot)
* Show a vertical histogram next to each candle
* Possiblity to scale the box-width
* Write numbers on at the side of the candle to provide the numbers for
mean, median, iqr, etc...
Please tell me what you think about this, and let me know. If you are
interested I can provide some code also
Thanks
Georg

#ifndef BOXPLOT_H
#define BOXPLOT_H
/* BoxPlot class by Dr. G. Troska & Dr. T. Neddermann */
/* Adapted for windows, including multi-group operations, speratorlines and outliers */
/* Version from 2016-02-09 */
#include <TLine.h>
#include <TBox.h>
class Candle{
protected:
TLine *_l;
TBox *_b;
TLine *_ml;
TLine *_ll;
TLine *_ul;
TLine *_medl;
TPolyMarker *_outmarker;
float _outliers[1000];
int _nOutliers;
float _mean;
float _median;
float _m25;
float _mm;
float _p25;
float _pp;
float _bin;
float _width;
void setLines() {
_l->SetX1(_bin);
_l->SetX2(_bin);
_l->SetY1(_mm);
_l->SetY2(_pp);
_b->SetX1(_bin-_width/2); _b->SetX2(_bin+_width/2);
_b->SetY1(_m25); _b->SetY2(_p25);
_ml->SetX1(_bin-_width/2); _ml->SetX2(_bin+_width/2);
_ml->SetY1(_mean); _ml->SetY2(_mean);
_medl->SetX1(_bin-_width/2); _medl->SetX2(_bin+_width/2);
_medl->SetY1(_median); _medl->SetY2(_median);
_ll->SetX1(_bin-_width/2); _ll->SetX2(_bin+_width/2);
_ll->SetY1(_mm); _ll->SetY2(_mm);
_ul->SetX1(_bin-_width/2); _ul->SetX2(_bin+_width/2);
_ul->SetY1(_pp); _ul->SetY2(_pp);
cout << "Created candle at: " << _bin << endl;
}
void setOutlier() {
for (int i = 0; i< _nOutliers;i++) {
_outmarker->SetPoint(i,_bin, _outliers[i]);
//cout << "Setting outlier to" << _bin << " " << _outliers[i] << endl;
}
}
public:
Candle (float bin, float median, float mean, float m25, float p25, float mm,float pp, float *outliers,
int nOutliers, float width=0.5) : _bin(bin), _mean(mean), _median(median), _m25(m25), _p25(p25),
_mm(mm), _pp(pp), _outliers(outliers), _nOutliers(nOutliers), _width(width){
/*if (m75 > m25) return;
if (m25 > mean) return;
if (mean > p25) return;
if (p25 > p75) return;
*/
//The Vertical Line
_l = new TLine();
//The box
_b = new TBox();
_b->SetFillColor(0);
//The mean line;
_ml = new TLine();
//The median line;
_medl = new TLine();
//The lower line;
_ll = new TLine();
//The upper line;
_ul = new TLine();
//cout << "NOutlier: " << nOutliers << endl;
for (int i = 0; i < nOutliers; i++) {
//cout << " " << outliers[i] << endl;
_outliers[i] = outliers[i];
}
//outlierer
_outmarker = new TPolyMarker();
_outmarker->SetMarkerStyle(kMultiply);
_outmarker->SetMarkerSize(.7);
}
~Candle() {
delete _l;
delete _b;
delete _ml;
delete _ll;
delete _ul;
delete _medl;
}
void SetBin(float bin) {_bin = bin; }
void SetMean(float mean) { _mean = mean; }
void SetLower25(float m25) { _m25 = m25; }
void SetLower99(float mm) { _mm = mm; }
void SetUpper25(float p25) { _p25 = p25; }
void SetUpper99(float pp) { _pp = pp; }
void SetLineColor(Color_t lcolor) {
_l->SetLineColor(lcolor);
_b->SetLineColor(lcolor);
_ml->SetLineColor(lcolor);
_ll->SetLineColor(lcolor);
_ul->SetLineColor(lcolor);
_medl->SetLineColor(lcolor);
_outmarker->SetMarkerColor(lcolor);
}
void SetLineWidth(Width_t w) {
_l->SetLineWidth( w);
_b->SetLineWidth( w);
_ml->SetLineWidth( w);
_ll->SetLineWidth( w);
_ul->SetLineWidth( w);
_medl->SetLineWidth( w);
}
void Draw() {
setLines();
setOutlier();
_l->Draw();
_b->Draw("l");
_ml->Draw();
_ll->Draw();
_ul->Draw();
_medl->Draw();
_outmarker->Draw();
}
};
class BoxPlot {
protected:
TH2D * _h[16];
TH2D * _drawHisto;
int _nHist;
Color_t _color[16];
Width_t _width[16];
Candle* _candles[1000];
TLine* _separatorLine[100];
bool _doSeparatorLines;
int _nSeparatorLines;
int _nCandles;
bool getQuantiles(TH1D* h, float &mm, float &m25, float &median, float &p25, float &pp, float
*outliers, int &nOutliers) {
int entries = h->Integral();
float counter = 0;
if (entries < 1 ) return false;
for (int i = 1; i < h->GetNbinsX(); i++) {
float myBin = h->GetBinContent(i);
if (counter/entries < 0.25 && (counter + myBin)/entries >=0.25) m25 = h->GetBinCenter(i); // Lower
edge of box
if (counter/entries < 0.5 && (counter + myBin)/entries >=0.5) median = h->GetBinCenter(i); //Median
if (counter/entries < 0.75 && (counter + myBin)/entries >=0.75) p25 = h->GetBinCenter(i); //Uppeder
edge of box
counter += myBin;
}
//Inter quantile range
float iqr = p25 - m25;
//Lower end of whisker
int bin = h->FindBin(m25-1.5*iqr);
//but extending only to the lowest data value within this range
while (h->GetBinContent(bin) == 0 && bin <= h->GetNbinsX()) bin++;
mm = h->GetBinCenter(bin);
//upder end of whisker
bin = h->FindBin(p25+1.5*iqr);
while (h->GetBinContent(bin) == 0 && bin >= h->GetNbinsX()) bin--;
pp = h->GetBinCenter(bin);
// Outliers
nOutliers = 0;
for (int i = 0; i < h->GetNbinsX(); i++) {
if (h->GetBinContent(i) > 0 && (h->GetBinCenter(i) < mm || h->GetBinCenter(i) > pp) ) {
for (int j=0; j < h->GetBinContent(i); j++) {
outliers[nOutliers] = h->GetBinCenter(i);
nOutliers++;
}
}
}
return true;
}
public:
BoxPlot() : _nCandles(0), _nHist(0),_doSeparatorLines(false),_nSeparatorLines(0) {
for (int i = 0 ; i < 1000; i++) _candles[i] = NULL;
_drawHisto = NULL;
}
BoxPlot(TH2D *h) : _nCandles(0), _nHist(1),_doSeparatorLines(false),_nSeparatorLines(0) {
_h[0] = new TH2D(*h);
_drawHisto = new TH2D(*h);
_color[0] = 1;
_width[0] = 1;
for (int i = 0 ; i < 1000; i++) _candles[i] = NULL;
}
BoxPlot(const char* title, TH2D *h) : _nCandles(0),
_nHist(1),_doSeparatorLines(false),_nSeparatorLines(0) {
_h[0] = new TH2D(*h);
_drawHisto = new TH2D(*h);
_color[0] = 1;
_width[0] = 1;
_drawHisto->SetTitle(title);
for (int i = 0 ; i < 1000; i++) _candles[i] = NULL;
}
BoxPlot(const char* title, TH2D *h1, TH2D *h2) : _nCandles(0),
_nHist(2),_doSeparatorLines(false),_nSeparatorLines(0) {
_h[0] = new TH2D(*h1);
_h[1] = new TH2D(*h2);
_drawHisto = new TH2D(*h1);
_color[0] = 1;
_width[0] = 1;
_color[1] = 1;
_width[1] = 1;
_drawHisto->SetTitle(title);
for (int i = 0 ; i < 1000; i++) _candles[i] = NULL;
}
int addHistogram(const char* title, TH2D *h) {
_h[_nHist] = new TH2D(*h);
if (_nHist == 0) {
_drawHisto = h;
_drawHisto->SetTitle(title);
}
_color[_nHist] = 1;
_width[_nHist] = 1;
_nHist++;
}
void enableSeparatorLines(bool en=true) {_doSeparatorLines = en; }
void Draw() {
_drawHisto->SetStats(kFALSE);
TLegend *leg = new TLegend(0.6,0.7,0.85,0.85);
leg->SetFillColor(0);
int nBins = _drawHisto->GetNbinsX();
_drawHisto->Draw("colz");
TH1D *proj;
float mm, m25, med, p25, pp = 0.;
float outliers[1000],
int nOutliers;
int candleCounter = 0;
for (int group = 0; group < _nHist; group++) {
cout << "Group: " << group << endl;
TH2D *myHisto = _h[group];
myHisto->SetLineColor(_color[group]);
myHisto->SetLineWidth(_width[group]);
for (int i = 1; i < nBins+1; i++) {
proj = myHisto->ProjectionY("_py",i,i);
int nOutliers = 0;
if (proj->GetEntries() > 0) {
bool valid = getQuantiles(proj,mm,m25,med,p25,pp, outliers, nOutliers);
if (valid) {
float binWidth = myHisto->GetXaxis()->GetBinWidth(nBins);
float candleWidth = binWidth/(_nHist*2);
float candleOffset = - binWidth/2 + candleWidth + 2*candleWidth*group;
cout << "BinWidth: " << binWidth << " CandleWidth: " << candleWidth << " CandleOffset: " <<
candleOffset << " BinCenter: " << myHisto->GetXaxis()->GetBinCenter(i) << endl;
if(!_candles[candleCounter]) _candles[candleCounter] = new
Candle(myHisto->GetXaxis()->GetBinCenter(i)+candleOffset,med,proj->GetMean(),m25,p25,mm,pp,outliers,
nOutliers, candleWidth);
_candles[candleCounter]->SetLineColor(_color[group]);
_candles[candleCounter]->SetLineWidth(_width[group]);
_candles[candleCounter]->Draw();
candleCounter++;
if (group == 0 && _doSeparatorLines) {
TLine *myTLine = _separatorLine[_nSeparatorLines];
_nSeparatorLines++;
myTLine = new TLine();
myTLine->SetX1(myHisto->GetXaxis()->GetBinCenter(i) + binWidth/2);
myTLine->SetX2(myHisto->GetXaxis()->GetBinCenter(i) + binWidth/2);
myTLine->SetY1(_drawHisto->GetYaxis()->GetXmin());
myTLine->SetY2(_drawHisto->GetYaxis()->GetXmax());
myTLine->SetLineStyle(3);
myTLine->Draw();
}
}
}
}
leg->AddEntry(myHisto,myHisto->GetTitle(),"f");
}
leg->Draw();
/*
if (_nHist==1) {
_nCandles = _h[0]->GetNbinsX();
_drawHisto->Draw("colz");
TH1D *proj;
float mm, m25, med, p25, pp = 0.;
float outliers[1000],
int nOutliers;
for (int i = 0; i < _nCandles; i++) {
proj = _h[0]->ProjectionY("_py",i+1,i+1);
if (proj->Integral() > 0) {
bool valid = getQuantiles(proj,mm,m25,med,p25,pp, outliers, nOutliers);
if (valid) {
float width = (_h[0]->GetBinCenter(_nCandles)-_h[0]->GetBinCenter(0))/_nCandles;
if(!_candles[i]) _candles[i] = new
Candle(_h[0]->GetBinCenter(i)+width,med,proj->GetMean(),m25,p25,mm,pp,outliers, nOutliers, width*0.5);
_candles[i]->SetLineColor(_color[0]);
_candles[i]->SetLineWidth(_width[0]);
_candles[i]->Draw();
}
}
}
//leg->AddEntry(_h[group],_h[group]->GetTitle(),"f");
}
if (_nHist==2) {
_nCandles = _h[0]->GetNbinsX();
_drawHisto->Draw("colz");
TH1D *proj;
float outliers[1000];
int nOutliers;
float mm, m25, med, p25, pp;
for (int group = 0; group < _nHist; group++) {
_h[group]->SetLineColor(_color[group]);
_h[group]->SetLineWidth(_width[group]);
for (int i = 0; i < _nCandles; i++) {
proj = _h[group]->ProjectionY("_py",i+1,i+1);
if (proj->GetEntries() > 0) {
bool valid = getQuantiles(proj,mm,m25,med,p25,pp, outliers, nOutliers);
if (valid) {
float width = (_h[group]->GetBinCenter(_nCandles)-_h[group]->GetBinCenter(0))/_nCandles;
if(!_candles[i+group*_nCandles]) _candles[i+group*_nCandles] = new
Candle(_h[group]->GetBinCenter(i)+width*0.75 +
group*width*0.5,med,proj->GetMean(),m25,p25,mm,pp,outliers, nOutliers,width*0.25);
_candles[i+group*_nCandles]->SetLineColor(_color[group]);
_candles[i+group*_nCandles]->SetLineWidth(_width[group]);
_candles[i+group*_nCandles]->Draw();
}
}
}
leg->AddEntry(_h[group],_h[group]->GetTitle(),"f");
}
leg->Draw();
}*/
_drawHisto->Reset();
}
void SetLineColor(Color_t lcolor, int histo = 0) {
_color[histo] = lcolor;
}
void SetLineWidth(Width_t w , int histo = 0) {
_width[histo] = w;
}
void SetTitle(const char* title) {
_drawHisto->SetTitle(title);
}
TAxis* GetXaxis() { return _drawHisto->GetXaxis(); }
TAxis* GetYaxis() { return _drawHisto->GetYaxis(); }
};
#ifdef ____CINT____
#pragma link C++ class BoxPlot-;
#pragma link C++ class Candle-;
#endif
#endif

{
gROOT->Reset();
#include "box.h"
TCanvas *c1 = new TCanvas("c1","Sin + Cos",1200,800);
c1->Divide(2,2);
TRandom *rand = new TRandom();
TH2I *h1 = new TH2I("h1","Sin",36,0,360,300,-1.5,1.5);
TH2I *h2 = new TH2I("h2","Cos",36,0,360,300,-1.5,1.5);
h1->GetXaxis()->SetTitle("Deg [°]");
for (int i = 0; i < 360; i+=10) {
for (int j = 0; j < 100; j++) {
float myRand = rand->Gaus(sin(i*3.14/180),0.2);
h1->Fill(i,myRand);
float myRand = rand->Gaus(cos(i*3.14/180),0.1);
h2->Fill(i,myRand);
}
}
c1->cd(1);
h1->Draw();
c1->cd(2);
h2->Draw();
c1->cd(3);
BoxPlot *b = new BoxPlot("Sin + Cos",(TH2D*)h1,(TH2D*)h2);
b->SetLineColor(kRed,0);
b->SetLineColor(kBlue,1);
b->Draw();
}