Skip to content

Commit

Permalink
Mudança no título do eixo da CDF, e remoção do slider de p-value
Browse files Browse the repository at this point in the history
  • Loading branch information
wevertonms committed Aug 30, 2019
1 parent 315bc24 commit 6553398
Show file tree
Hide file tree
Showing 6 changed files with 170 additions and 170 deletions.
2 changes: 1 addition & 1 deletion .vscode/settings.json
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@
],
"cSpell.language": "en,pt-BR",
"python.formatting.provider": "black",
"python.pythonPath": "C:\\Users\\Weverton Marques\\.virtualenvs\\statfit-Hn-hXqrA\\Scripts\\python.exe",
"python.pythonPath": "/usr/bin/python3.7",
"python.linting.pylintEnabled": true,
"python.linting.enabled": true,
"autoDocstring.docstringFormat": "google"
Expand Down
88 changes: 44 additions & 44 deletions .vscode/tags
Original file line number Diff line number Diff line change
@@ -1,44 +1,44 @@
!_TAG_FILE_FORMAT 2 /extended format; --format=1 will not append ;" to lines/
!_TAG_FILE_SORTED 1 /0=unsorted, 1=sorted, 2=foldcase/
!_TAG_OUTPUT_MODE u-ctags /u-ctags or e-ctags/
!_TAG_PROGRAM_AUTHOR Universal Ctags Team //
!_TAG_PROGRAM_NAME Universal Ctags /Derived from Exuberant Ctags/
!_TAG_PROGRAM_URL https://ctags.io/ /official site/
!_TAG_PROGRAM_VERSION 0.0.0 /94653669/
DISTS ..\\main.py /^DISTS = [$/;" kind:variable line:20
DISTS ..\\main.py.2c553062089a15597e815da9d769e9ae.py /^DISTS = [$/;" kind:variable line:20
Dist ..\\stats.py /^class Dist:$/;" kind:class line:5
Figure ..\\bokeh_models.py /^def Figure(title, axis_labels, **kw):$/;" kind:function line:14
Plot ..\\plot.py /^class Plot:$/;" kind:class line:31
Table ..\\plot.py /^class Table:$/;" kind:class line:132
UploadButton ..\\bokeh_models.py /^def UploadButton(label, callback, **kw):$/;" kind:function line:36
__init__ ..\\plot.py /^ def __init__(self, dists):$/;" kind:member line:133
__init__ ..\\plot.py /^ def __init__(self, dists):$/;" kind:member line:34
__init__ ..\\stats.py /^ def __init__(self, name, scipy, params):$/;" kind:member line:6
bokeh_models.py ..\\bokeh_models.py 1;" kind:file line:1
chi_squared ..\\stats.py /^def chi_squared(cdf_function, values, hist, edges):$/;" kind:function line:13
colors ..\\bokeh_models.py /^from bokeh.palettes import Category10_10 as colors # pylint: disable=no-name-in-module$/;" kind:unknown line:9
cumulative ..\\plot.py /^def cumulative(hist, edges):$/;" kind:function line:11
kolmogorov_smirnov ..\\stats.py /^def kolmogorov_smirnov(cdf, x, y):$/;" kind:function line:20
main.py ..\\main.py 1;" kind:file line:1
main.py.2c553062089a15597e815da9d769e9ae.py ..\\main.py.2c553062089a15597e815da9d769e9ae.py 1;" kind:file line:1
make_plot ..\\main.py /^ def make_plot(attr, old, new):$/;" kind:function line:50
make_plot ..\\main.py.2c553062089a15597e815da9d769e9ae.py /^ def make_plot(attr, old, new):$/;" kind:function line:50
modify_doc ..\\main.py /^def modify_doc(doc):$/;" kind:function line:29
modify_doc ..\\main.py.2c553062089a15597e815da9d769e9ae.py /^def modify_doc(doc):$/;" kind:function line:29
np ..\\main.py /^import numpy as np$/;" kind:namespace line:8
np ..\\main.py.2c553062089a15597e815da9d769e9ae.py /^import numpy as np$/;" kind:namespace line:8
np ..\\plot.py /^import numpy as np$/;" kind:namespace line:3
np ..\\stats.py /^import numpy as np$/;" kind:namespace line:1
plot.py ..\\plot.py 1;" kind:file line:1
plt ..\\bokeh_models.py /^import bokeh.plotting as plt$/;" kind:namespace line:5
server ..\\main.py /^server = Server({"\/": modify_doc})$/;" kind:variable line:69
server ..\\main.py.2c553062089a15597e815da9d769e9ae.py /^server = Server({"\/": modify_doc})$/;" kind:variable line:69
slider_callback ..\\main.py /^ def slider_callback(attr, old, new):$/;" kind:function line:36
slider_callback ..\\main.py.2c553062089a15597e815da9d769e9ae.py /^ def slider_callback(attr, old, new):$/;" kind:function line:36
ss ..\\stats.py /^import scipy.stats as ss$/;" kind:namespace line:2
stats.py ..\\stats.py 1;" kind:file line:1
update_data ..\\plot.py /^ def update_data(self, values):$/;" kind:member line:100
update_histogram ..\\plot.py /^ def update_histogram(self, hist, edges):$/;" kind:member line:88
upload_callback ..\\bokeh_models.py /^ def upload_callback(attr, old, new):$/;" kind:function line:78
weverton ..\\stats.py /^def weverton(cdf, x, y):$/;" kind:function line:24
!_TAG_FILE_FORMAT 2 /extended format; --format=1 will not append ;" to lines/
!_TAG_FILE_SORTED 1 /0=unsorted, 1=sorted, 2=foldcase/
!_TAG_OUTPUT_MODE u-ctags /u-ctags or e-ctags/
!_TAG_PROGRAM_AUTHOR Universal Ctags Team //
!_TAG_PROGRAM_NAME Universal Ctags /Derived from Exuberant Ctags/
!_TAG_PROGRAM_URL https://ctags.io/ /official site/
!_TAG_PROGRAM_VERSION 0.0.0 /94653669/
DISTS ..\\main.py /^DISTS = [$/;" kind:variable line:20
DISTS ..\\main.py.2c553062089a15597e815da9d769e9ae.py /^DISTS = [$/;" kind:variable line:20
Dist ..\\stats.py /^class Dist:$/;" kind:class line:5
Figure ..\\bokeh_models.py /^def Figure(title, axis_labels, **kw):$/;" kind:function line:14
Plot ..\\plot.py /^class Plot:$/;" kind:class line:31
Table ..\\plot.py /^class Table:$/;" kind:class line:132
UploadButton ..\\bokeh_models.py /^def UploadButton(label, callback, **kw):$/;" kind:function line:36
__init__ ..\\plot.py /^ def __init__(self, dists):$/;" kind:member line:133
__init__ ..\\plot.py /^ def __init__(self, dists):$/;" kind:member line:34
__init__ ..\\stats.py /^ def __init__(self, name, scipy, params):$/;" kind:member line:6
bokeh_models.py ..\\bokeh_models.py 1;" kind:file line:1
chi_squared ..\\stats.py /^def chi_squared(cdf_function, values, hist, edges):$/;" kind:function line:13
colors ..\\bokeh_models.py /^from bokeh.palettes import Category10_10 as colors # pylint: disable=no-name-in-module$/;" kind:unknown line:9
cumulative ..\\plot.py /^def cumulative(hist, edges):$/;" kind:function line:11
kolmogorov_smirnov ..\\stats.py /^def kolmogorov_smirnov(cdf, x, y):$/;" kind:function line:20
main.py ..\\main.py 1;" kind:file line:1
main.py.2c553062089a15597e815da9d769e9ae.py ..\\main.py.2c553062089a15597e815da9d769e9ae.py 1;" kind:file line:1
make_plot ..\\main.py /^ def make_plot(attr, old, new):$/;" kind:function line:50
make_plot ..\\main.py.2c553062089a15597e815da9d769e9ae.py /^ def make_plot(attr, old, new):$/;" kind:function line:50
modify_doc ..\\main.py /^def modify_doc(doc):$/;" kind:function line:29
modify_doc ..\\main.py.2c553062089a15597e815da9d769e9ae.py /^def modify_doc(doc):$/;" kind:function line:29
np ..\\main.py /^import numpy as np$/;" kind:namespace line:8
np ..\\main.py.2c553062089a15597e815da9d769e9ae.py /^import numpy as np$/;" kind:namespace line:8
np ..\\plot.py /^import numpy as np$/;" kind:namespace line:3
np ..\\stats.py /^import numpy as np$/;" kind:namespace line:1
plot.py ..\\plot.py 1;" kind:file line:1
plt ..\\bokeh_models.py /^import bokeh.plotting as plt$/;" kind:namespace line:5
server ..\\main.py /^server = Server({"\/": modify_doc})$/;" kind:variable line:69
server ..\\main.py.2c553062089a15597e815da9d769e9ae.py /^server = Server({"\/": modify_doc})$/;" kind:variable line:69
slider_callback ..\\main.py /^ def slider_callback(attr, old, new):$/;" kind:function line:36
slider_callback ..\\main.py.2c553062089a15597e815da9d769e9ae.py /^ def slider_callback(attr, old, new):$/;" kind:function line:36
ss ..\\stats.py /^import scipy.stats as ss$/;" kind:namespace line:2
stats.py ..\\stats.py 1;" kind:file line:1
update_data ..\\plot.py /^ def update_data(self, values):$/;" kind:member line:100
update_histogram ..\\plot.py /^ def update_histogram(self, hist, edges):$/;" kind:member line:88
upload_callback ..\\bokeh_models.py /^ def upload_callback(attr, old, new):$/;" kind:function line:78
weverton ..\\stats.py /^def weverton(cdf, x, y):$/;" kind:function line:24
2 changes: 1 addition & 1 deletion Procfile
Original file line number Diff line number Diff line change
@@ -1 +1 @@
web: bokeh serve --port=$PORT --allow-websocket-origin=statfitweb.herokuapp.com --address=0.0.0.0 --use-xheaders .
web: bokeh serve --port=$PORT --allow-websocket-origin=statfitweb.herokuapp.com --address=0.0.0.0 --use-xheaders .
8 changes: 4 additions & 4 deletions main.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ def create_plot():
plot = Plot(DISTS)
numbins_slider = Slider(title="Number of bins", value=10, start=5, end=15, step=1)
div = Div(text="<h2>Goodness of fit tests (p-values)</h2>")
pvalue_slider = Slider(title="Min p-value (%)", value=5, start=1, end=10, step=1)
# pvalue_slider = Slider(title="Min p-value (%)", value=5, start=1, end=10, step=1)
table = Table([dist.name for dist in DISTS])

def numbins_slider_callback(attr, old, new):
Expand All @@ -36,7 +36,7 @@ def numbins_slider_callback(attr, old, new):
wms = [weverton(dist.cdf, data["x"], data["y"]) for dist in DISTS]
table.source.data.update(ks=ks, chi=chi, wms=wms)

def pvalue_slider_callback(attr, old, new):
# def pvalue_slider_callback(attr, old, new):
pass

def make_plot(attr, old, new):
Expand All @@ -49,8 +49,8 @@ def make_plot(attr, old, new):

button = UploadButton("Open file ...", make_plot, button_type="success")
numbins_slider.on_change("value", numbins_slider_callback)
pvalue_slider.on_change("value", pvalue_slider_callback)
controls = column(button, numbins_slider, div, pvalue_slider, table.table)
# pvalue_slider.on_change("value", pvalue_slider_callback)
controls = column(button, numbins_slider, div, table.table)
return row(controls, plot.layout, sizing_mode="scale_height")


Expand Down
2 changes: 1 addition & 1 deletion plot.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ def __init__(self, dists):
)
self.cdf_plot = Figure(
"Cumulative Probability Density",
("Values", "Relative frequency"),
("Values", "Cumulative frequency"),
y_range=Range1d(start=0.0, end=1.02),
)
self.cdf_plot.line(
Expand Down
238 changes: 119 additions & 119 deletions stats.py
Original file line number Diff line number Diff line change
@@ -1,119 +1,119 @@
import numpy as np
import scipy.stats as ss


class Dist:
    """Pair a display name with a scipy.stats distribution and its fit parameters.

    Args:
        name (str): Name of the distribution.
        scipy: Distribution object from scipy.stats. ``fit`` calls its
            ``fit(values)`` method and then calls the object itself with the
            fitted parameters as keyword arguments.
        params (dict): Dictionary whose keys are the fit parameters of the
            distribution. ``fit`` overwrites its values in place.
    """

    def __init__(self, name, scipy, params):
        """Store the configuration; ``pdf`` and ``cdf`` stay ``None`` until ``fit`` runs."""
        self.name, self.scipy, self.params = name, scipy, params
        self.pdf = self.cdf = None

    def fit(self, values):
        """Fit the distribution to the observed values.

        The estimates returned by ``self.scipy.fit`` are paired positionally
        with the keys of ``self.params`` (in insertion order), then ``pdf`` and
        ``cdf`` are bound to the distribution built from those parameters.

        Args:
            values (numpy.array): Observed values.
        """
        estimates = self.scipy.fit(values)
        self.params.update(dict(zip(self.params, estimates)))
        frozen = self.scipy(**self.params)
        self.pdf, self.cdf = frozen.pdf, frozen.cdf


# Candidate distributions offered for fitting. Dist.fit pairs each dictionary's
# keys positionally with the values returned by the distribution's fit(), so
# the key order below matters.
DISTS = [
    Dist("Normal", ss.norm, {"loc": None, "scale": None}),
    Dist("Log-Normal", ss.lognorm, {"s": None, "loc": None, "scale": None}),
    Dist("Weibull", ss.weibull_min, {"c": None, "loc": None, "scale": None}),
    Dist("Gamma", ss.gamma, {"a": None, "loc": None, "scale": None}),
    Dist("Logistic", ss.logistic, {"loc": None, "scale": None}),
]


def cumulative(hist, edges):
    """Build the points of a staircase CDF plot from a density histogram.

    Each bin contributes ``density * bin_width`` to the running total. The
    width of every bin is taken from ``edges`` individually, so histograms
    with non-uniform bins are handled correctly.

    Args:
        hist (numpy.array): Density of each histogram bin.
        edges (numpy.array): Bin boundaries; one element more than ``hist``.

    Returns:
        list[float]: x coordinates of the plot points.
        list[float]: y coordinates of the plot points.
    """
    hist = np.asarray(hist, dtype=float)
    edges = np.asarray(edges, dtype=float)
    cdf = np.cumsum(hist * np.diff(edges))
    # Every edge is visited twice (bottom and top of its step) except the last
    # one, which only closes the final plateau.
    x = np.repeat(edges, 2)[:-1].tolist()
    # The curve starts at zero on the first edge, then each cumulative value is
    # held from the top of its step to the next edge.
    y = np.concatenate(([0.0], np.repeat(cdf, 2))).tolist()
    return x, y


def chi_squared(values, cdf_function, num_bins):
    """Compute the p-value of the chi-squared test for a given number of bins.

    The observations are binned into ``num_bins`` equal-width intervals and
    the counts are compared with the counts predicted by ``cdf_function``.
    The first and last bins are treated as open-ended tails so that the
    expected counts sum to the number of observations, which
    ``scipy.stats.chisquare`` requires.

    Args:
        values (numpy.array): Observed values.
        cdf_function (callable): Function that evaluates the CDF at an array
            of points.
        num_bins (int): Number of histogram bins.

    Returns:
        float: p-value
    """
    observed, edges = np.histogram(values, bins=num_bins)
    cdf_at_edges = np.array(cdf_function(edges), dtype=float)
    # Assign the probability mass lying outside the sampled range to the two
    # outer bins; otherwise the expected total falls short of the observed one.
    cdf_at_edges[0] = 0.0
    cdf_at_edges[-1] = 1.0
    expected = np.diff(cdf_at_edges) * observed.sum()
    return ss.chisquare(observed, expected)[1]


def kolmogorov_smirnov(values, cdf_function):
    """Compute the p-value of the Kolmogorov-Smirnov test.

    Args:
        values (numpy.array): Observed values.
        cdf_function (callable): Function that evaluates the CDF at given points.

    Returns:
        float: p-value
    """
    test_result = ss.kstest(values, cdf_function)
    # The result is indexable as (statistic, p-value); only the latter is used.
    p_value = test_result[1]
    return p_value


def weverton(cdf, x, y):
    """Compute the sum of squared differences between a CDF and reference values.

    Args:
        cdf (callable): Function that evaluates the CDF at the points ``x``.
        x: Points at which the CDF is evaluated.
        y: Reference values subtracted from ``cdf(x)``.

    Returns:
        float: value of the test.
    """
    deviations = cdf(x) - y
    return np.sum(deviations ** 2)


if __name__ == "__main__":
    # Manual smoke test: fit the first distribution in DISTS to evenly spaced
    # values and print the results of both goodness-of-fit tests.
    values = np.linspace(-15, 15, 9)
    DISTS[0].fit(values)
    cdf_function = DISTS[0].cdf

    results = ss.kstest(values, cdf_function)
    print(f"KS: {results[0]:.3g}; p-value = {results[1]:.5g}")

    n_bins = np.random.randint(1, len(values))
    # chi_squared returns only the p-value (a scalar), so it cannot be indexed
    # like the kstest result above.
    p_value = chi_squared(values, cdf_function, n_bins)
    print(f"Chi: p-value = {p_value:.5g} ({n_bins} bins)")
import numpy as np
import scipy.stats as ss


class Dist:
    """Pair a display name with a scipy.stats distribution and its fit parameters.

    Args:
        name (str): Name of the distribution.
        scipy: Distribution object from scipy.stats. ``fit`` calls its
            ``fit(values)`` method and then calls the object itself with the
            fitted parameters as keyword arguments.
        params (dict): Dictionary whose keys are the fit parameters of the
            distribution. ``fit`` overwrites its values in place.
    """

    def __init__(self, name, scipy, params):
        """Store the configuration; ``pdf`` and ``cdf`` stay ``None`` until ``fit`` runs."""
        self.name, self.scipy, self.params = name, scipy, params
        self.pdf = self.cdf = None

    def fit(self, values):
        """Fit the distribution to the observed values.

        The estimates returned by ``self.scipy.fit`` are paired positionally
        with the keys of ``self.params`` (in insertion order), then ``pdf`` and
        ``cdf`` are bound to the distribution built from those parameters.

        Args:
            values (numpy.array): Observed values.
        """
        estimates = self.scipy.fit(values)
        self.params.update(dict(zip(self.params, estimates)))
        frozen = self.scipy(**self.params)
        self.pdf, self.cdf = frozen.pdf, frozen.cdf


# Candidate distributions offered for fitting. Dist.fit pairs each dictionary's
# keys positionally with the values returned by the distribution's fit(), so
# the key order below matters.
DISTS = [
    Dist("Normal", ss.norm, {"loc": None, "scale": None}),
    Dist("Log-Normal", ss.lognorm, {"s": None, "loc": None, "scale": None}),
    Dist("Weibull", ss.weibull_min, {"c": None, "loc": None, "scale": None}),
    Dist("Gamma", ss.gamma, {"a": None, "loc": None, "scale": None}),
    Dist("Logistic", ss.logistic, {"loc": None, "scale": None}),
]


def cumulative(hist, edges):
    """Build the points of a staircase CDF plot from a density histogram.

    Each bin contributes ``density * bin_width`` to the running total. The
    width of every bin is taken from ``edges`` individually, so histograms
    with non-uniform bins are handled correctly.

    Args:
        hist (numpy.array): Density of each histogram bin.
        edges (numpy.array): Bin boundaries; one element more than ``hist``.

    Returns:
        list[float]: x coordinates of the plot points.
        list[float]: y coordinates of the plot points.
    """
    hist = np.asarray(hist, dtype=float)
    edges = np.asarray(edges, dtype=float)
    cdf = np.cumsum(hist * np.diff(edges))
    # Every edge is visited twice (bottom and top of its step) except the last
    # one, which only closes the final plateau.
    x = np.repeat(edges, 2)[:-1].tolist()
    # The curve starts at zero on the first edge, then each cumulative value is
    # held from the top of its step to the next edge.
    y = np.concatenate(([0.0], np.repeat(cdf, 2))).tolist()
    return x, y


def chi_squared(values, cdf_function, num_bins):
    """Compute the p-value of the chi-squared test for a given number of bins.

    The observations are binned into ``num_bins`` equal-width intervals and
    the counts are compared with the counts predicted by ``cdf_function``.
    The first and last bins are treated as open-ended tails so that the
    expected counts sum to the number of observations, which
    ``scipy.stats.chisquare`` requires.

    Args:
        values (numpy.array): Observed values.
        cdf_function (callable): Function that evaluates the CDF at an array
            of points.
        num_bins (int): Number of histogram bins.

    Returns:
        float: p-value
    """
    observed, edges = np.histogram(values, bins=num_bins)
    cdf_at_edges = np.array(cdf_function(edges), dtype=float)
    # Assign the probability mass lying outside the sampled range to the two
    # outer bins; otherwise the expected total falls short of the observed one.
    cdf_at_edges[0] = 0.0
    cdf_at_edges[-1] = 1.0
    expected = np.diff(cdf_at_edges) * observed.sum()
    return ss.chisquare(observed, expected)[1]


def kolmogorov_smirnov(values, cdf_function):
    """Compute the p-value of the Kolmogorov-Smirnov test.

    Args:
        values (numpy.array): Observed values.
        cdf_function (callable): Function that evaluates the CDF at given points.

    Returns:
        float: p-value
    """
    test_result = ss.kstest(values, cdf_function)
    # The result is indexable as (statistic, p-value); only the latter is used.
    p_value = test_result[1]
    return p_value


def weverton(cdf, x, y):
    """Compute the sum of squared differences between a CDF and reference values.

    Args:
        cdf (callable): Function that evaluates the CDF at the points ``x``.
        x: Points at which the CDF is evaluated.
        y: Reference values subtracted from ``cdf(x)``.

    Returns:
        float: value of the test.
    """
    deviations = cdf(x) - y
    return np.sum(deviations ** 2)


if __name__ == "__main__":
    # Manual smoke test: fit the first distribution in DISTS to evenly spaced
    # values and print the results of both goodness-of-fit tests.
    values = np.linspace(-15, 15, 9)
    DISTS[0].fit(values)
    cdf_function = DISTS[0].cdf

    results = ss.kstest(values, cdf_function)
    print(f"KS: {results[0]:.3g}; p-value = {results[1]:.5g}")

    n_bins = np.random.randint(1, len(values))
    # chi_squared returns only the p-value (a scalar), so it cannot be indexed
    # like the kstest result above.
    p_value = chi_squared(values, cdf_function, n_bins)
    print(f"Chi: p-value = {p_value:.5g} ({n_bins} bins)")

0 comments on commit 6553398

Please sign in to comment.