Skip to content

Commit

Permalink
Add the possibility to transform the data: normalize/standardize
Browse files Browse the repository at this point in the history
  • Loading branch information
zarch committed Aug 20, 2013
1 parent 1d17668 commit 77f3d8f
Show file tree
Hide file tree
Showing 5 changed files with 63 additions and 24 deletions.
3 changes: 2 additions & 1 deletion libmlcls/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,8 @@ MODULE_TOPDIR = ../../..
include $(MODULE_TOPDIR)/include/Make/Other.make
include $(MODULE_TOPDIR)/include/Make/Python.make

MODULES = mlchk mlclassify mlconf mlplts mlsegment mlstats mltraining mlwriterast
MODULES = mlchk mlclassify mlconf mlplts mlsegment mlstats mltraining \
mlwriterast mltransform

ETCDIR = $(ETC)/ml.class

Expand Down
5 changes: 2 additions & 3 deletions libmlcls/mlstats.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,5 @@ def statistics(group, zones, ratio, hdf, stat_name, stat_results):
df = pnl2df(get_pnl(gmaps, stat_name))
# 'photo_r_mean', 'photo_g_mean', 'photo_b_mean'
add_ratio(df, ratio)
df_stand = (df - df.mean()) / df.std()
df_stand.to_hdf(hdf, str(stat_results))
return df_stand
df.to_hdf(hdf, str(stat_results))
return df
30 changes: 30 additions & 0 deletions libmlcls/mltransform.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
# -*- coding: utf-8 -*-
"""
Created on Fri Aug 16 14:44:30 2013
@author: pietro
"""


#
# TRANSFORMATION FUNCTIONS
#
def stdize(df, mean, std):
    """Standardize *df*: subtract *mean*, then divide by *std*.

    The arguments only need to support ``-`` and ``/``; the caller in
    this module passes the results of ``.mean()`` / ``.std()``.
    """
    return (df - mean) / std


def normalize(df, xmin, xmax):
    """Rescale *df* relative to the range ``[xmin, xmax]``.

    Values equal to *xmin* map to 0 and values equal to *xmax* map to 1.
    No guard is applied when ``xmax == xmin``.
    """
    return (df - xmin) / (xmax - xmin)


def transform(method, data, chk):
    """Apply the transformation named by *method* to *data* and *chk*.

    The scaling parameters (mean/std or min/max) are always computed from
    *chk*, and the very same parameters are then applied to both inputs,
    so the two results live on the same scale.

    Parameters
    ----------
    method : str
        One of ``'standardize'``, ``'normalize'`` or ``'nothing'``.
    data, chk :
        Objects supporting arithmetic; *chk* must also provide
        ``mean()``/``std()``/``min()``/``max()`` for the selected method
        (presumably pandas DataFrames -- confirm against the caller).

    Returns
    -------
    tuple
        ``(transformed_data, transformed_chk)``.

    Raises
    ------
    ValueError
        If *method* is not one of the supported names.
    """
    if method == 'standardize':
        mean, std = chk.mean(), chk.std()
        # The second element must be the transformed *chk*, not *data*
        # again, so that the returned pair mirrors the (data, chk) inputs.
        return stdize(data, mean, std), stdize(chk, mean, std)
    elif method == 'normalize':
        xmin, xmax = chk.min(), chk.max()
        return normalize(data, xmin, xmax), normalize(chk, xmin, xmax)
    elif method == 'nothing':
        # Pass-through: both inputs are returned untouched.
        return data, chk
    else:
        raise ValueError('method: <%s> not valid' % method)
45 changes: 26 additions & 19 deletions ml.classify/ml.classify.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@
#% type: string
#% description: Name of the statistic data contained in the HDF file
#% required: yes
#% answer: K_all
#% answer: data
#%end
#%option
#% key: training_json
Expand All @@ -45,11 +45,11 @@
#% answer: training.json
#%end
#%option
#% key: training_Kchk
#% key: training_kchk
#% type: string
#% description: Name for the training values in the HDF
#% required: no
#% answer: K_chk
#% answer:
#% guisection: Training
#%end
#%option
Expand Down Expand Up @@ -113,6 +113,14 @@
#% required: no
#% answer: classify_using_%s
#%end
#%option
#% key: transform
#% type: string
#% options: nothing,standardize,normalize
#% description: Choose the transformation function of the statistics data of the segments
#% required: no
#% answer: nothing
#%end

from __future__ import absolute_import, division, print_function, unicode_literals
import json
Expand All @@ -139,39 +147,37 @@
sys.path.append(path)
from mlchk import check_classification
from mltraining import extract_itr, extract_training
from mltransform import transform
from mlclassify import mls_classification


def main(opts, flgs):
if not opts['training_hdf']:
opts['training_hdf'] = opts['hdf']

#import ipdb; ipdb.set_trace()
K_all = pnd.read_hdf(opts['hdf'], str(opts['data']))
data = pnd.read_hdf(opts['hdf'], str(opts['data']))

if opts['training_Kchk'] and opts['training_ychk']:
Kchk = pnd.read_hdf(opts['training_hdf'], str(opts['training_Kchk']))
if opts['training_kchk'] and opts['training_ychk']:
Kchk = pnd.read_hdf(opts['training_hdf'], str(opts['training_kchk']))
ychk = pnd.read_hdf(opts['training_hdf'], str(opts['training_ychk']))
itr = extract_itr(Kchk, ychk, int(opts['training_number']))
else:
with open(opts['training_json'], 'r') as fp:
tr = json.load(fp)
itr, Kchk, ychk = extract_training(tr, K_all,
check_classification(tr)
itr, Kchk, ychk = extract_training(tr, data,
int(opts['training_number']))
Kchk = pnd.DataFrame(Kchk.copy().tolist(),
index=Kchk.index,
columns=Kchk.iloc[0].index)
K_chk, y_chk = Kchk.loc[itr], ychk.loc[itr]
# Kchk = pnd.DataFrame(Kchk.copy().tolist(),
# index=Kchk.index,
# columns=Kchk.iloc[0].index)
conf = imp.load_source("conf", opts['training_conf'])
mls = getattr(conf, opts['training_mls'])
key = None if opts['training_key'] == '' else opts['training_key']
with open(opts['training_json'], 'r') as fp:
tr = json.load(fp)
check_classification(tr)
# mls_classification(K_all, K_chk, y_chk, mls, hdf, out_class)
mls_classification(K_all, K_chk, y_chk, mls,
opts['hdf'], opts['out_class'],
key=key)
tdata, tKchk = transform(opts['transform'], data, Kchk)
tK_chk, y_chk = tKchk.loc[itr], ychk.loc[itr]
# mls_classification(data, K_chk, y_chk, mls, hdf, out_class)
mls_classification(tdata, tK_chk, y_chk, mls,
opts['hdf'], opts['out_class'], key=key)


if __name__ == "__main__":
Expand All @@ -183,6 +189,7 @@ def main(opts, flgs):
training_conf=mlconf.py \
training_mls=BEST \
training_hdf=/home/pietro/docdat/phd/edinburgh/segSVM/segments-ml/data.hdf \
training_kchk=K_chk \
training_ychk=y_chk \
hdf=results.hdf
Expand Down
4 changes: 3 additions & 1 deletion ml.segstats/ml.segstats.py
Original file line number Diff line number Diff line change
Expand Up @@ -112,7 +112,9 @@ def main(opts, flgs):
segment(opts['thrs'], opts['group'], opts['seg_opts'],
opts['seg_name'])
if 'r' in flgs and flgs['r']:
statistics(opts['group'], opts['seg_name'] % opts['thrs'][-1],
sname = opts['seg_name'] % opts['thrs'][-1] \
if '%' in opts['seg_name'] else opts['seg_name']
statistics(opts['group'], sname,
opts['stat_ratio_cols'].split(','), opts['hdf'],
opts['stat_name'], opts['stat_results'])

Expand Down

0 comments on commit 77f3d8f

Please sign in to comment.