From e0429bb0345bad9ef6b1cd0356159559985a9172 Mon Sep 17 00:00:00 2001 From: Aleksey Lobanov Date: Sun, 28 Jun 2015 16:03:48 +0300 Subject: [PATCH] New, improved scoring function. Works better, but can be improved --- src/crossgen.cpp | 18 ++++++++++++------ wxCrossGen/wxCrossGen.project | 8 ++++---- wxCrossGen/wxCrossGen.txt | 2 +- 3 files changed, 17 insertions(+), 11 deletions(-) diff --git a/src/crossgen.cpp b/src/crossgen.cpp index 5b8e665..3db6a45 100644 --- a/src/crossgen.cpp +++ b/src/crossgen.cpp @@ -112,15 +112,21 @@ void generateAllWords(const DictType &dict, AllWordsType &words_out, // TODO: improve formula std::function< int(const wxString& ) > getWordScore = [freqs, freqs_sorted, char_cnt] (const wxString &s){ - double score = 0; + double score = 1; for (auto ch: s) - score += 1 / pow(static_cast(freqs.at(ch))/char_cnt,2); + score *= static_cast(freqs.at(ch))/char_cnt; // static_cast(freqs.at(ch))/char_cnt = normalaised frequency // euristic fomula for good (not normal) - score = score / s.size() + pow(std::max((int(s.size())-12),0),1.4) *15; - if ( score > 1.2 * freqs.size() * freqs.size() ) - return static_cast(score); + + score = std::pow(score, 1./s.size()); // score == mean geometric + /* magic formula, linear interpolation from good numbers + * can be taken from Maxima code: + * solve ([33*k+m = 1/20.4, 26*k+m=1/19],[k,m]); + */ + if ( score > 64./969 - freqs.size()*1./1938 ) + //if ( score > (1.3)* 1./(freqs.size() - 8)) + return static_cast(score * 1000); else return -1; }; @@ -176,7 +182,7 @@ void generateAllWords(const DictType &dict, AllWordsType &words_out, } for (unsigned int i = 2; i < words_out.size(); ++i) - wxLogDebug(wxT("With length %2d is %4d and after it %4d and coeff is %2.2f"), + wxLogDebug(wxT("With length %2d is %5d and after it %4d and coeff is %2.2f"), i, words_out_t.at(i).size(), words_out.at(i).size(), float(words_out_t.at(i).size())/words_out.at(i).size()); wxLogDebug(wxT("Number of words is %d"), dict.size()); diff --git a/wxCrossGen/wxCrossGen.project b/wxCrossGen/wxCrossGen.project index 888522e..6f43ab2 100644 --- a/wxCrossGen/wxCrossGen.project +++ b/wxCrossGen/wxCrossGen.project @@ -41,6 +41,10 @@ + + + + @@ -127,8 +131,4 @@ - - - - diff --git a/wxCrossGen/wxCrossGen.txt b/wxCrossGen/wxCrossGen.txt index 0208c0b..37eff4d 100644 --- a/wxCrossGen/wxCrossGen.txt +++ b/wxCrossGen/wxCrossGen.txt @@ -1 +1 @@ -../Release/main.cpp.o ../Release/fmain.cpp.o ../Release/fsettings.cpp.o ../Release/src_crossexport.cpp.o ../Release/src_crossgen.cpp.o ../Release/fbgui_fbgui.cpp.o +./Debug/main.cpp.o ./Debug/fmain.cpp.o ./Debug/fsettings.cpp.o ./Debug/src_crossexport.cpp.o ./Debug/src_crossgen.cpp.o ./Debug/fbgui_fbgui.cpp.o