Compare commits

16 Commits

Author SHA1 Message Date
57d5f1eafc Исправлено построение распределений для пятимерных функций 2021-05-09 13:58:07 +03:00
d29132b6a0 Добавили белый список для построения полного вывода 2021-05-08 20:19:56 +03:00
1ee802d8f2 Убраны лишние скобки для построения представлений функций 2021-05-08 20:19:39 +03:00
e9840a1bbb feat: Добавлена поддержка генерации всех представлений всех функций 2021-05-07 00:42:49 +03:00
1e4a37756f feat: Фильтр по степени полинома Ж. в постпроцессинге 2021-05-02 14:16:59 +03:00
c2ccb5e60f get_function_len: Added count_symmetricals method 2020-12-18 14:41:48 +03:00
b2431d07a7 Readme improvement 2020-06-29 10:57:20 +03:00
9feacee54e Readme improvement 2020-06-29 10:45:07 +03:00
dcff873fcc Added count_lens for get_function_len 2020-04-05 23:42:28 +03:00
d1998d0fd2 Added print_lens 2020-04-05 23:41:43 +03:00
041ff29388 Improved debug output 2020-03-29 23:29:52 +03:00
0976373ae4 Small fixes 2020-03-29 23:29:42 +03:00
0715c2d52f Base Python script for working with ranks added 2020-03-29 23:29:21 +03:00
230f37c9bb Improved rank saving
Now using full bit mask instead of functions list
2020-03-29 23:29:02 +03:00
3c0d408989 State saving + conditions optimization 2020-03-29 21:01:58 +03:00
e99f1874ad Improved tex output to file 2020-03-22 18:35:13 +03:00
3 changed files with 370 additions and 60 deletions

View File

@@ -1,3 +1,42 @@
# cmc-pseudo-polynomials
Курсовой проект за первый курс магистратуры ВМК МГУ
Курсовой проект за первый курс магистратуры ВМК МГУ
## Сборка
Требуется cmake и компилятор C++ с поддержкой C++14.
Если это не gcc, возможно, потребуется поправить CMakeLists.txt.
Команды для сборки могут быть такими:
```
mkdir build
cd build
cmake ..
make
```
Количество аргументов функции задаётся в коде константой `ARGS_COUNT` в файле `main.cpp`.
Поддерживаются значения от 2 до 5.
Также может быть полезно отключить генерацию разбиения по классам эквивалентности: за это отвечает параметр `ONLY_CREATE_CLASSES` в том же файле.
Это может ускорить полную генерацию и значительно сократит потребление памяти.
## Запуск
Для запуска достаточно собрать проект и вызвать
```
./main
```
## Сгенерированные результаты
Сгенерированные результаты для числа аргументов от 2 до 5 можно скачать [тут](https://storage2.likemath.ru:9000/public/cmc/espp_maps.zip).
## Анализ результатов
Результаты записываются в файлы вида `base_4_rank_3.txt` в рабочей директории.
Первое число означает число аргументов у функции, а второе длину функций в классе ПСПФ.
Сам файл представляет собой вектор битов,
где бит N выставлен, если функция с номером N принадлежит множеству функций с фиксированным числом аргументов и длиной в классе ПСПФ.
Для анализа этих файлов удобно использовать `get_function_len.py` (требуется Python3). Для экономии места эти файлы можно предварительно сжать в XZ архивы. Есть несколько вариантов использования:
1. Получить длину некоторой функции. Пример `python3 get_function_len.py 10101010`, где `10101010` -- вектор значений функции.
2. Подсчёт количества функций по классам. Пример: `python3 get_function_len.py count_lens 3`, где 3 -- количество аргументов у функций.

170
get_function_len.py Normal file
View File

@@ -0,0 +1,170 @@
import lzma
import sys
import os
import itertools
from booleantools import generate_function
def is_good_zhegalkin(function_number, args_count):
    """Tell whether every entry of the function's ``listform`` has length <= 3.

    The function object is built by ``booleantools.generate_function`` from
    the function number and the number of arguments.

    NOTE(review): ``listform`` is presumably the list of monomials of the
    Zhegalkin polynomial, which would make this a "degree <= 3" check --
    confirm against the booleantools documentation.
    """
    terms = generate_function(function_number, args_count).listform
    for term in terms:
        if len(term) > 3:
            return False
    return True
def get_rank_file_object(args_count, rank_ind):
    """Open the bitmap file of one rank for reading in binary mode.

    The file ``base_<args_count>_rank_<rank_ind>.txt`` in the current working
    directory is preferred; when it is absent, the ``.xz`` file of the same
    name is opened through ``lzma.LZMAFile``.

    Returns the opened file object, or ``None`` when neither file exists.
    The caller is responsible for closing the returned object.
    """
    plain_path = "base_{}_rank_{}.txt".format(args_count, rank_ind)
    if os.path.exists(plain_path):
        return open(plain_path, "rb")

    packed_path = plain_path + ".xz"
    if os.path.exists(packed_path):
        return lzma.LZMAFile(packed_path, "rb")

    return None
def is_function_exists(function_number, file_object):
    """Return True when bit ``function_number`` is set in a rank bitmap.

    The bitmap stores eight functions per byte, most significant bit first:
    function ``N`` lives in byte ``N // 8`` at bit position ``7 - N % 8``.

    Args:
        function_number: index of the function inside the bitmap.
        file_object: seekable binary file-like object holding the bitmap.

    Returns:
        bool: True if the bit is set; False if it is clear or if the byte
        lies beyond the end of the file.
    """
    file_object.seek(function_number // 8)
    chunk = file_object.read(1)
    if not chunk:
        # Past the end of the data: ord() would raise TypeError on an empty
        # read, so a missing byte is reported as "function not in this set".
        return False
    bit_shift = 7 - function_number % 8
    return ((ord(chunk) >> bit_shift) & 1) == 1
def function_values_to_number(values):
    """Convert a vector of function values into the function number.

    ``values`` is read as a binary numeral whose first character is the most
    significant bit; only the characters "0" and "1" are accepted (checked by
    an assertion). An empty vector yields 0.
    """
    assert set(values).issubset({"0", "1"})
    number = 0
    for bit in values:
        # Shift what has been accumulated and append the current bit.
        number = (number << 1) | (bit == "1")
    return number
def count_lens(args_count, white_map_path):
    """Print how many functions fall into each length class (rank).

    Rank bitmaps are read for ranks 1..9 through ``get_rank_file_object``;
    reading stops at the first rank that has no file. For every rank the
    number of set bits is counted, then the count, the share of each rank and
    the average length are printed.

    Args:
        args_count: number of function arguments; only used to build the
            rank file names.
        white_map_path: optional path to a bitmap of the same size as the
            rank files. When given (and non-empty), only functions whose bit
            is set in it are counted.
    """
    # Number of set bits for every possible byte value.
    bit_counts = bytes(bin(x).count("1") for x in range(256))
    len_to_count = {}

    white_map = None
    if white_map_path:
        with open(white_map_path, "rb") as white_map_file:
            white_map = white_map_file.read()
        print(f"Total whitelisted functions: {sum(map(lambda x: bit_counts[x], white_map))}")

    for rank_ind in range(1, 10):
        file_object = get_rank_file_object(args_count, rank_ind)
        if file_object is None:
            break
        # Close every rank file as soon as its content has been read.
        with file_object:
            src_data = file_object.read()
        if white_map:
            assert len(src_data) == len(white_map)
            src_data = bytes(x & y for x, y in zip(src_data, white_map))
        len_to_count[rank_ind] = sum(map(lambda x: bit_counts[x], src_data))

    total = sum(len_to_count.values())
    if total == 0:
        # No rank files, or nothing survived the whitelist: shares and the
        # average are undefined, so report that instead of dividing by zero.
        print("No functions counted for {} arguments".format(args_count))
        return

    for rank_ind, count in len_to_count.items():
        print("len {} count {} share {}".format(rank_ind, count, 1.0 * count / total))
    print("average: {}".format(sum([key * value for key, value in len_to_count.items()]) / total))
def get_function_len(args_count, function_number):
    """Return the length (rank index) of a function.

    The rank bitmaps for ranks 1..9 are probed in increasing order and the
    first rank whose bitmap contains the function is returned.

    Raises:
        AssertionError: if a rank file is missing before the function was
            found, or if the function is in none of the ranks 1..9.
    """
    for rank_ind in range(1, 10):
        file_object = get_rank_file_object(args_count, rank_ind)
        if file_object is None:
            # Raised explicitly so the check survives `python -O`.
            raise AssertionError(
                "No results file for {} arguments rank {}".format(args_count, rank_ind)
            )
        # This function is called once per examined function, so every rank
        # file must be closed again instead of being left to the GC.
        with file_object:
            if is_function_exists(function_number, file_object):
                return rank_ind
    raise AssertionError(
        "Function {} was not found in ranks 1..9".format(function_number)
    )
def count_symmetricals(args_count):
    """Print length statistics and a LaTeX table for symmetric functions.

    A function is enumerated here by choosing one value per "layer", i.e. per
    number of ones in the argument tuple, so its value depends only on that
    number of ones. For every such function the length is looked up with
    ``get_function_len``. The distribution over lengths is printed twice:
    for all enumerated functions and for those accepted by
    ``is_good_zhegalkin``. Finally a ``longtable`` listing every function is
    printed.

    For ``args_count <= 4`` the textual representations are taken from
    ``./build/functions_strings_<args_count>.txt`` (one line per function
    number); otherwise ``generate_function(...).tex_str()`` is used.
    """
    if args_count <= 4:
        with open(f"./build/functions_strings_{args_count}.txt") as strings_file:
            # The file ends with a newline, hence the dropped last element.
            func_strings = strings_file.read().split("\n")[:-1]
    else:
        func_strings = []
    print(f"Total {len(func_strings)} representations")

    # Enumerate all functions whose value depends only on the argument weight.
    functions_to_check = set()
    for layer_values in itertools.product([0, 1], repeat=args_count + 1):
        func_values = [
            "1" if layer_values[bin(argument_number).count("1")] else "0"
            for argument_number in range(2 ** args_count)
        ]
        functions_to_check.add(function_values_to_number("".join(func_values)))

    # Group the functions by their length.
    function_ranks = {}
    for fn in functions_to_check:
        function_ranks.setdefault(get_function_len(args_count, fn), []).append(fn)

    zhegalkin_functions = {
        key: list(filter(lambda x: is_good_zhegalkin(x, args_count), value))
        for key, value in function_ranks.items()
    }

    for processed_function_ranks in [function_ranks, zhegalkin_functions]:
        if processed_function_ranks is zhegalkin_functions:
            print("Со степенью <= 3:")
        len_to_count = {key: len(value) for key, value in processed_function_ranks.items()}
        total = sum(len_to_count.values())
        if total == 0:
            # Nothing passed the filter; shares and average are undefined.
            continue
        for rank_ind, count in sorted(len_to_count.items(), key=lambda x: x[0]):
            print("len {} count {} share {}".format(rank_ind, count, 1.0 * count / total))
        print(
            "average: {}".format(
                sum([1.0 * key * value for key, value in len_to_count.items()]) / total
            )
        )

    print(r"\begin{center}")
    print(r"\begin{longtable}{| l| r | p{11cm}|}")
    print(r"\hline")
    if args_count <= 4:
        print(r"Длина функции & Номер & ПСПФ\\")
    else:
        print(r"Длина функции & Номер & Полином Жегалкина\\")
    print(r"\hline")
    print(r"\endhead")
    print(r"\hline \multicolumn{3}{r}{\textit{Продолжение на следующей странице}} \\")
    print(r"\endfoot")
    print(r"\hline")
    print(r"\endlastfoot")

    max_rank = max(function_ranks.keys())
    for rank in sorted(function_ranks.keys()):
        for function_number in sorted(function_ranks[rank]):
            if args_count <= 4:
                function_representation = func_strings[function_number]
            else:
                function_representation = generate_function(function_number, args_count).tex_str().strip()
            print(f"{rank} & {function_number} & ${function_representation}$\\\\")
        # NOTE(review): the separator is assumed to be emitted once per rank
        # group (the original indentation is not available) -- confirm.
        if rank != max_rank:
            print(r"\hline")
    print(r"\hline")
    print(r"\end{longtable}")
    print(r"\addtocounter{table}{-1}")
    print(r"\end{center}")
def main():
    """Command-line entry point.

    Supported invocations:
        <values vector>                       print the length of one function
        count_lens <args count> [white map]   count functions per length
        count_symmetricals <args count>       statistics for symmetric functions

    Exits with status 1 on a usage error, on a missing rank file, or when the
    function is found in none of the ranks 1..9.
    """
    def usage_error():
        print("Usage:")
        print("  get_function_len.py <values vector>")
        print("  get_function_len.py count_lens <args count> [white map path]")
        print("  get_function_len.py count_symmetricals <args count>")
        sys.exit(1)

    if len(sys.argv) < 2:
        usage_error()

    if sys.argv[1] == "count_lens":
        if len(sys.argv) < 3:
            usage_error()
        white_map_path = None
        if len(sys.argv) > 3:
            white_map_path = sys.argv[3]
        count_lens(sys.argv[2], white_map_path)
        return
    elif sys.argv[1] == "count_symmetricals":
        if len(sys.argv) < 3:
            usage_error()
        count_symmetricals(int(sys.argv[2]))
        return

    # Length of the values vector is 2 ** args_count; 2..5 arguments are
    # supported.
    args_count_by_len = {
        4: 2,
        8: 3,
        16: 4,
        32: 5,
    }
    if len(sys.argv[1]) not in args_count_by_len:
        print("Values vector must have 4, 8, 16 or 32 characters, got {}".format(len(sys.argv[1])))
        sys.exit(1)
    args_count = args_count_by_len[len(sys.argv[1])]

    function_number = function_values_to_number(sys.argv[1])
    print("Function number is {}".format(function_number))
    for rank_ind in range(1, 10):
        file_object = get_rank_file_object(args_count, rank_ind)
        if not file_object:
            print("No results file for {} arguments rank {}".format(args_count, rank_ind))
            sys.exit(1)
        with file_object:
            found = is_function_exists(function_number, file_object)
        if found:
            print("Result len is {}".format(rank_ind))
            return

    # Previously the script ended silently in this case.
    print("Function was not found in ranks 1..9")
    sys.exit(1)


if __name__ == "__main__":
    main()

219
main.cpp
View File

@@ -6,6 +6,7 @@
#include <cassert>
#include <fstream>
#include <string>
#include <algorithm>
#include "al_function.hpp"
#include "al_bool_matrix.hpp"
@@ -13,12 +14,19 @@
using namespace std;
typedef uint32_t Storage;
const size_t ARGS_COUNT = 5;
const size_t ARGS_COUNT = 4;
const size_t FUNCTION_LEN = 1ll << ARGS_COUNT;
const size_t FUNCTIONS_COUNT = 1ll << FUNCTION_LEN;
const bool ONLY_CREATE_CLASSES = false;
const bool GENERATE_ALL_REPRESENTATIONS = true;
typedef Function<Storage, FUNCTION_LEN> MyFunction;
typedef BoolSquareMatrix<Storage, ARGS_COUNT> MyMatrix;
static_assert(
GENERATE_ALL_REPRESENTATIONS || !ONLY_CREATE_CLASSES,
"representations required NOT ONLY_CREATE_CLASSES"
);
map<Storage, string> function_formulas;
void test_function() {
@@ -102,6 +110,18 @@ Function<STORAGE, 1ll << ARGUMENTS_COUNT > get_function_from_callable(Callable I
return Function<STORAGE, values_count >(function_values);
}
/**
 * Prints how many elements of `len_map` hold each distinct value.
 *
 * Every element is converted to `int` and counted; the result is written to
 * standard output as one "len <value> count <n>" line per distinct value, in
 * ascending order of the value.
 *
 * The container is taken by const reference: the function only reads it, and
 * it is called with a map that has one element per function, so a by-value
 * parameter would duplicate that whole array.
 *
 * @param len_map any range whose elements convert to int
 */
template<class CONTAINER>
void print_lens(const CONTAINER& len_map) {
    std::map<int, std::size_t> counter;
    for (const auto& el: len_map)
        ++counter[el];  // operator[] value-initialises a missing count to 0
    for (const auto& entry: counter)
        std::cout << "len " << entry.first << " count " << entry.second << '\n';
}
vector< MyMatrix > get_good_matrices() {
vector< MyMatrix > res;
for (size_t cur_val = 1ll << (ARGS_COUNT * ARGS_COUNT); cur_val != 0; --cur_val) {
@@ -117,6 +137,11 @@ vector< MyMatrix > get_good_matrices() {
vector<MyFunction> get_function_class(MyFunction f, const vector< MyMatrix >& tranformations, ostream& out) {
set<MyFunction> cur_res;
if constexpr ( GENERATE_ALL_REPRESENTATIONS ) {
// Тут достаточно использовать функциональный класс из одной функции.
// Хорошо работает для k=4 и меньше
return vector<MyFunction>{f};
}
for (Storage i = 0; i < FUNCTION_LEN; ++i) {
MyFunction cur_f = f;
for (Storage arg_ind = 0; arg_ind < ARGS_COUNT; ++arg_ind)
@@ -182,20 +207,12 @@ vector<MyFunction> get_linear_components() {
res.push_back(MyFunction("0101010101010101")); // f = x_4
function_formulas[res.back().value()] = "x_4";
} else if constexpr ( ARGS_COUNT == 5 ) {
res.push_back(MyFunction("00000000000000000000000000000000")); // f = 0
function_formulas[res.back().value()] = "0";
res.push_back(MyFunction("11111111111111111111111111111111")); // f = 1
function_formulas[res.back().value()] = "1";
res.push_back(MyFunction("00000000000000001111111111111111")); // f = x_1
function_formulas[res.back().value()] = "x_1";
res.push_back(MyFunction("00000000111111110000000011111111")); // f = x_2
function_formulas[res.back().value()] = "x_2";
res.push_back(MyFunction("00001111000011110000111100001111")); // f = x_3
function_formulas[res.back().value()] = "x_3";
res.push_back(MyFunction("00110011001100110011001100110011")); // f = x_4
function_formulas[res.back().value()] = "x_4";
res.push_back(MyFunction("01010101010101010101010101010101")); // f = x_5
function_formulas[res.back().value()] = "x_5";
} else {
assert (("bad args_count", false));
}
@@ -221,7 +238,7 @@ vector<MyFunction> get_linear_combinations(const vector<MyFunction> &linear_comp
}
/**
 * Wraps a factor in parentheses when it looks like a bare sum.
 *
 * A factor is wrapped only if it contains '+', contains no '*', does not
 * start with '(' and does not end with ')'. Every other factor is returned
 * unchanged.
 *
 * NOTE(review): the bracket test looks only at the first and the last
 * character, so a factor such as "(a + b) c + d" is left unwrapped -- confirm
 * that such factors cannot reach this function.
 */
std::string preprocess_factor(std::string factor) {
    const bool is_bare_sum =
        factor.find("+") != std::string::npos && factor.find("*") == std::string::npos;
    if ( !is_bare_sum )
        return factor;

    // A bare sum contains '+', so the string is non-empty here.
    const bool touches_bracket = factor.front() == '(' || factor.back() == ')';
    if ( touches_bracket )
        return factor;

    return "(" + factor + ")";
}
@@ -235,11 +252,10 @@ vector<MyFunction> get_all_monomials(const vector<MyFunction> &linear_combinatio
for (auto el_second: res) {
bool is_added_now = res.insert(el_first and el_second).second;
if ( is_added_now ) {
if constexpr ( ARGS_COUNT < 5 ) {
Storage cur_value = (el_first and el_second).value();
function_formulas[cur_value] = preprocess_factor(function_formulas[el_first.value()])
+ " * " + preprocess_factor(function_formulas[el_second.value()]);
}
Storage cur_value = (el_first and el_second).value();
function_formulas[cur_value] = preprocess_factor(function_formulas[el_first.value()])
+ " " + preprocess_factor(function_formulas[el_second.value()]);
// no symbol for multiplication needed
}
is_added = is_added or is_added_now;
}
@@ -256,91 +272,158 @@ inline bool exists_test0 (const std::string& name) {
return f.good();
}
/**
 * Builds the name of the file that stores one rank:
 * "base_<ARGS_COUNT>_rank_<rank_ind>.txt".
 */
string get_out_file_name(size_t rank_ind) {
    string file_name = "base_";
    file_name += to_string(ARGS_COUNT);
    file_name += "_rank_";
    file_name += to_string(rank_ind);
    file_name += ".txt";
    return file_name;
}
size_t recover_ranks(vector< vector<MyFunction> >& ranks, vector<int8_t>& used_map, size_t& functions_remains) {
for (size_t rank_ind = 2; true; ++rank_ind) {
ifstream f_in(get_out_file_name(rank_ind).c_str(), std::ios::binary);
if ( not f_in.good() )
return rank_ind;
ranks.push_back(vector<MyFunction>());
while ( not f_in.eof() ) {
Storage function_value;
f_in >> function_value;
MyFunction f(function_value);
used_map.at(f.value()) = rank_ind;
--functions_remains;
ranks.at(rank_ind).push_back(f);
for (Storage bytes_cnt = 0; bytes_cnt < FUNCTIONS_COUNT / 8; ++bytes_cnt) {
uint8_t buf;
f_in.read(reinterpret_cast<char*>(&buf), 1);
for (int8_t bits_cnt = 7; bits_cnt >= 0; --bits_cnt) {
bool is_exists = buf % 2;
if ( is_exists ) {
Storage function_value = bytes_cnt * 8 + bits_cnt;
MyFunction f(function_value);
if (used_map.at(f.value()))
continue;
used_map.at(f.value()) = rank_ind;
--functions_remains;
ranks.back().push_back(f);
}
buf /= 2;
}
}
ranks.back().shrink_to_fit();
}
}
void save_rank(size_t rank_ind, vector<MyFunction>& rank_items) {
if ( not is_sorted(rank_items.begin(), rank_items.end()) )
exit(1);
ofstream f_out(get_out_file_name(rank_ind).c_str(), ios::binary);
for (auto f: rank_items) {
Storage function_value = f.value();
f_out << reinterpret_cast<char*>(&function_value), sizeof(function_value);
size_t fn_ptr = 0;
for (Storage bytes_cnt = 0; bytes_cnt < FUNCTIONS_COUNT / 8; ++bytes_cnt) {
uint8_t buf = 0;
for (Storage bits_cnt = 0; bits_cnt < 8; ++bits_cnt) {
buf *= 2;
if ( fn_ptr >= rank_items.size() or rank_items[fn_ptr].value() != bytes_cnt * 8 + bits_cnt )
continue;
++buf;
++fn_ptr;
}
f_out.write(reinterpret_cast<char*>(&buf), 1);
}
}
void clean_trash_ranks(vector< vector<MyFunction> >& ranks) {
for (size_t rank_ind = 2; rank_ind < ranks.size() - 1; ++rank_ind)
ranks.at(rank_ind).clear();
}
void fill_ranks(vector<MyFunction> monomials) {
vector<int8_t> used_map(FUNCTIONS_COUNT, 0);
size_t functions_remains = FUNCTIONS_COUNT;
ofstream f_out("out.txt");
vector< vector<MyFunction> > ranks;
ranks.push_back(vector<MyFunction>()); // empty set
ranks.push_back(monomials); // empty set
save_rank(1, monomials);
cout << "rank index = " << 1 << endl;
for (auto el: monomials) {
--functions_remains;
used_map.at(el.value()) = 1;
}
size_t total_ranks = recover_ranks(ranks, used_map, functions_remains);
cout << " total_ranks = " << total_ranks << endl;
for (total_ranks = 1; functions_remains; ++total_ranks) {
ranks.push_back(vector<MyFunction>());
cout << "rank index = " << total_ranks << " remains: " << functions_remains << endl;
for (auto el_first: ranks.at(total_ranks - 1)) {
for (auto el_second: ranks.at(1)) {
MyFunction res_el = el_first xor el_second;
if ( used_map[res_el.value()] == 0 ) {
--functions_remains;
if constexpr ( ARGS_COUNT < 5 ) {
size_t total_ranks = 2;
if constexpr ( ONLY_CREATE_CLASSES ) {
total_ranks = recover_ranks(ranks, used_map, functions_remains);
}
clean_trash_ranks(ranks);
cout << "recovered to " << total_ranks << endl;
cout << "current ranks: " << endl;
for (auto&& r: ranks)
cout << r.size() << " ";
cout << endl;
for (; functions_remains; ++total_ranks) {
cout << "rank index = " << total_ranks << " remains: " << functions_remains << endl;
vector<int8_t> temp_used_map(FUNCTIONS_COUNT, 0);
for (size_t ind_first = 0; ind_first != ranks[1].size(); ++ind_first) {
if ( ind_first % 20 == 0 ) {
for (auto&& r: ranks)
cout << r.size() << " ";
cout << endl;
cout << 100. * ind_first / ranks[1].size() << "% for " << ranks[1].size()
<< " x " << ranks.back().size() << endl;
}
const auto el_first = ranks[1][ind_first];
for (auto el_second: ranks.back()) {
MyFunction res_el = el_first xor el_second;
if constexpr ( not ONLY_CREATE_CLASSES ) {
if ( used_map[res_el.value()] == 0 ) {
function_formulas[res_el.value()] =
preprocess_monom(function_formulas[el_first.value()]) +
" + " + preprocess_monom(function_formulas[el_second.value()]);
}
used_map.at(res_el.value()) = total_ranks;
ranks.at(total_ranks).push_back(res_el);
}
temp_used_map[res_el.value()] = total_ranks;
}
}
if ( total_ranks - 1 > 1 )
ranks.at(total_ranks - 1).clear(); // больше не нужен
cout << "size for rank " << total_ranks << " is " << ranks.at(total_ranks).size() << endl;
save_rank(total_ranks, ranks.at(total_ranks));
}
if ( ranks.at(ranks.size() - 1).size() != 0 ) {
ranks.push_back(vector<MyFunction>());
total_ranks += 2;
clean_trash_ranks(ranks);
for (size_t func_ind = 0; func_ind < temp_used_map.size(); ++func_ind) {
if (not temp_used_map[func_ind] or used_map[func_ind] != 0)
continue;
--functions_remains;
auto cur_fn = MyFunction(func_ind);
auto val = cur_fn.value();
if (val >= used_map.size()) {
cout << val << " " << cur_fn.string() << endl;
continue;
}
used_map[val] = total_ranks;
ranks.back().push_back(cur_fn);
}
cout << "remains: " << functions_remains << endl;
save_rank(total_ranks, ranks.back());
cout << "size for rank " << total_ranks << " is " << ranks.at(total_ranks).size() << endl;
}
cout << "ranks=" << total_ranks << endl;
for (size_t i = 0; i < ranks.size(); ++i) {
cout << "i=" << i << endl;
for (auto v: ranks.at(i))
cout << " " << v.string() << endl;
if constexpr ( ONLY_CREATE_CLASSES ) {
return;
}
print_lens(used_map);
auto possible_tranformations = get_good_matrices();
cout << "Total " << possible_tranformations.size() << " linear tranformations" << endl;
ofstream f_out("out.tex");
f_out << "\\begin{longtable}{| l| l | l | p{70mm} |}" << endl
<< "\\hline" << endl
<< "\\endhead" << endl
<< "\\hline \\multicolumn{4}{r}{\\textit{Продолжение на следующей странице}} \\\\" << endl
<< "\\endfoot" << endl
<< "\\hline" << endl
<< "\\endlastfoot" << endl
<< "\\hline" << endl
<< "Номер класса & Длина & Размер класса & ПСПФ\\\\" << endl
<< "\\hline" << endl;
ranks.clear();
for (auto i = total_ranks - 1; i != 0; --i)
ranks.push_back(vector<MyFunction>()); // empty set
@@ -354,9 +437,9 @@ void fill_ranks(vector<MyFunction> monomials) {
++total_unique_functions;
MyFunction current_fn(fn_value);
vector<MyFunction> function_class = get_function_class(
current_fn, possible_tranformations, f_out
current_fn, possible_tranformations, cout
);
f_out << "size of function class " << current_fn.string() << " is " << function_class.size() << endl;
// cout << "size of function class " << current_fn.string() << " is " << function_class.size() << endl;
class_sizes[current_fn] = function_class.size();
for (auto marked_function: function_class) {
if ( used_map[marked_function.value()] == 0 )
@@ -365,19 +448,30 @@ void fill_ranks(vector<MyFunction> monomials) {
used_map[marked_function.value()] = 0;
++total_functions;
}
cout << "cur_rank - 1 =" << cur_rank - 1 << endl;
ranks.at(cur_rank - 1).push_back(current_fn);
}
if ( total_functions != FUNCTIONS_COUNT)
cout << "total counted functions: " << total_functions
<< " but must be " << FUNCTIONS_COUNT << endl;
cout << "total function classes: " << total_unique_functions << endl;
size_t function_index = 1;
for (size_t rank_ind = 0; rank_ind < ranks.size(); ++rank_ind) {
f_out << "rank index = " << rank_ind + 1 << endl;
for (auto f: ranks.at(rank_ind))
f_out << f.string() << " size: " << class_sizes[f] << " "
cout << "rank index = " << rank_ind + 1 << endl;
f_out << "\\hline" << endl;
for (auto f: ranks.at(rank_ind)) {
cout << f.string() << " size: " << class_sizes[f] << " "
<< function_formulas[f.value()] << endl;
f_out << " " << function_index << " & " << rank_ind + 1 << " & "
<< class_sizes[f] << " & $" << function_formulas[f.value()] << "$ \\\\" << endl;
++function_index;
}
}
f_out << " \\hline" << endl
<< "\\end{longtable}" << endl
<< "\\captionof{figure}{Классы псевдополиномов}" << endl
<< "\\label{fig:polynoms}" << endl
<< "\\addtocounter{table}{-1}" << endl;
f_out.close();
}
@@ -399,5 +493,12 @@ int main() {
cout << "Monomials: " << monomials.size() << endl;
fill_ranks(monomials);
if constexpr ( GENERATE_ALL_REPRESENTATIONS ) {
ofstream f_out(("functions_strings_" + to_string(ARGS_COUNT) + ".txt").c_str());
for (size_t i = 0; i < FUNCTIONS_COUNT; ++i) {
f_out << function_formulas[i] << "\n";
}
}
return 0;
}