数据

64*64、128*128、512*512、1024*1024 双精度浮点数矩阵。

下载

要求

  • 所有数据文件存放在和可执行文件相同的目录下
  • 程序命名:学号.exe(Windows下)或学号.c(Linux下)
  • 时间比赛程序输入:以上mx.rar中的两个1024*1024矩阵;输出:学号.txt,格式和M1024A.txt一致
  • 所有同学的程序按照随机顺序运行三遍,取平均值为最终时间。
  • 运行环境:CPU:i7-6700K, 内存64GB,64位 Windows 10/Ubuntu, 所有文件存放在RAMDISK(用内存模拟硬盘)上。

代码

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
#include <Windows.h>
#include <fstream>
#include <iostream>
#include <iomanip>

/* Number of worker threads used for the multiplication. */
const int THREADS = 8;
/* Dimension of the square input and output matrices. */
const int N = 1024;
/* N / THREADS; main hands each pair of worker threads 2 * DATA_SIZE rows. */
const int DATA_SIZE = N / THREADS;
/*
 * Input/output file names, opened relative to the current directory.
 * The pointers themselves are const so the names cannot be re-pointed
 * at run time.
 */
const char* const MATRIX_A_FILE = "M1024A.txt";
const char* const MATRIX_B_FILE = "M1024B.txt";
const char* const RESULT_FILE = "1927405160.txt";

/*
 * Global N x N matrices shared by all threads.
 *   matrix_a - filled by thr_read_matrix_A as matrix_a[row][col] in file order.
 *   matrix_b - filled by thr_read_matrix_B with the indices swapped
 *              (matrix_b[col][row]), i.e. it holds the transpose of the file.
 *   result   - written by thr_func_calc, one element per (i, j) pair.
 */
double matrix_a[N][N];
double matrix_b[N][N];
double result[N][N];

/*
 * Work description handed to one thr_func_calc thread: two half-open index
 * ranges selecting the tile of `result` that the thread computes.
 */
struct THR_FUNC_PARAM {
int a_start; /* first value of the outer index i (row of matrix_a / result) */
int a_end;   /* one past the last value of i */
int b_start; /* first value of the inner index j (row of matrix_b, column of result) */
int b_end;   /* one past the last value of j */
};

/*
 * Worker thread routine: computes one rectangular tile of `result`.
 *
 * IpParam points to a THR_FUNC_PARAM giving the tile as half-open ranges
 * [a_start, a_end) for i and [b_start, b_end) for j. For every (i, j) in
 * the tile, result[i][j] becomes the dot product of matrix_a[i] and
 * matrix_b[j].
 *
 * Declared WINAPI so the calling convention matches LPTHREAD_START_ROUTINE,
 * which is how CreateThread invokes it.
 *
 * Always returns 0.
 */
DWORD WINAPI thr_func_calc(LPVOID IpParam) {
    const THR_FUNC_PARAM* tile = static_cast<const THR_FUNC_PARAM*>(IpParam);
    for (int i = tile->a_start; i < tile->a_end; ++i) {
        const double* row_a = matrix_a[i];
        for (int j = tile->b_start; j < tile->b_end; ++j) {
            const double* row_b = matrix_b[j];
            /* Accumulate locally and store once, instead of updating the
               global result element on every iteration of k. */
            double sum = 0;
            for (int k = 0; k < N; ++k) {
                sum += row_a[k] * row_b[k];
            }
            result[i][j] = sum;
        }
    }
    return 0;
}

/*
 * Thread routine that loads MATRIX_A_FILE into matrix_a.
 *
 * Reads N * N whitespace-separated numbers and stores them in file order as
 * matrix_a[i][j]. The signature matches LPTHREAD_START_ROUTINE so the
 * function can be started by CreateThread; the argument is unused and
 * defaults to NULL, so it can still be called directly with no arguments.
 *
 * Returns 0 on success, 1 if the file cannot be opened or a value cannot be
 * read (a message is written to std::cerr in both cases).
 */
DWORD WINAPI thr_read_matrix_A(LPVOID /* unused */ = NULL) {
    std::ifstream fin(MATRIX_A_FILE);
    if (!fin.is_open()) {
        std::cerr << "can't read file:" << MATRIX_A_FILE << '\n';
        return 1;
    }
    for (int i = 0; i < N; ++i) {
        for (int j = 0; j < N; ++j) {
            /* Stop at the first missing or malformed number rather than
               silently leaving the rest of the matrix unfilled. */
            if (!(fin >> matrix_a[i][j])) {
                std::cerr << "bad or missing value in file:" << MATRIX_A_FILE << '\n';
                return 1;
            }
        }
    }
    return 0;
}

/*
 * Thread routine that loads MATRIX_B_FILE into matrix_b, transposed.
 *
 * Reads N * N whitespace-separated numbers; the value at file position
 * (i, j) is stored at matrix_b[j][i], so thr_func_calc can walk both
 * operands along contiguous rows. The signature matches
 * LPTHREAD_START_ROUTINE so the function can be started by CreateThread;
 * the argument is unused and defaults to NULL, so it can still be called
 * directly with no arguments.
 *
 * Returns 0 on success, 1 if the file cannot be opened or a value cannot be
 * read (a message is written to std::cerr in both cases).
 */
DWORD WINAPI thr_read_matrix_B(LPVOID /* unused */ = NULL) {
    std::ifstream fin(MATRIX_B_FILE);
    if (!fin.is_open()) {
        std::cerr << "can't read file:" << MATRIX_B_FILE << '\n';
        return 1;
    }
    for (int i = 0; i < N; ++i) {
        for (int j = 0; j < N; ++j) {
            /* Indices are swapped on purpose: matrix_b holds the transpose. */
            if (!(fin >> matrix_b[j][i])) {
                std::cerr << "bad or missing value in file:" << MATRIX_B_FILE << '\n';
                return 1;
            }
        }
    }
    return 0;
}

/*
 * Writes an N x N matrix to file_name as text.
 *
 * Each matrix row becomes one output line; every value is printed in fixed
 * notation and followed by a single space.
 *
 * Returns 0 on success, or 1 (after reporting on std::cerr) when the file
 * cannot be opened for writing.
 */
int write_matrix(const char* file_name, double matrix[N][N]) {
    std::ofstream out(file_name, std::ios::out);
    if (out.is_open()) {
        out.setf(std::ios::fixed);
        for (int row = 0; row != N; ++row) {
            const double* values = matrix[row];
            for (int col = 0; col != N; ++col) {
                out << values[col] << ' ';
            }
            out << '\n';
        }
        out.close();
        return 0;
    }
    std::cerr << "can't write file:" << file_name << '\n';
    return 1;
}

int read_matrix() {
HANDLE hThread[2];
hThread[0] = CreateThread(NULL, 0, (LPTHREAD_START_ROUTINE)thr_read_matrix_A, NULL, 0, NULL);
hThread[1] = CreateThread(NULL, 0, (LPTHREAD_START_ROUTINE)thr_read_matrix_B, NULL, 0, NULL);;
WaitForMultipleObjects(2, hThread, TRUE, INFINITE);
return 0;
}

/* One work description per worker thread; main fills each entry and passes
   its address to CreateThread as the thread argument. */
THR_FUNC_PARAM params[THREADS];

int main() {
std::ios::sync_with_stdio(false);
int i, start, end;
/* read matrix a and b */
read_matrix();

/* create threads */
HANDLE threads[THREADS];
for (i = 0; i < THREADS / 2; ++i) {
// 2 * i , 2 * i + 1
start = i * DATA_SIZE * 2;
end = (i + 1) * DATA_SIZE * 2;
params[2 * i].a_start = start;
params[2 * i].a_end = end;
params[2 * i].b_start = 0;
params[2 * i].b_end = N / 2;
params[2 * i + 1].a_start = start;
params[2 * i + 1].a_end = end;
params[2 * i + 1].b_start = N / 2;
params[2 * i + 1].b_end = N;
threads[2 * i] = CreateThread(NULL, 0, (LPTHREAD_START_ROUTINE)thr_func_calc, &params[2 * i], 0, NULL);
threads[2 * i + 1] = CreateThread(NULL, 0, (LPTHREAD_START_ROUTINE)thr_func_calc, &params[2 * i + 1], 0, NULL);
}

/* join threads */
WaitForMultipleObjects(THREADS, threads, TRUE, INFINITE);

/* write result */
write_matrix(RESULT_FILE, result);
return 0;
}