Zoltan2
Loading...
Searching...
No Matches
largestComponent2Binary.cpp
Go to the documentation of this file.
1// @HEADER
2//
3// ***********************************************************************
4//
5// Zoltan2: A package of combinatorial algorithms for scientific computing
6// Copyright 2012 Sandia Corporation
7//
8// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
9// the U.S. Government retains certain rights in this software.
10//
11// Redistribution and use in source and binary forms, with or without
12// modification, are permitted provided that the following conditions are
13// met:
14//
15// 1. Redistributions of source code must retain the above copyright
16// notice, this list of conditions and the following disclaimer.
17//
18// 2. Redistributions in binary form must reproduce the above copyright
19// notice, this list of conditions and the following disclaimer in the
20// documentation and/or other materials provided with the distribution.
21//
22// 3. Neither the name of the Corporation nor the names of the
23// contributors may be used to endorse or promote products derived from
24// this software without specific prior written permission.
25//
26// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
27// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
29// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
30// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
31// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
32// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
33// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
34// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
35// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
36// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
37//
38// Questions? Contact Seher Acer (sacer@sandia.gov)
39// Erik Boman (egboman@sandia.gov)
40// Siva Rajamanickam (srajama@sandia.gov)
41// ***********************************************************************
42//
43// @HEADER
44//
46// Written by Seher Acer, 2019
47// This code reads a mtx file and writes a binary file in CRS format:
48// [nrows nnzs rowPtr[0 nrows] colInd[0 nnzs]
49// 1) It symmetrizes the input, regardless of it is already symmetric or not
50// 2) It removes the diagonal entries
51// 3) The column indices are sorted in increasing order for each row
52// 4) Index base for the output file is 0
54// The corresponding file reader exists in "readMatrixFromBinaryFile.hpp"
55// Note: This is research code. We do not guarantee it works in all cases.
57
58#include <iostream>
59#include <fstream>
60#include <sstream>
61#include <set>
62#include <vector>
63#include <algorithm>
64#include <queue>
65
66typedef long long ord_type;
67
69 ord_type inn, ord_type *inRowPtr, ord_type *inColInd,
70 ord_type &outn, ord_type *outRowPtr, ord_type *outColInd)
71{
72
73 std::vector<int> mark(inn, -1);
74 std::queue<ord_type> active;
75 ord_type i, v, lastSearch, root, popCnt = 0, ncc = 0;
76
77 root = seed;
78 lastSearch = 0;
79
80 while(popCnt < inn) {
81 active.push(root);
82 mark[root] = ncc;
83
84 while(active.empty() == false) {
85
86 i = active.front();
87 active.pop();
88 popCnt ++;
89
90 for(ord_type j = inRowPtr[i]; j < inRowPtr[i+1]; ++j) {
91 v = inColInd[j];
92 if(mark[v] == -1) {
93 mark[v] = ncc;
94 active.push(v);
95 }
96 }
97 }
98
99 //search for an unmarked vertex to be the next root
100 for(ord_type i_r = lastSearch; i_r < inn; ++i_r) {
101 if(mark[i_r] == -1) {
102 root = i_r;
103 lastSearch = i_r;
104 break;
105 }
106 }
107
108 //increase component id
109 ncc++;
110 }
111
112 // find component sizes
113 std::vector<ord_type> compSize(ncc, 0);
114 for(ord_type i_c = 0; i_c < inn; ++i_c) {
115 if(mark[i_c] == -1) {
116 std::cout << "Vertex " << i_c << " is untouched,, Exiting!\n";
117 exit(1);
118 }
119 else
120 compSize[mark[i_c]]++;
121 }
122
123 // find the largest component
124 ord_type maxSize = 0;
125 ord_type maxId = -1;
126 for(ord_type i_l = 0; i_l < ncc; ++i_l) {
127 if(compSize[i_l] > maxSize) {
128 maxSize = compSize[i_l];
129 maxId = i_l;
130 }
131 }
132
133 // renumber the vertices
134 outn = 0;
135 for(ord_type i_v = 0; i_v < inn; ++i_v) {
136 if(mark[i_v] == maxId)
137 mark[i_v] = outn++;
138 else
139 mark[i_v] = -1;
140 }
141
142 if(outn != maxSize) {
143 std::cout << "The number of vertices in this component: " << outn << " does not match the maximum component size: " << maxSize << "\n";
144 exit(1);
145 }
146
147 // write the largest component to the output arrays
148 ord_type ptr = 0;
149 outn = 0;
150 outRowPtr[outn] = 0;
151 for(ord_type ii = 0; ii < inn; ii++) {
152 if(mark[ii] > -1){
153 for(ord_type j = inRowPtr[ii]; j < inRowPtr[ii+1]; ++j) {
154 v = inColInd[j];
155 if(mark[v] == -1) {
156 std::cout << "Neighbor " << v << " of " << ii << " is found to be absent in the component\n";
157 exit(1);
158 }
159 outColInd[ptr++] = mark[v];
160 }
161 outn++;
162 outRowPtr[outn] = ptr;
163 }
164 }
165
166 if(outn != maxSize) {
167 std::cout << "The number of vertices written: " << outn << " does not match the maximum component size: " << maxSize << "\n";
168 exit(1);
169 }
170
171}
172
173int main(int argc, char* argv[])
174{
175
176 std::string line;
177 ord_type nrows, nnzs, r, c = 0;
178
179 std::ifstream in(argv[1]);
180
181 do{
182 getline(in, line);
183 }
184 while(line[0] == '%');
185
186 std::stringstream ss1(line);
187 ss1 >> nrows >> nrows >> nnzs;
188
189 std::vector<bool> diag(nrows, false);
190
191 ord_type *rowPtr = new ord_type[nrows+2];
192 for(ord_type i = 0; i < nrows+2; i++)
193 rowPtr[i] = 0;
194
195 while(getline(in, line)) {
196
197 std::stringstream ss2(line);
198 ss2 >> r >> c;
199 r--;
200 c--;
201
202 if(r != c) {
203 rowPtr[r+2]++;
204 rowPtr[c+2]++;
205 }
206 }
207 in.close();
208
209 for(ord_type j0 = 0; j0 < nrows; j0++)
210 rowPtr[j0+2]++;
211
212
213 for(ord_type k = 2; k < nrows+2; k++)
214 rowPtr[k] += rowPtr[k-1];
215
216 ord_type *colInd = new ord_type[rowPtr[nrows+1]];
217
218 // re-read from the beginning, skip the intro
219 in.open(argv[1]);
220 do {
221 getline(in, line);
222 }
223 while(line[0] == '%');
224
225
226 while(getline(in, line)) {
227
228 std::stringstream ss2(line);
229 ss2 >> r >> c;
230 r--;
231 c--;
232
233 if(r != c) {
234 colInd[rowPtr[r+1]++] = c;
235 colInd[rowPtr[c+1]++] = r;
236 }
237
238 }
239 in.close();
240
241
242 for(ord_type i0 = 0; i0 < nrows; i0++)
243 colInd[rowPtr[i0+1]++] = i0;
244
245
246 ord_type *rowPtrNew = new ord_type[nrows+1];
247 ord_type *colIndNew = new ord_type[rowPtr[nrows+1]];
248
249 rowPtrNew[0] = 0;
250 ord_type ptr = 0;
251 ord_type prev = -1;
252 for(ord_type i1 = 0; i1 < nrows; i1++) {
253
254 ord_type deg = rowPtr[i1+1] - rowPtr[i1];
255 if(deg > 0) {
256
257 std::sort(&colInd[rowPtr[i1]], &colInd[rowPtr[i1+1]]);
258 colIndNew[ptr++] = colInd[rowPtr[i1]];
259 prev = colInd[rowPtr[i1]];
260 for(ord_type j1 = rowPtr[i1]+1; j1 < rowPtr[i1+1]; j1++) {
261 if(colInd[j1] != prev) {
262 colIndNew[ptr++] = colInd[j1];
263 prev = colInd[j1];
264 }
265 }
266 }
267
268 rowPtrNew[i1+1] = ptr;
269 }
270
271 for(ord_type i2 = 0; i2 < nrows; i2++) {
272 for(ord_type j2 = rowPtrNew[i2]; j2 < rowPtrNew[i2+1]; ++j2)
273 if(colIndNew[j2] == i2)
274 diag[i2] = true;
275
276 if(diag[i2] == false)
277 std::cout << "ROW " << i2 << " misses diagonal\n";
278 }
279
280 std::cout << argv[1] << " " << nrows << " " << ptr << " ";
281
282 ord_type newnrows = -1;
283 findLargestComponent(0, //seed
284 nrows, rowPtrNew, colIndNew,
285 newnrows, rowPtr, colInd );
286 ptr = rowPtr[newnrows]; //new number of nonzeros
287
288 ord_type deg, max = 0;
289 for(ord_type i3 = 0; i3 < nrows; i3++) {
290 deg = rowPtrNew[i3+1] - rowPtrNew[i3];
291 if(deg > max)
292 max = deg;
293 }
294
295 // write into the output file
296 std::ofstream out(argv[2], std::ios::out | std::ios::binary);
297 out.write((char*)&newnrows, sizeof(ord_type));
298 out.write((char*)&ptr, sizeof(ord_type));
299
300 out.write((char*)rowPtr, sizeof(ord_type)*(newnrows+1));
301 out.write((char*)colInd, sizeof(ord_type)*(ptr));
302
303 out.close();
304
305 std::cout << newnrows << " " << ptr << " " << max << "\n";
306
307 delete [] rowPtr;
308 delete [] colInd;
309
310 delete [] rowPtrNew;
311 delete [] colIndNew;
312
313 return 0;
314}
int main()
void findLargestComponent(ord_type seed, ord_type inn, ord_type *inRowPtr, ord_type *inColInd, ord_type &outn, ord_type *outRowPtr, ord_type *outColInd)
long long ord_type