TRACO   an automatic parallelizing and optimizing compiler

based on the transitive closure of dependence graphs

//rna.c		
#pragma scop
for (i = N-1; i >= 0; i--) {
 for (j = i+1; j < N; j++) {
  for (k = 0; k < j-i; k++) {
	 S[i][j] = MAX(S[i][k+i] + S[k+i+1][j], S[i][j]); 
  }
  S[i][j] = MAX(S[i][j], S[i+1][j-1]) + MAX(i,j);
 }
}
#pragma endscop
		
$issf rna.c --tiling5=1,96,16 --rplus=iterate

/\__  _\ /\  == \   /\  __ \   /\  ___\   /\  __ \   
\/_/\ \/ \ \  __<   \ \  __ \  \ \ \____  \ \ \/\ \  
   \ \_\  \ \_\ \_\  \ \_\ \_\  \ \_____\  \ \_____\ 
    \/_/   \/_/ /_/   \/_/\/_/   \/_____/   \/_____/ 

      An Automatic Parallelizer and Optimizer
based on the TRAnsitive ClOsure of dependence graphs
             traco.sourceforge.net               

Dependence analysis: time taken:  0.0985560417175 seconds.
R
[N] -> { [i, j, k, _v1 = 10] -> [i', j' = i + j - i', 0, _v2 = 12] : 0 <= k < -i + j and i' >= -1 + i and i' >= 0 and -N + i + j < i' <= i; [i, j, k, _v1 = 10] -> [i' = i, j' = j, o2, _v2 = 10] : i >= 0 and j < N and k >= 0 and k < o2 < -i + j; [i, j, k, _v1 = 10] -> [i' = i, j', -i + j, _v2 = 10] : i >= 0 and 0 <= k < -i + j and j < j' < N; [i, j, k, _v1 = 10] -> [i', j' = j, -1 + i - i', _v2 = 10] : j < N and 0 <= k < -i + j and 0 <= i' < i; [i, j, k = 0, _v1 = 12] -> [i' = i, j', -i + j, _v2 = 10] : i >= 0 and j > i and j < j' < N; [i, j, k = 0, _v1 = 12] -> [i', j' = j, -1 + i - i', _v2 = 10] : i < j < N and 0 <= i' < i; [i, j, k = 0, _v1 = 12] -> [i' = -1 + i, j' = 1 + j, 0, _v2 = 12] : i > 0 and i < j <= -2 + N }
Transitive closure: time taken:  3.10718417168 seconds.
!! exact_rplus True
R*
[N] -> { [i, j, i2, 10] -> [i', j', o2, 10] : 0 <= i2 < -i + j and i' >= 0 and j < j' < N and ((0 <= o2 <= -2 + i - i') or (i' < i and i - i' <= o2 < -i' + j')); [i, j, i2, 10] -> [i', j' = j, 0, 12] : j < N and 0 <= i2 < -i + j and 0 <= i' <= i; [i, j, i2, 10] -> [i', j', 0, 12] : 0 <= i2 < -i + j and 0 <= i' <= i and j' < N and ((i' <= -2 + i and j' > j) or (i' >= -1 + i and j' >= i + j - i')); [i, j, i2 = 0, 12] -> [i', j' = j, o2, 10] : j < N and 0 <= i' < i and i - i' <= o2 < j - i'; [i, j, i2 = 0, 12] -> [i', j', o2, 10] : j > i and i' >= 0 and j' < N and ((j' >= j and 0 <= o2 < i - i') or (i' <= -2 + i and j' > j and i - i' <= o2 < -i' + j') or (-1 + i <= i' <= i and j - i' <= o2 < -i' + j')); [i, j, i2, 10] -> [i' = i, j' = j, o2, 10] : i >= 0 and j < N and i2 >= 0 and i2 < o2 < -i + j; [i, j, i2 = 0, 12] -> [i' = -1 + i, j', o2, 10] : i > 0 and j < j' < N and 0 < o2 <= -i + j; [i, j, i2, 10] -> [i' = i, j', o2, 10] : i >= 0 and 0 <= i2 < -i + j and j' < N and -i + j <= o2 < -i + j'; [i, j, i2, 10] -> [i', j' = j, o2, 10] : j < N and 0 <= i2 < -i + j and 0 <= i' < i and 0 <= o2 < j - i'; [i, j, i2, 10] -> [i', j', -1 + i - i', 10] : 0 <= i2 < -i + j and 0 <= i' < i and j <= j' < N; [i, j, i2 = 0, 12] -> [i', j', 0, 12] : j > i and 0 <= i' <= i and j' >= j and -i + j + i' < j' < N }
[u'i', u'j', u'k']
[N] -> { [i, j, k] : N > 0 and 0 <= i <= -2 + N and i < j < N and 0 <= k < -i + j }
[N] -> { [i, j] : N > 0 and 0 <= i <= -2 + N and i < j < N }
TILE
10:	[N, ii, jj, kk] -> { [i = -1 + N - ii, j, k, 10] : N > 0 and 0 < ii < N and jj >= 0 and kk >= 0 and j >= N - ii + 96jj and N - ii <= j <= 95 + N - ii + 96jj and j < N and k >= 16kk and 0 <= k <= -N + ii + j and k <= 15 + 16kk }
12:	[N, ii, jj, kk] -> { [i = -1 + N - ii, j, k = 0, 12] : kk = 0 and N > 0 and 0 < ii < N and jj >= 0 and j >= N - ii + 96jj and N - ii <= j <= 95 + N - ii + 96jj and j < N }
TILE_LT
10:	[N, ii, jj, kk] -> { [i, j, k, 10] : i >= N - ii and i >= 0 and j < N and 0 <= k < -i + j; [i, j, k = 0, 12] : i >= N - ii and i >= 0 and i < j < N; [i = -1 + N - ii, j, k, 10] : ii < N and j <= 95 + N - ii + 96jj and j < N and 0 <= k <= -N + ii + j and (j < N - ii + 96jj or (j >= N - ii + 96jj and k < 16kk)); [i = -1 + N - ii, j, k = 0, 12] : ii < N and N - ii <= j < N - ii + 96jj and j < N }
12:	[N, ii, jj, kk] -> { [i, j, k, 10] : i >= N - ii and i >= 0 and j < N and 0 <= k < -i + j; [i, j, k = 0, 12] : i >= N - ii and i >= 0 and i < j < N; [i = -1 + N - ii, j, k, 10] : ii < N and j <= 95 + N - ii + 96jj and j < N and 0 <= k <= -N + ii + j; [i = -1 + N - ii, j, k = 0, 12] : ii < N and j <= 95 + N - ii + 96jj and j < N and ((jj >= 0 and kk > 0 and j >= N - ii + 96jj) or (N - ii <= j < N - ii + 96jj)) }
TILE_GT
10:	[N, ii, jj, kk] -> { [i, j, k, 10] : 0 <= i <= -2 + N - ii and j < N and 0 <= k < -i + j; [i, j, k = 0, 12] : 0 <= i <= -2 + N - ii and i < j < N; [i = -1 + N - ii, j, k, 10] : ii < N and N - ii + 96jj <= j < N and 0 <= k <= -N + ii + j and (j >= 96 + N - ii + 96jj or (j <= 95 + N - ii + 96jj and k >= 16 + 16kk)); [i = -1 + N - ii, j, k = 0, 12] : ii < N and j >= N - ii + 96jj and N - ii <= j < N }
12:	[N, ii, jj, kk] -> { [i, j, k, 10] : 0 <= i <= -2 + N - ii and j < N and 0 <= k < -i + j; [i, j, k = 0, 12] : 0 <= i <= -2 + N - ii and i < j < N; [i = -1 + N - ii, j, k, 10] : ii < N and 96 + N - ii + 96jj <= j < N and 0 <= k <= -N + ii + j; [i = -1 + N - ii, j, k = 0, 12] : ii < N and N - ii + 96jj <= j < N and ((j >= 96 + N - ii + 96jj and j >= N - ii) or (jj >= 0 and kk < 0 and j <= 95 + N - ii + 96jj)) }
TILE_ITR
10:	[N, ii, jj, kk] -> { [i = -1 + N - ii, j, k, 10] : ii < N and kk >= 0 and N - ii + 96jj <= j <= 95 + N - ii + 96jj and j < N and 16kk <= k <= 15 + 16kk and k <= 96jj }
12:	[N, ii, jj, kk] -> { [i = -1 + N - ii, j, k = 0, 12] : kk = 0 and ii < N and jj >= 0 and N - ii + 96jj <= j <= 95 + N - ii + 96jj and j < N }
TVLD_LT
10:	[N, ii, jj, kk] -> { [i, j, k, i3] : 1 = 0 }
12:	[N, ii, jj, kk] -> { [i = -1 + N - ii, j, k, 10] : kk = 0 and ii < N and jj >= 0 and j <= 95 + N - ii + 96jj and j < N and 96jj < k <= -N + ii + j }
TILE_VLD
10:	[N, ii, jj, kk] -> { [i = -1 + N - ii, j, k, 10] : ii < N and kk >= 0 and N - ii + 96jj <= j <= 95 + N - ii + 96jj and j < N and 16kk <= k <= 15 + 16kk and k <= 96jj }
12:	[N, ii, jj, kk] -> { [i = -1 + N - ii, j, k, 10] : kk = 0 and ii < N and jj >= 0 and j <= 95 + N - ii + 96jj and j < N and 96jj < k <= -N + ii + j; [i = -1 + N - ii, j, k = 0, 12] : kk = 0 and ii < N and jj >= 0 and N - ii + 96jj <= j <= 95 + N - ii + 96jj and j < N }
TILE_VLD_EXT
10:	[N] -> { [i0, i1, i2, -1 + N - i0, i4, i5, 10] : i0 < N and i2 >= 0 and N - i0 + 96i1 <= i4 <= 95 + N - i0 + 96i1 and i4 < N and 16i2 <= i5 <= 15 + 16i2 and i5 <= 96i1 }
12:	[N] -> { [i0, i1, 0, -1 + N - i0, i4, i5, 10] : i0 < N and i1 >= 0 and i4 <= 95 + N - i0 + 96i1 and i4 < N and 96i1 < i5 <= -N + i0 + i4; [i0, i1, 0, -1 + N - i0, i4, 0, 12] : i0 < N and i1 >= 0 and N - i0 + 96i1 <= i4 <= 95 + N - i0 + 96i1 and i4 < N }
RMaps
10:	{ [ii, jj, kk, i, j, k, 10] -> [0, ii, 0, jj, 0, kk, 0, i, 0, j, 0, k, 10] }
12:	{ [ii, jj, kk, i, j, k, 12] -> [0, ii, 0, jj, 1, kk, 0, i, 0, j, 1, k, 12]; [ii, jj, kk, i, j, k, 10] -> [0, ii, 0, jj, 1, kk, 0, i, 0, j, 0, k, 10] }
TILE_VLD_EXT after Map
10:	[N] -> { [0, i1, 0, i3, 0, i5, 0, -1 + N - i1, 0, i9, 0, i11, 10] : i1 < N and i5 >= 0 and N - i1 + 96i3 <= i9 <= 95 + N - i1 + 96i3 and i9 < N and 16i5 <= i11 <= 15 + 16i5 and i11 <= 96i3 }
12:	[N] -> { [0, i1, 0, i3, 1, 0, 0, -1 + N - i1, 0, i9, 0, i11, 10] : i1 < N and i3 >= 0 and i9 <= 95 + N - i1 + 96i3 and i9 < N and 96i3 < i11 <= -N + i1 + i9; [0, i1, 0, i3, 1, 0, 0, -1 + N - i1, 0, i9, 1, 0, 12] : i1 < N and i3 >= 0 and N - i1 + 96i3 <= i9 <= 95 + N - i1 + 96i3 and i9 < N }
TILE_VLD_EXT to CodeGen
[N] -> { [0, i1, 0, i3, 0, i5, 0, -1 + N - i1, 0, i9, 0, i11, 10] : i1 < N and i5 >= 0 and N - i1 + 96i3 <= i9 <= 95 + N - i1 + 96i3 and i9 < N and 16i5 <= i11 <= 15 + 16i5 and i11 <= 96i3; [0, i1, 0, i3, 1, 0, 0, -1 + N - i1, 0, i9, 0, i11, 10] : i1 < N and i3 >= 0 and i9 <= 95 + N - i1 + 96i3 and i9 < N and 96i3 < i11 <= -N + i1 + i9; [0, i1, 0, i3, 1, 0, 0, -1 + N - i1, 0, i9, 1, 0, 12] : i1 < N and i3 >= 0 and N - i1 + 96i3 <= i9 <= 95 + N - i1 + 96i3 and i9 < N }
step array (st,loop)
[[-1.  1.  1.]
 [-1.  1.  0.]]
decrementation on 1 loop
RSCHEDULE
[N]->{[i1,i2,i3,i4,i5,i6,i7,i8,i9,i10,i11,i12,i13] -> [i1,i2 + i4,i3,i4,i5,i6,i7,-i8,i9,i10,i11,i12,i13] :  (  (  i1 = 0 and i3 = 0 and i5 = 0 and i7 = 0 and i8 = -1 + N - i2 and i9 = 0 and i11 = 0 and i13 = 10 and i2 < N and i6 >= 0 and N - i2 + 96i4 <= i10 <= 95 + N - i2 + 96i4 and i10 < N and 16i6 <= i12 <= 15 + 16i6 and i12 <= 96i4   ) or  (  i1 = 0 and i3 = 0 and i5 = 1 and i6 = 0 and i7 = 0 and i8 = -1 + N - i2 and i9 = 0 and i11 = 0 and i13 = 10 and i2 < N and i4 >= 0 and i10 <= 95 + N - i2 + 96i4 and i10 < N and 96i4 < i12 <= -N + i2 + i10   ) or  (  i1 = 0 and i3 = 0 and i5 = 1 and i6 = 0 and i7 = 0 and i8 = -1 + N - i2 and i9 = 0 and i11 = 1 and i12 = 0 and i13 = 12 and i2 < N and i4 >= 0 and N - i2 + 96i4 <= i10 <= 95 + N - i2 + 96i4 and i10 < N  )  )   }
VALIDATION CHECKING 
*** VALIDATION OK ***
Parallelism searching
c1 no!
c3 found!
c5 no!
c7 found!
c9 no!
c11 no!
Algorithm: time taken:  1.90744113922 seconds.
for (int c1 = 1; c1 < N + floord(N - 2, 96); c1 += 1)
  for (int c3 = max(0, -N + c1 + 1); c3 <= (c1 - 1) / 97; c3 += 1)
    for (int c4 = 0; c4 <= 1; c4 += 1) {
      if (c4 == 1) {
        for (int c9 = N - c1 + 97 * c3; c9 <= min(N - 1, N - c1 + 97 * c3 + 95); c9 += 1)
          for (int c10 = max(0, N - c1 + 97 * c3 - c9 + 1); c10 <= 1; c10 += 1) {
            if (c10 == 1) {
              (0, c1 - c3, 0, c3, 1, 0, 0, N - c1 + c3 - 1, 0, c9, 1, 0, 12);
            } else
              for (int c11 = 96 * c3 + 1; c11 <= -N + c1 - c3 + c9; c11 += 1)
                (0, c1 - c3, 0, c3, 1, 0, 0, N - c1 + c3 - 1, 0, c9, 0, c11, 10);
          }
      } else
        for (int c5 = 0; c5 <= 6 * c3; c5 += 1)
          for (int c9 = N - c1 + 97 * c3; c9 <= min(N - 1, N - c1 + 97 * c3 + 95); c9 += 1)
            for (int c11 = 16 * c5; c11 <= min(96 * c3, 16 * c5 + 15); c11 += 1)
              (0, c1 - c3, 0, c3, 0, c5, 0, N - c1 + c3 - 1, 0, c9, 0, c11, 10);
    }

Code Generation: time taken:  0.19371008873 seconds


for( c1 = 1; c1 < N + floord(N - 2, 96); c1 += 1)
  #pragma omp parallel for
  for( c3 = max(0, -N + c1 + 1); c3 <= (c1 - 1) / 97; c3 += 1)
    for( c4 = 0; c4 <= 1; c4 += 1) {
      if (c4 == 1) {
        for( c9 = N - c1 + 97 * c3; c9 <= min(N - 1, N - c1 + 97 * c3 + 95); c9 += 1)
          for( c10 = max(0, N - c1 + 97 * c3 - c9 + 1); c10 <= 1; c10 += 1) {
            if (c10 == 1) {
              S[(N-c1+c3-1)][c9] = MAX(S[(N-c1+c3-1)][c9], S[(N-c1+c3-1)+1][c9-1]) + MAX((N-c1+c3-1),c9);
            } else
              for( c11 = 96 * c3 + 1; c11 <= -N + c1 - c3 + c9; c11 += 1)
                S[(N-c1+c3-1)][c9] = MAX(S[(N-c1+c3-1)][c11+(N-c1+c3-1)] + S[c11+(N-c1+c3-1)+1][c9], S[(N-c1+c3-1)][c9]);
          }
      } else
        for( c5 = 0; c5 <= 6 * c3; c5 += 1)
          for( c9 = N - c1 + 97 * c3; c9 <= min(N - 1, N - c1 + 97 * c3 + 95); c9 += 1)
            for( c11 = 16 * c5; c11 <= min(96 * c3, 16 * c5 + 15); c11 += 1)
              S[(N-c1+c3-1)][c9] = MAX(S[(N-c1+c3-1)][c11+(N-c1+c3-1)] + S[c11+(N-c1+c3-1)+1][c9], S[(N-c1+c3-1)][c9]);
    }

Output written to: rna_tiling.c