/src/pgesv/HPL_plindx1.c - HPL sur GPU - Forge du Centre Blaise Pascal

root / src / pgesv / HPL_plindx1.c

Historique | Voir | Annoter | Télécharger (11,39 ko)

       /*
        * -- High Performance Computing Linpack Benchmark (HPL)
        *    HPL - 2.0 - September 10, 2008
        *    Antoine P. Petitet
        *    University of Tennessee, Knoxville
        *    Innovative Computing Laboratory
        *    (C) Copyright 2000-2008 All Rights Reserved
        *
        * -- Copyright notice and Licensing terms:
        *
        * Redistribution  and  use in  source and binary forms, with or without
        * modification, are  permitted provided  that the following  conditions
        * are met:
        *
        * 1. Redistributions  of  source  code  must retain the above copyright
        * notice, this list of conditions and the following disclaimer.
        *
        * 2. Redistributions in binary form must reproduce  the above copyright
        * notice, this list of conditions,  and the following disclaimer in the
        * documentation and/or other materials provided with the distribution.
        *
        * 3. All  advertising  materials  mentioning  features  or  use of this
        * software must display the following acknowledgement:
        * This  product  includes  software  developed  at  the  University  of
        * Tennessee, Knoxville, Innovative Computing Laboratory.
        *
        * 4. The name of the  University,  the name of the  Laboratory,  or the
        * names  of  its  contributors  may  not  be used to endorse or promote
        * products  derived   from   this  software  without  specific  written
        * permission.
        *
        * -- Disclaimer:
        *
        * THIS  SOFTWARE  IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
        * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
        * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
        * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
        * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
        * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
        * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
        * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
        * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
        * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
        * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
        * ---------------------------------------------------------------------
        */
       /*
        * Include files
        */
       #include "hpl.h"
       #ifdef STDC_HEADERS
       void HPL_plindx1
+      (
          HPL_T_panel *                    PANEL,
          const int                        K,
          const int *                      IPID,
          int *                            IPA,
          int *                            LINDXA,
          int *                            LINDXAU,
          int *                            IPLEN,
          int *                            IPMAP,
          int *                            IPMAPM1,
          int *                            PERMU,
          int *                            IWORK
+      )
       #else
       void HPL_plindx1
       ( PANEL, K, IPID, IPA, LINDXA, LINDXAU, IPLEN, IPMAP, IPMAPM1, PERMU, IWORK )
          HPL_T_panel *                    PANEL;
          const int                        K;
          const int *                      IPID;
          int *                            IPA;
          int *                            LINDXA;
          int *                            LINDXAU;
          int *                            IPLEN;
          int *                            IPMAP;
          int *                            IPMAPM1;
          int *                            PERMU;
          int *                            IWORK;
       #endif
+      {
       /*
        * Purpose
        * =======
+       *
        * HPL_plindx1 computes two local arrays  LINDXA and  LINDXAU  containing
        * the  local  source and final destination position  resulting from the
        * application of row interchanges.  In addition, this function computes
        * three arrays IPLEN, IPMAP and IPMAPM1  that contain  the  logarithmic
        * mapping information for the spreading phase.
+       *
        * Arguments
        * =========
+       *
        * PANEL   (local input/output)          HPL_T_panel *
        *         On entry,  PANEL  points to the data structure containing the
        *         panel information.
+       *
        * K       (global input)                const int
        *         On entry, K specifies the number of entries in IPID.  K is at
        *         least 2*N, and at most 4*N.
+       *
        * IPID    (global input)                const int *
        *         On entry,  IPID  is an array of length K. The first K entries
        *         of that array contain the src and final destination resulting
        *         from the application of the interchanges.
+       *
        * IPA     (global output)               int *
        *         On exit,  IPA  specifies  the number of rows that the current
        *         process row has that either belong to U  or should be swapped
        *         with remote rows of A.
+       *
        * LINDXA  (global output)               int *
        *         On entry, LINDXA  is an array of dimension 2*N. On exit, this
        *         array contains the local indexes of the rows of A I have that
        *         should be copied into U.
+       *
        * LINDXAU (global output)               int *
        *         On exit, LINDXAU  is an array of dimension 2*N. On exit, this
        *         array contains  the local destination  information encoded as
        *         follows.  If LINDXAU(k) >= 0, row  LINDXA(k)  of A  is  to be
        *         copied in U at position LINDXAU(k).  Otherwise, row LINDXA(k)
        *         of A should be locally copied into A(-LINDXAU(k),:).
+       *
        * IPLEN   (global output)               int *
        *         On entry, IPLEN is an array of dimension NPROW + 1. On  exit,
        *         this array is such that  IPLEN[i]  is the number of rows of A
        *         in  the  processes  before  process  IPMAP[i]  after the sort
        *         with the convention that IPLEN[nprow]  is the total number of
        *         rows of the panel.  In other words IPLEN[i+1]-IPLEN[i] is the
        *         local number of rows of A that should be moved to the process
        *         IPMAP[i]. IPLEN is such that the number of rows of the source
        *         process  row can be computed as  IPLEN[1] - IPLEN[0], and the
        *         remaining  entries  of  this  array  are  sorted  so that the
        *         quantities IPLEN[i+1] - IPLEN[i] are logarithmically sorted.
+       *
        * IPMAP   (global output)               int *
        *         On entry, IPMAP is an array of dimension NPROW. On exit, this
        *         array contains  the logarithmic mapping of the processes.  In
        *         other words, IPMAP[myrow] is the corresponding sorted process
        *         coordinate.
+       *
        * IPMAPM1 (global output)               int *
        *         On entry, IPMAPM1  is an array of dimension NPROW.  On  exit,
        *         this  array  contains  the inverse of the logarithmic mapping
        *         contained  in  IPMAP:  IPMAPM1[ IPMAP[i] ] = i,  for all i in
        *         [0.. NPROCS)
+       *
        * PERMU   (global output)               int *
        *         On entry,  PERMU  is an array of dimension JB. On exit, PERMU
        *         contains  a sequence of permutations,  that should be applied
        *         in increasing order to permute in place the row panel U.
+       *
        * IWORK   (workspace)                   int *
        *         On entry, IWORK is a workarray of dimension 2*JB.
+       *
        * ---------------------------------------------------------------------
        */
       /*
        * .. Local Variables ..
        */
          int                        * iwork;
          int                        dst, dstrow, fndd, i, ia, icurrow, il,
                                     ip, ipU, iroff, j, jb, myrow, nb, nprow,
                                     src, srcrow;
       /* ..
        * .. Executable Statements ..
        */
       /*
        * Logarithmic sort of the processes - compute IPMAP, IPLEN and IPMAPM1
        */
          HPL_plindx10( PANEL, K, IPID, IPLEN, IPMAP, IPMAPM1 );
       /*
        * Compute the local arrays  LINDXA  and  LINDXAU  containing  the local
        * source and final destination position resulting from  the application
        * of N interchanges. Compute LINDXA and LINDXAU in icurrow,  and LINDXA
        * elsewhere and PERMU in every process.
        */
          myrow = PANEL->grid->myrow; nprow   = PANEL->grid->nprow;
          jb    = PANEL->jb;          nb      = PANEL->nb;     ia = PANEL->ia;
          iroff = PANEL->ii;          icurrow = PANEL->prow;
          iwork = IWORK + jb;
          if( myrow == icurrow )
+         {
             for( i = 0, ip = 0, ipU = 0; i < K; i += 2 )
+            {
                src = IPID[i]; Mindxg2p( src, nb, nb, srcrow, 0, nprow );
                if( srcrow == icurrow )
+               {
                   dst = IPID[i+1]; Mindxg2p( dst, nb, nb, dstrow, 0, nprow );
                   Mindxg2l( il, src, nb, nb, myrow, 0, nprow );
                   LINDXA[ip] = il - iroff;
                   if( ( dstrow == icurrow ) && ( dst - ia < jb ) )
+                  {
                      PERMU[ipU] = dst - ia;  il = IPMAPM1[dstrow];
                      j          = IPLEN[il]; iwork[ipU] = LINDXAU[ip] = j;
                      IPLEN[il]++; ipU++;
+                  }
                   else if( dstrow != icurrow )
+                  {
                      j = 0;
                      do { fndd = ( dst == IPID[j] ); j+=2; }
                      while( !fndd && ( j < K ) );
                      PERMU[ipU] = IPID[j-1]-ia; il = IPMAPM1[dstrow];
                      j          = IPLEN[il];    iwork[ipU] = LINDXAU[ip] = j;
                      IPLEN[il]++; ipU++;
+                  }
                   else if( ( dstrow == icurrow ) && ( dst - ia >= jb ) )
+                  {
                      Mindxg2l( il, dst, nb, nb, myrow, 0, nprow );
                      LINDXAU[ip] = iroff - il;
+                  }
                   ip++;
+               }
+            }
             *IPA = ip;
+         }
          else
+         {
             for( i = 0, ip = 0, ipU = 0; i < K; i += 2 )
+            {
                src = IPID[i  ]; Mindxg2p( src, nb, nb, srcrow, 0, nprow );
                dst = IPID[i+1]; Mindxg2p( dst, nb, nb, dstrow, 0, nprow );
       /*
        * LINDXA[i] is the local index of the row of A that belongs into U
        */
                if( myrow == dstrow )
+               {
                   Mindxg2l( il, dst, nb, nb, myrow, 0, nprow );
                   LINDXA[ip] = il - iroff; ip++;
+               }
       /*
        * iwork[i] is the local (current) position  index in U
        * PERMU[i] is the local (final) destination index in U
        */
                if( srcrow == icurrow )
+               {
                   if( ( dstrow == icurrow ) && ( dst - ia < jb ) )
+                  {
                      PERMU[ipU] = dst - ia;  il = IPMAPM1[dstrow];
                      iwork[ipU] = IPLEN[il]; IPLEN[il]++; ipU++;
+                  }
                   else if( dstrow != icurrow )
+                  {
                      j = 0;
                      do { fndd = ( dst == IPID[j] ); j+=2; }
                      while( !fndd && ( j < K ) );
                      PERMU[ipU] = IPID[j-1] - ia; il = IPMAPM1[dstrow];
                      iwork[ipU] = IPLEN[il]; IPLEN[il]++; ipU++;
+                  }
+               }
+            }
             *IPA = 0;
+         }
       /*
        * Simplify iwork and PERMU, return in PERMU the sequence of permutation
        * that need to be apply to U after it has been broadcast.
        */
          HPL_perm( jb, iwork, PERMU, IWORK );
       /*
        * Reset IPLEN to its correct value
        */
          for( i = nprow; i > 0; i-- ) IPLEN[i] = IPLEN[i-1];
          IPLEN[0] = 0;
       /*
        * End of HPL_plindx1
        */
+      }

Centre Blaise Pascal » HPL sur GPU

root / src / pgesv / HPL_plindx1.c