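/*
 * Web-viewer residue captured with this copy of the file (not part of the
 * original header), kept here as a comment so the file remains valid C:
 *   1016 lines, 34 KiB, C, "Raw Permalink Normal View History",
 *   2020-04-22 12:56:21 -04:00
 */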
/*
File: vectorOps.h
Contains: vector and matrix functions for AltiVec
Version: QuickTime 7.3
Copyright: (c) 2007 (c) 1999-2001 by Apple Computer, Inc., all rights reserved.
Bugs?: For bug reports, consult the following page on
the World Wide Web:
http://developer.apple.com/bugreporter/
*/
/* Include guard: everything below is skipped when __VECTOROPS__ is already defined. */
#ifndef __VECTOROPS__
#define __VECTOROPS__
/* Pull in ConditionalMacros.h unless its own guard macro is already defined. */
#ifndef __CONDITIONALMACROS__
#include <ConditionalMacros.h>
#endif
/* PRAGMA_ONCE is not defined in this file; presumably it comes from ConditionalMacros.h -- confirm. */
#if PRAGMA_ONCE
#pragma once
#endif
/* Give the declarations C linkage when compiled as C++; the brace is closed near the end of the file. */
#ifdef __cplusplus
extern "C" {
#endif
/* PRAGMA_IMPORT is likewise not defined here; the matching "import off/reset" is near the end of the file. */
#if PRAGMA_IMPORT
#pragma import on
#endif
/*
-------------------------------------------------------------------------------------
This section is a collection of Basic Linear Algebra Subprograms (BLAS), which
use AltiVec technology for their implementations. The functions are grouped into
three categories (called levels), as follows:
1) Vector-scalar linear algebra subprograms
2) Matrix-vector linear algebra subprograms
3) Matrix operations
Following is a list of subprograms and a short description of each one.
-------------------------------------------------------------------------------------
*/
#ifdef __VEC__
/*
-------------------------------------------------------------------------------------
Level 1
-------------------------------------------------------------------------------------
*/
/**************************************************
  vIsamax finds the position of the first vector
  element having the largest magnitude.
     count   length of vector x (count is a
             multiple of 4)
     x       array of floats, passed as an array
             of vector float
  The long result is presumably that position;
  whether it is counted from 0 or from 1 is not
  stated in this header -- TODO confirm.
**************************************************/
/*
 *  vIsamax()
 *
 *  Availability:
 *    Non-Carbon CFM:   in vecLib 1.0 and later
 *    CarbonLib:        not in Carbon, but vecLib is compatible with CarbonLib
 *    Mac OS X:         in version 10.0 and later
 */
EXTERN_API_C( long )
vIsamax(
long count,
const vector float x[]);
/**************************************************
  vIsamin finds the position of the first vector
  element having minimum absolute value.
     count   length of vector x (count is a
             multiple of 4)
     x       array of floats, passed as an array
             of vector float
  The long result is presumably that position;
  whether it is counted from 0 or from 1 is not
  stated in this header -- TODO confirm.
**************************************************/
/*
 *  vIsamin()
 *
 *  Availability:
 *    Non-Carbon CFM:   in vecLib 1.0 and later
 *    CarbonLib:        not in Carbon, but vecLib is compatible with CarbonLib
 *    Mac OS X:         in version 10.0 and later
 */
EXTERN_API_C( long )
vIsamin(
long count,
const vector float x[]);
/**************************************************
  vIsmax finds the position of the first vector
  element having maximum value (signed value, in
  contrast to the magnitude used by vIsamax).
     count   length of vector x (count is a
             multiple of 4)
     x       array of floats, passed as an array
             of vector float
  The long result is presumably that position;
  whether it is counted from 0 or from 1 is not
  stated in this header -- TODO confirm.
**************************************************/
/*
 *  vIsmax()
 *
 *  Availability:
 *    Non-Carbon CFM:   in vecLib 1.0 and later
 *    CarbonLib:        not in Carbon, but vecLib is compatible with CarbonLib
 *    Mac OS X:         in version 10.0 and later
 */
EXTERN_API_C( long )
vIsmax(
long count,
const vector float x[]);
/**************************************************
  vIsmin finds the position of the first vector
  element having minimum value (signed value, in
  contrast to the absolute value used by vIsamin).
     count   length of vector x (count is a
             multiple of 4)
     x       array of floats, passed as an array
             of vector float
  The long result is presumably that position;
  whether it is counted from 0 or from 1 is not
  stated in this header -- TODO confirm.
**************************************************/
/*
 *  vIsmin()
 *
 *  Availability:
 *    Non-Carbon CFM:   in vecLib 1.0 and later
 *    CarbonLib:        not in Carbon, but vecLib is compatible with CarbonLib
 *    Mac OS X:         in version 10.0 and later
 */
EXTERN_API_C( long )
vIsmin(
long count,
const vector float x[]);
/**************************************************
  vSasum finds the sum of the magnitudes of the
  elements in a vector.
     count   length of vector x (count is a
             multiple of 4)
     x       array of floats, passed as an array
             of vector float
  The float result is presumably that sum.
**************************************************/
/*
 *  vSasum()
 *
 *  Availability:
 *    Non-Carbon CFM:   in vecLib 1.0 and later
 *    CarbonLib:        not in Carbon, but vecLib is compatible with CarbonLib
 *    Mac OS X:         in version 10.0 and later
 */
EXTERN_API_C( float )
vSasum(
long count,
const vector float x[]);
/**************************************************
  vSsum is the vector version of sasum but without
  the absolute value. It takes the value of each
  element of the array and adds them together.
     count   length of vector x (count is a
             multiple of 4)
     x       array of floats, passed as an array
             of vector float
  The float result is presumably that sum.
**************************************************/
/*
 *  vSsum()
 *
 *  Availability:
 *    Non-Carbon CFM:   in vecLib 1.0 and later
 *    CarbonLib:        not in Carbon, but vecLib is compatible with CarbonLib
 *    Mac OS X:         in version 10.0 and later
 */
EXTERN_API_C( float )
vSsum(
long count,
const vector float x[]);
/**************************************************
  vSaxpy multiplies a vector x, by a scalar and
  adds it to a vector y and stores the result in y
  (y <-- alpha * x + y).
     n       number of floats in x (n is a
             multiple of 4)
     alpha   scalar number is single-precision
             floating-point
     x       array of vector floats
     y       array of vector floats, where the
             result is stored
**************************************************/
/*
 *  vSaxpy()
 *
 *  Availability:
 *    Non-Carbon CFM:   in vecLib 1.0 and later
 *    CarbonLib:        not in Carbon, but vecLib is compatible with CarbonLib
 *    Mac OS X:         in version 10.0 and later
 */
EXTERN_API_C( void )
vSaxpy(
long n,
float alpha,
const vector float x[],
vector float y[]);
/*************************************************************
  vScopy copies a vector x, into another vector y.
     n       number of floats in x and y (n is a
             multiple of 4)
     x       array of vector floats (source; const)
     y       array of vector floats (destination)
*************************************************************/
/*
 *  vScopy()
 *
 *  Availability:
 *    Non-Carbon CFM:   in vecLib 1.0 and later
 *    CarbonLib:        not in Carbon, but vecLib is compatible with CarbonLib
 *    Mac OS X:         in version 10.0 and later
 */
EXTERN_API_C( void )
vScopy(
long n,
const vector float x[],
vector float y[]);
/*************************************************************
  vSdot finds the dot product of two vectors.
     n       number of floats in x and y (n is a
             multiple of 4)
     x       array of vector floats
     y       array of vector floats
  The float result is presumably the dot product.
*************************************************************/
/*
 *  vSdot()
 *
 *  Availability:
 *    Non-Carbon CFM:   in vecLib 1.0 and later
 *    CarbonLib:        not in Carbon, but vecLib is compatible with CarbonLib
 *    Mac OS X:         in version 10.0 and later
 */
EXTERN_API_C( float )
vSdot(
long n,
const vector float x[],
const vector float y[]);
/*************************************************************
  vSnaxpy computes saxpy "n" times.
     n       number of saxpy computations to be
             performed and the number of elements
             in vector a (n is a multiple of 4)
     m       number of floats in each vector x(i)
             or y(i)
     a       array of vector floats containing
             scalars a(i)
     x       matrix containing arrays of vector-
             floats x(i)
     y       matrix containing vectors y(i)
  NOTE(review): y is the only non-const array, so the
  results are presumably accumulated into y -- confirm.
*************************************************************/
/*
 *  vSnaxpy()
 *
 *  Availability:
 *    Non-Carbon CFM:   in vecLib 1.0 and later
 *    CarbonLib:        not in Carbon, but vecLib is compatible with CarbonLib
 *    Mac OS X:         in version 10.0 and later
 */
EXTERN_API_C( void )
vSnaxpy(
long n,
long m,
const vector float a[],
const vector float x[],
vector float y[]);
/*************************************************************
  vSndot computes the dot products "n" times.
     n       number of dot product computations
             to be performed and the number of
             elements in vector s
     m       number of elements in vectors x(i)
             and y(i) for each dot product
             computation (m is a multiple of 4)
     s       array of floats. Depending on the
             value of "isw" different computations
             are performed and the results are
             stored in the array s
     isw     indicates the type of computation
             to perform.
             if isw=1, s(i) <--   x(i) y(i)
             if isw=2, s(i) <-- - x(i) y(i)
             if isw=3, s(i) <-- s(i) + x(i) y(i)
             if isw=4, s(i) <-- s(i) - x(i) y(i)
     x       matrix containing arrays x(i)
     y       matrix containing arrays y(i)
*************************************************************/
/*
 *  vSndot()
 *
 *  Availability:
 *    Non-Carbon CFM:   in vecLib 1.0 and later
 *    CarbonLib:        not in Carbon, but vecLib is compatible with CarbonLib
 *    Mac OS X:         in version 10.0 and later
 */
EXTERN_API_C( void )
vSndot(
long n,
long m,
float s[],
long isw,
const vector float x[],
const vector float y[]);
/*************************************************************
  vSnrm2 finds the Euclidean length of a vector
  with scaling of input to avoid destructive
  underflow and overflow.
     count   length of vector (multiple of 4)
     x       array of vector floats
  The float result is presumably that length.
*************************************************************/
/*
 *  vSnrm2()
 *
 *  Availability:
 *    Non-Carbon CFM:   in vecLib 1.0 and later
 *    CarbonLib:        not in Carbon, but vecLib is compatible with CarbonLib
 *    Mac OS X:         in version 10.0 and later
 */
EXTERN_API_C( float )
vSnrm2(
long count,
const vector float x[]);
/*************************************************************
  vSnorm2 finds the Euclidean length of a vector
  with no scaling of input (compare vSnrm2, which
  scales the input).
     count   length of vector (multiple of 4)
     x       array of vector floats
  The float result is presumably that length.
*************************************************************/
/*
 *  vSnorm2()
 *
 *  Availability:
 *    Non-Carbon CFM:   in vecLib 1.0 and later
 *    CarbonLib:        not in Carbon, but vecLib is compatible with CarbonLib
 *    Mac OS X:         in version 10.0 and later
 */
EXTERN_API_C( float )
vSnorm2(
long count,
const vector float x[]);
/*************************************************************
  vSrot applies a plane rotation.
     n       number of points to be rotated, also
             number of elements in x and y (n is
             a multiple of 4)
     x       array of vector floats. It is a
             vector of length n, containing x(i)
             coordinates of points to be rotated
     y       array of vector floats. It is a
             vector of length n, containing y(i)
             coordinates of points to be rotated
     c       cosine of angle of rotation
     s       sine of angle of rotation
  Both x and y are non-const, so the rotated
  coordinates are presumably written back in place.
*************************************************************/
/*
 *  vSrot()
 *
 *  Availability:
 *    Non-Carbon CFM:   in vecLib 1.0 and later
 *    CarbonLib:        not in Carbon, but vecLib is compatible with CarbonLib
 *    Mac OS X:         in version 10.0 and later
 */
EXTERN_API_C( void )
vSrot(
long n,
vector float x[],
vector float y[],
float c,
float s);
/*************************************************************
  vSscal multiplies a vector x, by a scalar and
  stores the result in the vector x
  (x <-- alpha * x).
     n       number of floats in x (n is a
             multiple of 4)
     alpha   scalar number is single-precision
             floating-point
     x       array of vector floats, scaled in place
*************************************************************/
/*
 *  vSscal()
 *
 *  Availability:
 *    Non-Carbon CFM:   in vecLib 1.0 and later
 *    CarbonLib:        not in Carbon, but vecLib is compatible with CarbonLib
 *    Mac OS X:         in version 10.0 and later
 */
EXTERN_API_C( void )
vSscal(
long n,
float alpha,
vector float x[]);
/*************************************************************
  vSswap interchanges the elements of vectors x
  and y.
     n       number of floats in x and y (n is a
             multiple of 4)
     x       array of vector floats
     y       array of vector floats
*************************************************************/
/*
 *  vSswap()
 *
 *  Availability:
 *    Non-Carbon CFM:   in vecLib 1.0 and later
 *    CarbonLib:        not in Carbon, but vecLib is compatible with CarbonLib
 *    Mac OS X:         in version 10.0 and later
 */
EXTERN_API_C( void )
vSswap(
long n,
vector float x[],
vector float y[]);
/*************************************************************
  vSyax multiplies a vector x, by a scalar and
  stores the result in a vector y
  (y <-- alpha * x).
     n       number of floats in x (n is a
             multiple of 4)
     alpha   scalar number is single-precision
             floating-point
     x       array of vector floats
     y       array of vector floats, where the
             result is stored
*************************************************************/
/*
 *  vSyax()
 *
 *  Availability:
 *    Non-Carbon CFM:   in vecLib 1.0 and later
 *    CarbonLib:        not in Carbon, but vecLib is compatible with CarbonLib
 *    Mac OS X:         in version 10.0 and later
 */
EXTERN_API_C( void )
vSyax(
long n,
float alpha,
const vector float x[],
vector float y[]);
/*************************************************************
  vSzaxpy multiplies a vector x, by a scalar and
  adds it to a vector y and stores the result in
  vector z (z <-- alpha * x + y).
     n       number of floats in x (n is a
             multiple of 4)
     alpha   scalar number is single-precision
             floating-point
     x       array of vector floats
     y       array of vector floats (this parameter
             is spelled "yY" in the prototype)
     z       array of vector floats, where the
             result is stored
*************************************************************/
/*
 *  vSzaxpy()
 *
 *  Availability:
 *    Non-Carbon CFM:   in vecLib 1.0 and later
 *    CarbonLib:        not in Carbon, but vecLib is compatible with CarbonLib
 *    Mac OS X:         in version 10.0 and later
 */
EXTERN_API_C( void )
vSzaxpy(
long n,
float alpha,
const vector float x[],
const vector float yY[],
vector float z[]);
/*
-------------------------------------------------------------------------------------
Level 2
-------------------------------------------------------------------------------------
*/
/*************************************************************
  vSgemv multiplies an array of vector floats y by
  a scalar beta, and takes the result and adds it
  to the product of a scalar alpha multiplied by
  a matrix A multiplied by a vector x. The above
  result is stored in array y
  (y <-- alpha * A * x + beta * y). Furthermore, the
  same function also performs the above calculation
  with the transpose of matrix A, instead of
  matrix A. In this function argument "forma"
  distinguishes between the above two cases.
     forma   indicates the form of matrix A to
             use in the computation (forma is a
             single char), where:
             If forma = 'n', Matrix A is used
             If forma = 'T', Transpose of Matrix
                A is used
     m       number of rows in matrix A and
             depending on value of forma
             if forma = 'n', it is the length of
                vector y
             if forma = 'T', it is the length of
                vector x. m is a multiple of 4
     n       number of columns in matrix A and
             depending on value of forma
             if forma = 'n', it is the length of
                vector x
             if forma = 'T', it is the length of
                vector y. n is presumably a multiple
                of 4 as well (the earlier text
                repeated "m" here -- TODO confirm)
     alpha   is a scaling constant
     a       is the m by n matrix A. Its elements
             are vector floats
     x       is an array of vector floats
     beta    is a scaling constant
     y       is an array of vector floats; it
             receives the result
*************************************************************/
/*
 *  vSgemv()
 *
 *  Availability:
 *    Non-Carbon CFM:   in vecLib 1.0 and later
 *    CarbonLib:        not in Carbon, but vecLib is compatible with CarbonLib
 *    Mac OS X:         in version 10.0 and later
 */
EXTERN_API_C( void )
vSgemv(
char forma,
long m,
long n,
float alpha,
const vector float a[],
const vector float x[],
float beta,
vector float y[]);
/*************************************************************
  vSgemx adds an array of vector floats y to the
  product of a scalar alpha by a matrix A
  multiplied by an array of vector floats x. It
  then stores the result in the vector y
  (y <-- alpha * A * x + y).
     m       number of rows in matrix A and
             the length of vector y. m is a
             multiple of 4
     n       number of columns in matrix A and
             the length of vector x. n is presumably
             a multiple of 4 as well (the earlier
             text repeated "m" here -- TODO confirm)
     alpha   is a scaling constant
     a       is the m by n matrix A. Its elements
             are vector floats
     x       is an array of vector floats
     y       is an array of vector floats; it
             receives the result
*************************************************************/
/*
 *  vSgemx()
 *
 *  Availability:
 *    Non-Carbon CFM:   in vecLib 1.0 and later
 *    CarbonLib:        not in Carbon, but vecLib is compatible with CarbonLib
 *    Mac OS X:         in version 10.0 and later
 */
EXTERN_API_C( void )
vSgemx(
long m,
long n,
float alpha,
const vector float a[],
const vector float x[],
vector float y[]);
/*************************************************************
  vSgemtx takes the transpose of a matrix A and
  multiplies it by an array x. It then multiplies
  the result by a scalar alpha. Finally adds the
  above result to an array y and stores the result
  in array y (y <-- alpha * A(T) * x + y).
     m       number of rows in matrix A and
             the length of vector x. m is a
             multiple of 4
     n       number of columns in matrix A and
             the length of vector y. n is presumably
             a multiple of 4 as well (the earlier
             text repeated "m" here -- TODO confirm)
     alpha   is a scaling constant
     a       is the m by n matrix A. Its elements
             are vector floats
     x       is an array of vector floats
     y       is an array of vector floats; it
             receives the result
*************************************************************/
/*
 *  vSgemtx()
 *
 *  Availability:
 *    Non-Carbon CFM:   in vecLib 1.0 and later
 *    CarbonLib:        not in Carbon, but vecLib is compatible with CarbonLib
 *    Mac OS X:         in version 10.0 and later
 */
EXTERN_API_C( void )
vSgemtx(
long m,
long n,
float alpha,
const vector float a[],
const vector float x[],
vector float y[]);
/*
-------------------------------------------------------------------------------------
Level 3
-------------------------------------------------------------------------------------
*/
/*************************************************************
  vSgeadd performs matrix addition for general
  matrices or their transposes.
  NOTE(review): the descriptions below speak of "m rows"
  and "n columns", but the prototype has no m or n; they
  presumably correspond to height and width -- confirm.
     height  height of the matrix (it is multiple
             of 4)
     width   width of the matrix (it is multiple
             of 4)
     a       matrix A, and depending on forma:
             if forma='n', A is used in the
             computation, and A has m rows and
             n columns
             if forma='T', A(T) is used in the
             computation, and A has n rows and
             m columns
     forma   indicates the form of matrix A to
             use in the computation, where:
             if forma='n', A is used in the
             computation
             if forma='T', A(T) is used in the
             computation
     b       matrix b, and depending on formb:
             if formb='n', b is used in the
             computation, and b has m rows and
             n columns
             if formb='T', b(T) is used in the
             computation, and b has n rows and
             m columns
     formb   indicates the form of matrix b to
             use in the computation, where:
             if formb='n', b is used in the
             computation
             if formb='T', b(T) is used in the
             computation
     c       is an m by n matrix c, containing
             the results of the computation
*************************************************************/
/*
 *  vSgeadd()
 *
 *  Availability:
 *    Non-Carbon CFM:   in vecLib 1.0 and later
 *    CarbonLib:        not in Carbon, but vecLib is compatible with CarbonLib
 *    Mac OS X:         in version 10.0 and later
 */
EXTERN_API_C( void )
vSgeadd(
long height,
long width,
const vector float a[],
char forma,
const vector float b[],
char formb,
vector float c[]);
/*************************************************************
  vSgesub performs matrix subtraction for general
  matrices or their transposes.
  NOTE(review): the descriptions below speak of "m rows"
  and "n columns", but the prototype has no m or n; they
  presumably correspond to height and width -- confirm.
  The order of the operands (A - b versus b - A) is not
  stated in this header -- confirm.
     height  height of the matrix (it is multiple
             of 4)
     width   width of the matrix (it is multiple
             of 4)
     a       matrix A, and depending on forma:
             if forma='n', A is used in the
             computation, and A has m rows and
             n columns
             if forma='T', A(T) is used in the
             computation, and A has n rows and
             m columns
     forma   indicates the form of matrix A to
             use in the computation, where:
             if forma='n', A is used in the
             computation
             if forma='T', A(T) is used in the
             computation
     b       matrix b, and depending on formb:
             if formb='n', b is used in the
             computation, and b has m rows and
             n columns
             if formb='T', b(T) is used in the
             computation, and b has n rows and
             m columns
     formb   indicates the form of matrix b to
             use in the computation, where:
             if formb='n', b is used in the
             computation
             if formb='T', b(T) is used in the
             computation
     c       is an m by n matrix c, containing
             the results of the computation
*************************************************************/
/*
 *  vSgesub()
 *
 *  Availability:
 *    Non-Carbon CFM:   in vecLib 1.0 and later
 *    CarbonLib:        not in Carbon, but vecLib is compatible with CarbonLib
 *    Mac OS X:         in version 10.0 and later
 */
EXTERN_API_C( void )
vSgesub(
long height,
long width,
const vector float a[],
char forma,
const vector float b[],
char formb,
vector float c[]);
/*************************************************************
  vSgemul performs matrix multiplication for
  general matrices or their transposes.
     l       height of the matrix A (it is
             multiple of 4)
     m       width of matrix A (it is multiple
             of 4)
     n       width of matrix b (it is multiple
             of 4)
     a       matrix A, and depending on forma:
             if forma='n', A is used in the
             computation, and A has l rows and
             m columns
             if forma='T', A(T) is used in the
             computation, and A has m rows and
             l columns
     forma   indicates the form of matrix A to
             use in the computation, where:
             if forma='n', A is used in the
             computation
             if forma='T', A(T) is used in the
             computation
     b       matrix b, and depending on formb:
             if formb='n', b is used in the
             computation, and b has m rows and
             n columns
             if formb='T', b(T) is used in the
             computation, and b has n rows and
             m columns
     formb   indicates the form of matrix b to
             use in the computation, where:
             if formb='n', b is used in the
             computation
             if formb='T', b(T) is used in the
             computation
     matrix  is the matrix containing the
             results of the computation
*************************************************************/
/*
 *  vSgemul()
 *
 *  Availability:
 *    Non-Carbon CFM:   in vecLib 1.0 and later
 *    CarbonLib:        not in Carbon, but vecLib is compatible with CarbonLib
 *    Mac OS X:         in version 10.0 and later
 */
EXTERN_API_C( void )
vSgemul(
long l,
long m,
long n,
const vector float a[],
char forma,
const vector float b[],
char formb,
vector float matrix[]);
/*************************************************************
  vSgemm performs combined matrix multiplication
  and addition for general matrices or their transposes.
  NOTE(review): the exact formula is not spelled out in
  this header; presumably
  matrix <-- alpha * op(A) * op(b) + beta * c -- confirm.
     l       number of rows in matrix c (it is
             multiple of 4)
     m       has the following meaning:
             if forma='n', it is the number of
             columns in matrix A
             if forma='T', it is the number of
             rows in matrix A. In addition
             if formb='n', it is the number of
             rows in matrix b
             if formb='T', it is the number of
             columns in matrix b
     n       columns in matrix c
     a       matrix A, and depending on forma:
             if forma='n', A is used in the
             computation, and A has l rows and
             m columns
             if forma='T', A(T) is used in the
             computation, and A has m rows and
             l columns
     forma   indicates the form of matrix A to
             use in the computation, where:
             if forma='n', A is used in the
             computation
             if forma='T', A(T) is used in the
             computation
     b       matrix b, and depending on formb:
             if formb='n', b is used in the
             computation, and b has m rows and
             n columns
             if formb='T', b(T) is used in the
             computation, and b has n rows and
             m columns
     formb   indicates the form of matrix b to
             use in the computation, where:
             if formb='n', b is used in the
             computation
             if formb='T', b(T) is used in the
             computation
     c       matrix c; per the entries for l and n
             it has l rows and n columns
     alpha   is a scalar
     beta    is a scalar
     matrix  is the l by n matrix (the only other
             non-const array besides c, so presumably
             the destination -- confirm)
*************************************************************/
/*
 *  vSgemm()
 *
 *  Availability:
 *    Non-Carbon CFM:   in vecLib 1.0 and later
 *    CarbonLib:        not in Carbon, but vecLib is compatible with CarbonLib
 *    Mac OS X:         in version 10.0 and later
 */
EXTERN_API_C( void )
vSgemm(
long l,
long m,
long n,
const vector float a[],
char forma,
const vector float b[],
char formb,
vector float c[],
float alpha,
float beta,
vector float matrix[]);
/*************************************************************
  vSgetmi performs general matrix transpose (in place).
     size    is the number of rows and columns
             in matrix x
     x       the matrix, an array of vector floats;
             it is transposed in place
*************************************************************/
/*
 *  vSgetmi()
 *
 *  Availability:
 *    Non-Carbon CFM:   in vecLib 1.0 and later
 *    CarbonLib:        not in Carbon, but vecLib is compatible with CarbonLib
 *    Mac OS X:         in version 10.0 and later
 */
EXTERN_API_C( void )
vSgetmi(
long size,
vector float x[]);
/*************************************************************
  vSgetmo performs general matrix transpose (out-of-place).
     height  is the height of the matrix
     width   is the width of the matrix
     x       array of vector floats (const, so
             presumably the input matrix)
     y       array of vector floats (non-const, so
             presumably receives the transpose)
*************************************************************/
/*
 *  vSgetmo()
 *
 *  Availability:
 *    Non-Carbon CFM:   in vecLib 1.0 and later
 *    CarbonLib:        not in Carbon, but vecLib is compatible with CarbonLib
 *    Mac OS X:         in version 10.0 and later
 */
EXTERN_API_C( void )
vSgetmo(
long height,
long width,
const vector float x[],
vector float y[]);
/*************************************************************
  vSgevv is a new function. It takes matrix A and
  multiplies it by matrix b and puts the result in
  matrix m.
  NOTE(review): the sizes below refer to a dimension "m",
  but the prototype has no such dimension; the only
  parameter named m is the result array. The intended
  sizes cannot be determined from this header -- confirm
  against the implementation.
     l       is the height of the matrix
     n       is the width of the matrix
     a       matrix A: array of vector floats of at
             least l * m in length
     b       array of vector floats of at least
             m * n in length
     m       array of vector floats, containing
             the results of multiplication. It
             is m * n in size
*************************************************************/
/*
 *  vSgevv()
 *
 *  Availability:
 *    Non-Carbon CFM:   in vecLib 1.0 and later
 *    CarbonLib:        not in Carbon, but vecLib is compatible with CarbonLib
 *    Mac OS X:         in version 10.0 and later
 */
EXTERN_API_C( void )
vSgevv(
long l,
long n,
const vector float a[],
const vector float b[],
vector float m[]);
/* End of the declarations that are only visible when __VEC__ is defined. */
#endif /* defined(__VEC__) */
/* Undo the "#pragma import on" issued near the top of the file. */
#ifdef PRAGMA_IMPORT_OFF
#pragma import off
#elif PRAGMA_IMPORT
#pragma import reset
#endif
/* Close the extern "C" block opened near the top of the file for C++ builds. */
#ifdef __cplusplus
}
#endif
#endif /* __VECTOROPS__ */