[CONTACT]

[ABOUT]

[POLICY]

[ADVERTISE]

Vector Operations in VAX Assembler (Single Precision)

Found at: ftp.icm.edu.pl:70/packages/netlib/misc/vax-blas

	#		asblas.s:
	#
	#	Vector Operations in VAX Assembler
	#	Single Precision Version
	#	Loops unrolled 4 times
	#	Compile on UNIX or VMS using the UNIX assembler.
	#		cc -c asblas.s
	#
	#	For documentation, see file toblas.c
	#
	#	Oliver McBryan
	#	New York University
	#
	#
	#
	# 	zero_vector(n,v)
	#	v[i] = 0.
	# assume n multiple of 4
	.text
	.align	1
	.globl	_szv
	#
	# _szv: clear single-precision elements of v, four per loop pass.
	#   4(ap) = n	element count; n >> 2 passes are made
	#   8(ap) = v	address of the first element
	# Register roles: r11 = passes remaining, r10 = running element pointer.
	#
_szv:
	.word	0xc00			# entry mask bits 10,11: save r10 and r11
	movl	8(ap),r10		# r10 -> v[0]
	ashl	$-2,4(ap),r11		# r11 = n >> 2
	jbr	szv_test		# test before the first pass
szv_body:
	clrf	(r10)+			# v[i]   = 0
	clrf	(r10)+			# v[i+1] = 0
	clrf	(r10)+			# v[i+2] = 0
	clrf	(r10)+			# v[i+3] = 0
szv_test:
	sobgeq	r11,szv_body		# --r11; another pass while r11 >= 0
	ret

		
	# vector_equals_scalar(n,v,a)
	#	v[i] = a
	# assume n multiple of 4
	.text
	.align	1
	.globl	_sves
	#
	# _sves: store the scalar a into elements of v, four per loop pass.
	#   4(ap)  = n	element count; n >> 2 passes are made
	#   8(ap)  = v	address of the first element
	#   12(ap) = a	scalar, read as a double and narrowed to single
	# Register roles: r11 = passes remaining, r10 = running pointer,
	# r0 = a in single precision.
	#
_sves:
	.word	0xc00			# entry mask bits 10,11: save r10 and r11
	movl	8(ap),r10		# r10 -> v[0]
	ashl	$-2,4(ap),r11		# r11 = n >> 2
	cvtdf	12(ap),r0		# r0 = (float) a
	jbr	sves_test		# test before the first pass
sves_body:
	movf	r0,(r10)+		# v[i]   = a
	movf	r0,(r10)+		# v[i+1] = a
	movf	r0,(r10)+		# v[i+2] = a
	movf	r0,(r10)+		# v[i+3] = a
sves_test:
	sobgeq	r11,sves_body		# --r11; another pass while r11 >= 0
	ret

		
	# float sum_elements_of_vector(n,v)
	# assume n multiple of 4
	.text
	.align	1
	.globl	_svsum
	#
	# _svsum: return the sum of the elements of v.
	#   4(ap) = n	element count; n >> 2 passes of 4 elements are made
	#   8(ap) = v	address of the first element
	# Register roles: r11 = passes remaining, r10 = running pointer,
	# r0 = single-precision accumulator, r1 = pair sum.
	# Each pass adds (v[i]+v[i+1]) and then (v[i+2]+v[i+3]) to r0.
	# The result is widened to double in r0 before returning.
	#
_svsum:
	.word	0xc00			# entry mask bits 10,11: save r10 and r11
	movl	8(ap),r10		# r10 -> v[0]
	ashl	$-2,4(ap),r11		# r11 = n >> 2
	clrf	r0			# sum = 0
	jbr	svsum_test		# test before the first pass
svsum_body:
	addf3	(r10)+,(r10)+,r1	# r1 = v[i] + v[i+1]
	addf2	r1,r0			# sum += r1
	addf3	(r10)+,(r10)+,r1	# r1 = v[i+2] + v[i+3]
	addf2	r1,r0			# sum += r1
svsum_test:
	sobgeq	r11,svsum_body		# --r11; another pass while r11 >= 0
	cvtfd	r0,r0			# widen the single sum to double for return
	ret

		

		
	# copy_vector_to_vector(n,v1,v2)
	#	v2[i] = v1[i]
	.text
	.align	1
	.globl	_svev
	#
	# _svev: copy n single-precision elements from v1 to v2 with one movc3.
	#   4(ap)  = n	element count
	#   8(ap)  = v1	source address
	#   12(ap) = v2	destination address
	# The entry mask is empty, so no registers are saved on entry.
	# NOTE(review): movc3 presumably takes a 16-bit length and uses r0-r5
	# as scratch; byte counts above 65535 would then be truncated --
	# confirm against the VAX architecture reference.
	#
_svev:
	.word	0x0			# entry mask: nothing saved
	ashl	$2,4(ap),r0		# r0 = byte count = n * 4
	movl	8(ap),r1		# r1 -> v1[0]
	movl	12(ap),r2		# r2 -> v2[0]
	movc3	r0,(r1),(r2)		# block copy v1 -> v2
	ret

		
	# add_scalar_to_vector(n,a,v)
	#	v[i] = v[i] + a
	# assume n multiple of 4
	.text
	.align	1
	.globl	_svas
	#
	# _svas: add the scalar a to elements of v in place, four per pass.
	#   4(ap)  = n	element count; n >> 2 passes are made
	#   8(ap)  = a	scalar, read as a double and narrowed to single
	#   16(ap) = v	address of the first element
	# Register roles: r11 = passes remaining, r10 = running pointer,
	# r0 = a in single precision.
	#
_svas:
	.word	0xc00		# entry mask bits 10,11: save r10 and r11
	ashl	$-2,4(ap),r11	# n / 4
	cvtdf	8(ap),r0	# a
	movl	16(ap),r10	# v
	jbr	d2		# test before the first pass
d1:
	addf3	r0,(r10),(r10)+	# v[i] += a, then step to the next element
	addf3	r0,(r10),(r10)+
	addf3	r0,(r10),(r10)+
	addf3	r0,(r10),(r10)+
d2:
	sobgeq	r11,d1		# --r11; another pass while r11 >= 0
	ret

		
	# multiply_vector_by_scalar(n,v,a)
	#	v[i] *=  a
	# n is a multiple of 4
	.text
	.align	1
	.globl	_svms
	#
	# _svms: multiply elements of v by the scalar a in place, four per pass.
	#   4(ap)  = n	element count; n >> 2 passes are made
	#   8(ap)  = v	address of the first element
	#   12(ap) = a	scalar, read as a double and narrowed to single
	# Register roles: r11 = passes remaining, r10 = running pointer,
	# r0 = a in single precision.
	#
_svms:
	.word	0xc00			# entry mask bits 10,11: save r10 and r11
	movl	8(ap),r10		# r10 -> v[0]
	ashl	$-2,4(ap),r11		# r11 = n >> 2
	cvtdf	12(ap),r0		# r0 = (float) a
	jbr	svms_test		# test before the first pass
svms_body:
	mulf3	r0,(r10),(r10)+		# v[i]   *= a
	mulf3	r0,(r10),(r10)+		# v[i+1] *= a
	mulf3	r0,(r10),(r10)+		# v[i+2] *= a
	mulf3	r0,(r10),(r10)+		# v[i+3] *= a
svms_test:
	sobgeq	r11,svms_body		# --r11; another pass while r11 >= 0
	ret

		
	# vector_equals_scalar_plus_vector(n,v1,a,v2)
	#	v1[i] = v2[i] + a
	# n is a multiple of 4
	.text
	.align	1
	.globl	_svespv
	#
	# _svespv: v1[i] = v2[i] + a, four elements per loop pass.
	#   4(ap)  = n	element count; n >> 2 passes are made
	#   8(ap)  = v1	destination address
	#   12(ap) = a	scalar, read as a double and narrowed to single
	#   20(ap) = v2	source address
	# Register roles: r11 = passes remaining, r10 = destination pointer,
	# r9 = source pointer, r0 = a in single precision.
	#
_svespv:
	.word	0xe00			# entry mask bits 9-11: save r9, r10, r11
	movl	8(ap),r10		# r10 -> v1[0]
	ashl	$-2,4(ap),r11		# r11 = n >> 2
	cvtdf	12(ap),r0		# r0 = (float) a
	movl	20(ap),r9		# r9 -> v2[0]
	jbr	svespv_test		# test before the first pass
svespv_body:
	addf3	r0,(r9)+,(r10)+		# v1[i]   = v2[i]   + a
	addf3	r0,(r9)+,(r10)+		# v1[i+1] = v2[i+1] + a
	addf3	r0,(r9)+,(r10)+		# v1[i+2] = v2[i+2] + a
	addf3	r0,(r9)+,(r10)+		# v1[i+3] = v2[i+3] + a
svespv_test:
	sobgeq	r11,svespv_body		# --r11; another pass while r11 >= 0
	ret

		
	# vector_equals_scalar_times_vector(n,v1,a,v2)
	#	v1[i] = v2[i] * a
	# n is a multiple of 4
	.text
	.align	1
	.globl	_svesmv
	#
	# _svesmv: v1[i] = v2[i] * a, four elements per loop pass.
	#   4(ap)  = n	element count; n >> 2 passes are made
	#   8(ap)  = v1	destination address
	#   12(ap) = a	scalar, read as a double and narrowed to single
	#   20(ap) = v2	source address
	# Register roles: r11 = passes remaining, r10 = destination pointer,
	# r9 = source pointer, r0 = a in single precision.
	#
_svesmv:
	.word	0xe00		# entry mask bits 9-11: save r9, r10, r11
	ashl	$-2,4(ap),r11	# n / 4
	movl	8(ap),r10	# v1
	cvtdf	12(ap),r0	# a
	movl	20(ap),r9	# v2
	jbr	g2		# test before the first pass
g1:
	mulf3	r0,(r9)+,(r10)+	# v1[i] = a * v2[i], both pointers advance
	mulf3	r0,(r9)+,(r10)+
	mulf3	r0,(r9)+,(r10)+
	mulf3	r0,(r9)+,(r10)+
g2:
	sobgeq	r11,g1		# --r11; another pass while r11 >= 0
	ret

		
	# float inner_product(n,v1,v2)
	#	return v1.v2
	# n multiple of 4
	.text
	.align	1
	.globl	_svdotv
	#
	# _svdotv: return the inner product of v1 and v2.
	#   4(ap)  = n	element count; n >> 2 passes of 4 elements are made
	#   8(ap)  = v1	address of the first vector
	#   12(ap) = v2	address of the second vector
	# Register roles: r11 = passes remaining, r10 = v1 pointer,
	# r9 = v2 pointer, r0 = single-precision accumulator, r1 = product.
	# The result is widened to double in r0 before returning.
	#
_svdotv:
	.word	0xe00		# entry mask bits 9-11: save r9, r10, r11
	ashl	$-2,4(ap),r11	# n / 4
	movl	8(ap),r10	# v1
	movl	12(ap),r9	# v2
	clrf	r0		# sum = 0
	jbr	h2		# test before the first pass
h1:
	mulf3	(r9)+,(r10)+,r1	# r1 = v2[i] * v1[i]
	addf2	r1,r0		# sum += r1
	mulf3	(r9)+,(r10)+,r1
	addf2	r1,r0
	mulf3	(r9)+,(r10)+,r1
	addf2	r1,r0
	mulf3	(r9)+,(r10)+,r1
	addf2	r1,r0
h2:
	sobgeq	r11,h1		# --r11; another pass while r11 >= 0
	cvtfd	r0,r0		# widen the single sum to double for return
	ret

		
	# multiply_vector_by_vector(n,v1,v2)
	#	v1[i] = v1[i]*v2[i]
	# n multiple of 4
	.text
	.align	1
	.globl	_svmv
	#
	# _svmv: v1[i] = v1[i] * v2[i] in place, four elements per loop pass.
	#   4(ap)  = n	element count; n >> 2 passes are made
	#   8(ap)  = v1	address of the vector that is updated
	#   12(ap) = v2	address of the multiplier vector
	# Register roles: r11 = passes remaining, r10 = v1 pointer,
	# r9 = v2 pointer.
	#
_svmv:
	.word	0xe00		# entry mask bits 9-11: save r9, r10, r11
	ashl	$-2,4(ap),r11	# n / 4
	movl	8(ap),r10	# v1
	movl	12(ap),r9	# v2
	jbr	i2		# test before the first pass
i1:
	mulf3	(r9)+,(r10),(r10)+	# v1[i] *= v2[i], both pointers advance
	mulf3	(r9)+,(r10),(r10)+
	mulf3	(r9)+,(r10),(r10)+
	mulf3	(r9)+,(r10),(r10)+
i2:
	sobgeq	r11,i1		# --r11; another pass while r11 >= 0
	ret

		
	# divide_vector_by_vector(n,v1,v2)
	#	v1[i] = v1[i]/v2[i]
	# n multiple of 4
	.text
	.align	1
	.globl	_svdv
	#
	# _svdv: v1[i] = v1[i] / v2[i] in place, four elements per loop pass.
	#   4(ap)  = n	element count; n >> 2 passes are made
	#   8(ap)  = v1	address of the vector that is updated (dividend)
	#   12(ap) = v2	address of the divisor vector
	# Register roles: r11 = passes remaining, r10 = v1 pointer,
	# r9 = v2 pointer.
	#
_svdv:
	.word	0xe00		# entry mask bits 9-11: save r9, r10, r11
	ashl	$-2,4(ap),r11	# n / 4
	movl	8(ap),r10	# v1
	movl	12(ap),r9	# v2
	jbr	j2		# test before the first pass
j1:
	divf3	(r9)+,(r10),(r10)+	# v1[i] /= v2[i], both pointers advance
	divf3	(r9)+,(r10),(r10)+
	divf3	(r9)+,(r10),(r10)+
	divf3	(r9)+,(r10),(r10)+
j2:
	sobgeq	r11,j1		# --r11; another pass while r11 >= 0
	ret

		
	# subtract_vector_from_vector(n,v1,v2)
	#	v2[i] = v2[i] - v1[i]
	# n multiple of 4
	.globl	_svlv
	#
	# _svlv: v2[i] = v2[i] - v1[i] in place, four elements per loop pass.
	#   4(ap)  = n	element count; n >> 2 passes are made
	#   8(ap)  = v1	address of the vector that is subtracted
	#   12(ap) = v2	address of the vector that is updated
	# Register roles: r11 = passes remaining, r10 = v1 pointer,
	# r9 = v2 pointer.
	#
_svlv:
	.word	0xe00			# entry mask bits 9-11: save r9, r10, r11
	movl	8(ap),r10		# r10 -> v1[0]
	movl	12(ap),r9		# r9  -> v2[0]
	ashl	$-2,4(ap),r11		# r11 = n >> 2
	jbr	svlv_test		# test before the first pass
svlv_body:
	subf3	(r10)+,(r9),(r9)+	# v2[i]   -= v1[i]
	subf3	(r10)+,(r9),(r9)+	# v2[i+1] -= v1[i+1]
	subf3	(r10)+,(r9),(r9)+	# v2[i+2] -= v1[i+2]
	subf3	(r10)+,(r9),(r9)+	# v2[i+3] -= v1[i+3]
svlv_test:
	sobgeq	r11,svlv_body		# --r11; another pass while r11 >= 0
	ret

		
	# add_scalar_times_vector_to_vector(n,a,v1,v2)
	#	v2[i] = v2[i] + a*v1[i]
	.text
	.align	1
	.globl	_svpsv
	#
	# _svpsv: v2[i] = v2[i] + a * v1[i] in place, four elements per pass.
	#   4(ap)  = n	element count; n >> 2 passes are made
	#   8(ap)  = a	scalar, read as a double and narrowed to single
	#   16(ap) = v1	address of the vector that is scaled
	#   20(ap) = v2	address of the vector that is updated
	# Register roles: r11 = passes remaining, r10 = v1 pointer,
	# r9 = v2 pointer, r0 = a in single precision, r1 = a * v1[i].
	#
_svpsv:
	.word	0xe00			# entry mask bits 9-11: save r9, r10, r11
	ashl	$-2,4(ap),r11		# r11 = n >> 2
	cvtdf	8(ap),r0		# r0 = (float) a
	movl	20(ap),r9		# r9  -> v2[0]
	movl	16(ap),r10		# r10 -> v1[0]
	jbr	svpsv_test		# test before the first pass
svpsv_body:
	mulf3	r0,(r10)+,r1		# r1 = a * v1[i]
	addf3	r1,(r9),(r9)+		# v2[i] += r1
	mulf3	r0,(r10)+,r1		# element i+1
	addf3	r1,(r9),(r9)+
	mulf3	r0,(r10)+,r1		# element i+2
	addf3	r1,(r9),(r9)+
	mulf3	r0,(r10)+,r1		# element i+3
	addf3	r1,(r9),(r9)+
svpsv_test:
	sobgeq	r11,svpsv_body		# --r11; another pass while r11 >= 0
	ret

		
	# vector_equals_vector_minus_vector(n,v1,v2,v3)
	#	v1[i] = v2[i] - v3[i]
	.text
	.align	1
	.globl	_svevlv
	#
	# _svevlv: v1[i] = v2[i] - v3[i], four elements per loop pass.
	#   4(ap)  = n	element count; n >> 2 passes are made
	#   8(ap)  = v1	destination address
	#   12(ap) = v2	address of the minuend vector
	#   16(ap) = v3	address of the subtrahend vector
	# Register roles: r11 = passes remaining, r0 = v1 pointer,
	# r10 = v2 pointer, r9 = v3 pointer.
	#
_svevlv:
	.word	0xe00			# entry mask bits 9-11: save r9, r10, r11
	movl	8(ap),r0		# r0  -> v1[0]
	movl	12(ap),r10		# r10 -> v2[0]
	movl	16(ap),r9		# r9  -> v3[0]
	ashl	$-2,4(ap),r11		# r11 = n >> 2
	jbr	svevlv_test		# test before the first pass
svevlv_body:
	subf3	(r9)+,(r10)+,(r0)+	# v1[i]   = v2[i]   - v3[i]
	subf3	(r9)+,(r10)+,(r0)+	# v1[i+1] = v2[i+1] - v3[i+1]
	subf3	(r9)+,(r10)+,(r0)+	# v1[i+2] = v2[i+2] - v3[i+2]
	subf3	(r9)+,(r10)+,(r0)+	# v1[i+3] = v2[i+3] - v3[i+3]
svevlv_test:
	sobgeq	r11,svevlv_body		# --r11; another pass while r11 >= 0
	ret

		
	# add_vector_times_vector_to_vector(n,v1,v2,v3)
	#	v3[i] = v3[i] + v1[i]*v2[i]
	.text
	.align	1
	.globl	_svpvv
	#
	# _svpvv: v3[i] = v3[i] + v1[i] * v2[i] in place, four elements per pass.
	#   4(ap)  = n	element count; n >> 2 passes are made
	#   8(ap)  = v1	address of the first factor vector
	#   12(ap) = v2	address of the second factor vector
	#   16(ap) = v3	address of the vector that is updated
	# Register roles: r11 = passes remaining, r10 = v1 pointer,
	# r9 = v2 pointer, r8 = v3 pointer, r0 = v1[i] * v2[i].
	#
_svpvv:
	.word	0xf00			# entry mask bits 8-11: save r8 through r11
	movl	8(ap),r10		# r10 -> v1[0]
	movl	12(ap),r9		# r9  -> v2[0]
	movl	16(ap),r8		# r8  -> v3[0]
	ashl	$-2,4(ap),r11		# r11 = n >> 2
	jbr	svpvv_test		# test before the first pass
svpvv_body:
	mulf3	(r10)+,(r9)+,r0		# r0 = v1[i] * v2[i]
	addf3	r0,(r8),(r8)+		# v3[i] += r0
	mulf3	(r10)+,(r9)+,r0		# element i+1
	addf3	r0,(r8),(r8)+
	mulf3	(r10)+,(r9)+,r0		# element i+2
	addf3	r0,(r8),(r8)+
	mulf3	(r10)+,(r9)+,r0		# element i+3
	addf3	r0,(r8),(r8)+
svpvv_test:
	sobgeq	r11,svpvv_body		# --r11; another pass while r11 >= 0
	ret

		
	# vector_equals_vector_plus_scalar_times_vector(n,v1,v2,a,v3)
	#	v1[i] = v2[i] + a*v3[i]
	.globl	_svevpsv
	#
	# _svevpsv: v1[i] = v2[i] + a * v3[i], four elements per loop pass.
	#   4(ap)  = n	element count; n >> 2 passes are made
	#   8(ap)  = v1	destination address
	#   12(ap) = v2	address of the vector that is added
	#   16(ap) = a	scalar, read as a double and narrowed to single
	#   24(ap) = v3	address of the vector that is scaled
	# Register roles: r11 = passes remaining, r10 = v1 pointer,
	# r9 = v2 pointer, r8 = v3 pointer, r0 = a in single precision,
	# r1 = a * v3[i].
	#
_svevpsv:
	.word	0xf00			# entry mask bits 8-11: save r8 through r11
	movl	8(ap),r10		# r10 -> v1[0]
	movl	12(ap),r9		# r9  -> v2[0]
	ashl	$-2,4(ap),r11		# r11 = n >> 2
	cvtdf	16(ap),r0		# r0 = (float) a
	movl	24(ap),r8		# r8  -> v3[0]
	jbr	svevpsv_test		# test before the first pass
svevpsv_body:
	mulf3	(r8)+,r0,r1		# r1 = v3[i] * a
	addf3	r1,(r9)+,(r10)+		# v1[i] = v2[i] + r1
	mulf3	(r8)+,r0,r1		# element i+1
	addf3	r1,(r9)+,(r10)+
	mulf3	(r8)+,r0,r1		# element i+2
	addf3	r1,(r9)+,(r10)+
	mulf3	(r8)+,r0,r1		# element i+3
	addf3	r1,(r9)+,(r10)+
svevpsv_test:
	sobgeq	r11,svevpsv_body	# --r11; another pass while r11 >= 0
	ret

		

		
.

NEW PAGES:

[ODDNUGGET]

[GOPHER]