forked from pcsx-redux/nugget
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathgte-kernels.hh
367 lines (321 loc) · 13.7 KB
/
gte-kernels.hh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
/*
MIT License
Copyright (c) 2023 PCSX-Redux authors
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
*/
#pragma once
#include <stdint.h>
namespace psyqo {
namespace GTE {
/**
* @brief The GTE math kernels.
*
* @details This namespace contains all of the PS1 GTE math kernels.
* They are not necessarily meant to be used directly, but they are
* still exposed publicly as they may be useful to some. Their usage
* is delicate, as the compiler will not be able to understand the
* interlocking nature of the GTE, and thus will not be able to add
* the necessary hazard stalls. This means that the programmer must
* be careful to add the necessary stalls themselves.
*/
namespace Kernels {
// Shift factor selector for the kernels that accept one:
//   Unshifted - the result is left as is (no change)
//   Shifted   - the result is shifted right by 12 bits (>> 12)
enum SF : unsigned {
    Unshifted = 0,
    Shifted = 1,
};
// Low limit selector for the kernels that accept one:
//   Unlimited - the low limit is -2^15
//   Limited   - the low limit is 0
enum LM : unsigned {
    Unlimited = 0,
    Limited = 1,
};
// Coordinate and Perspective Transformation
// RTPS - perspective transformation of a single vertex.
//   pers(([rt]·[v0]) >> 12 + [tr]) -> sxy2
// Takes 14 cycles.
static inline void rtps() {
    asm volatile("cop2 0x0180001");
}
// RTPT - perspective transformation of three vertices at once.
//   pers(([rt]·[v0]) >> 12 + [tr]) -> sxy0
//   pers(([rt]·[v1]) >> 12 + [tr]) -> sxy1
//   pers(([rt]·[v2]) >> 12 + [tr]) -> sxy2
// Takes 22 cycles.
static inline void rtpt() {
    asm volatile("cop2 0x0280030");
}
// Depth Cueing
// DPCL - depth cue applied to a lit color.
//   (1 - dp)·[rgb·sv] + dp·[fc] -> rgb, lv, sv
// Takes 8 cycles.
static inline void dpcl() {
    asm volatile("cop2 0x0680029");
}
// DPCS - depth cueing of a single color.
//   (1 - dp)·[rgb] + dp·[fc] -> rgb, lv, sv
// Takes 8 cycles.
static inline void dpcs() {
    asm volatile("cop2 0x0780010");
}
// DPCT - depth cueing of three colors at once.
//   (1 - dp)·[rgb0] + dp·[fc] -> rgb0, lv, sv
//   (1 - dp)·[rgb1] + dp·[fc] -> rgb1, lv, sv
//   (1 - dp)·[rgb2] + dp·[fc] -> rgb2, lv, sv
// Takes 17 cycles.
static inline void dpct() {
    asm volatile("cop2 0x0f8002a");
}
// INTPL - interpolation between a vector and the far color.
//   (1 - dp)·[sv] + dp·[fc] -> rgb2, lv, sv
// Takes 8 cycles.
static inline void intpl() {
    asm volatile("cop2 0x0980011");
}
// SQR - termwise square of the [sv] vector.
//   [sv.x² >> 12, sv.y² >> 12, sv.z² >> 12] -> lv, sv
// The >> 12 applies to the Shifted form, which is the default; any other
// sf value selects the unshifted opcode.
// Takes 5 cycles.
template <SF sf = Shifted>
static inline void sqr() {
    if constexpr (sf != Shifted) {
        asm volatile("cop2 0x0a00428");
    } else {
        asm volatile("cop2 0x0a80428");
    }
}
// Light Source Calculations
// NCS - normal color, single vector.
//   limit(([ll]·[v0]) >> 12)        -> sv
//   limit(([lc]·[sv]) >> 12) + [bk] -> rgb2
// Takes 14 cycles.
static inline void ncs() {
    asm volatile("cop2 0x0c8041e");
}
// NCT - normal color, three vectors. Runs the NCS steps once per vertex:
//   limit(([ll]·[v0]) >> 12)        -> sv
//   limit(([lc]·[sv]) >> 12) + [bk] -> rgb0
//   limit(([ll]·[v1]) >> 12)        -> sv
//   limit(([lc]·[sv]) >> 12) + [bk] -> rgb1
//   limit(([ll]·[v2]) >> 12)        -> sv
//   limit(([lc]·[sv]) >> 12) + [bk] -> rgb2
// Takes 30 cycles.
static inline void nct() {
    asm volatile("cop2 0x0d80420");
}
// NCDS - normal color with depth cue, single vector.
//   limit(([ll]·[v0]) >> 12)        -> sv
//   limit(([lc]·[sv]) >> 12) + [bk] -> sv
//   (1 - dp)·[rgb·sv] + dp·[fc]     -> rgb2
// Takes 19 cycles.
static inline void ncds() {
    asm volatile("cop2 0x0e80413");
}
// NCDT - normal color with depth cue, three vectors. Runs the NCDS steps
// once per vertex:
//   limit(([ll]·[v0]) >> 12)        -> sv
//   limit(([lc]·[sv]) >> 12) + [bk] -> sv
//   (1 - dp)·[rgb·sv] + dp·[fc]     -> rgb0
//   limit(([ll]·[v1]) >> 12)        -> sv
//   limit(([lc]·[sv]) >> 12) + [bk] -> sv
//   (1 - dp)·[rgb·sv] + dp·[fc]     -> rgb1
//   limit(([ll]·[v2]) >> 12)        -> sv
//   limit(([lc]·[sv]) >> 12) + [bk] -> sv
//   (1 - dp)·[rgb·sv] + dp·[fc]     -> rgb2
// Takes 44 cycles.
static inline void ncdt() {
    asm volatile("cop2 0x0f80416");
}
// NCCS - normal color color, single vector.
//   limit(([ll]·[v0]) >> 12)        -> sv
//   limit(([lc]·[sv]) >> 12) + [bk] -> sv
//   [rgb·sv]                        -> rgb2
// Takes 17 cycles.
static inline void nccs() {
    asm volatile("cop2 0x0108041b");
}
// NCCT - normal color color, three vectors. Runs the NCCS steps once per
// vertex:
//   limit(([ll]·[v0]) >> 12)        -> sv
//   limit(([lc]·[sv]) >> 12) + [bk] -> sv
//   [rgb·sv]                        -> rgb0
//   limit(([ll]·[v1]) >> 12)        -> sv
//   limit(([lc]·[sv]) >> 12) + [bk] -> sv
//   [rgb·sv]                        -> rgb1
//   limit(([ll]·[v2]) >> 12)        -> sv
//   limit(([lc]·[sv]) >> 12) + [bk] -> sv
//   [rgb·sv]                        -> rgb2
// Takes 39 cycles.
static inline void ncct() {
    asm volatile("cop2 0x0118043f");
}
// CDP - color depth cue.
//   limit(([lc]·[sv]) >> 12) + [bk] -> sv
//   (1 - dp)·[rgb·sv] + dp·[fc]     -> rgb2
// Takes 13 cycles.
static inline void cdp() {
    asm volatile("cop2 0x01280414");
}
// CC - color color.
//   limit(([lc]·[sv]) >> 12) + [bk] -> sv
//   [rgb·sv]                        -> rgb2
// Takes 11 cycles.
static inline void cc() {
    asm volatile("cop2 0x0138041c");
}
// NCLIP - normal clipping.
//   sx0*sy1 + sx1*sy2 + sx2*sy0 - sx0*sy2 - sx1*sy0 - sx2*sy1 -> opz
// This is the determinant of the 2x2 matrix
//   [sx1 - sx0, sy1 - sy0]
//   [sx2 - sx0, sy2 - sy0]
// Takes 8 cycles.
static inline void nclip() {
    asm volatile("cop2 0x01400006");
}
// Z Average
// AVSZ3 - scaled average of three Z values (for triangles).
//   zsf3 * (sz0 + sz1 + sz2) -> otz
// Takes 5 cycles.
static inline void avsz3() {
    asm volatile("cop2 0x0158002d");
}
// AVSZ4 - scaled average of four Z values (for quads).
//   zsf4 * (sz0 + sz1 + sz2 + sz3) -> otz
// Takes 6 cycles.
static inline void avsz4() {
    asm volatile("cop2 0x0168002e");
}
// CP - cross product (Sony's documentation improperly calls it the
// "Outer Product").
//   rt.22 * ir3 - rt.33 * ir2 -> ir1
//   rt.33 * ir1 - rt.11 * ir3 -> ir2
//   rt.11 * ir2 - rt.22 * ir1 -> ir3
// The Shifted form is the default; any other sf value selects the
// unshifted opcode.
// Takes 6 cycles.
template <SF sf = Shifted>
static inline void cp() {
    if constexpr (sf != Shifted) {
        asm volatile("cop2 0x0170000c");
    } else {
        asm volatile("cop2 0x0178000c");
    }
}
// General Interpolation
// GPF - general purpose interpolation.
//   dp·[sv] -> lv, sv
// The Shifted form is the default; any other sf value selects the
// unshifted opcode.
// Takes 5 cycles.
template <SF sf = Shifted>
static inline void gpf() {
    if constexpr (sf != Shifted) {
        asm volatile("cop2 0x0190003d");
    } else {
        asm volatile("cop2 0x0198003d");
    }
}
// GPL - general purpose interpolation with base.
//   [lv] + dp·[sv] -> lv, sv
// The Shifted form is the default; any other sf value selects the
// unshifted opcode.
// Takes 5 cycles.
template <SF sf = Shifted>
static inline void gpl() {
    if constexpr (sf != Shifted) {
        asm volatile("cop2 0x01a0003e");
    } else {
        asm volatile("cop2 0x01a8003e");
    }
}
// All of the MVMVA operations take 8 cycles to complete.
// The MVMVA operation is the basis for the matrix math operations.
// The functions defined right underneath are simply aliases. They
// are provided for convenience, as programmers may know them from
// the original PS1 SDK documentation, but using the MVMVA operation
// directly may actually be more readable.
// Matrix operand of MVMVA:
//   RT - Rotation
//   LL - Light Source Direction
//   LC - Light Source Color
enum class MX : unsigned {
    RT = 0,
    LL = 1,
    LC = 2,
};
// Vector operand of MVMVA: one of the three vertex registers (V0, V1, V2)
// or the IR vector (written [sv] in the formulas of this file).
enum class MV : unsigned {
    V0 = 0,
    V1 = 1,
    V2 = 2,
    IR = 3,
};
// Vector added to the MVMVA product:
//   TR   - Translation
//   BK   - Back Color
//   FC   - Far Color
//   Zero - nothing is added
enum class TV : unsigned {
    TR = 0,
    BK = 1,
    FC = 2,
    Zero = 3,
};
// MVMVA - multiply a vector by a matrix and add a vector.
//
// The five selectors are packed into the opcode at compile time and the
// result is issued as a single `cop2` instruction:
//   bit  19     sf - shift factor (Shifted by default)
//   bits 17-18  mx - multiplication matrix
//   bits 15-16  v  - multiplication vector
//   bits 13-14  cv - vector added to the product (TV::Zero by default)
//   bit  10     lm - low limit (Unlimited by default)
//
// Template parameters:
//   mx  matrix to multiply with
//   v   vector to multiply
//   cv  vector added to the product
//   sf  whether the product is shifted right by 12
//   lm  whether the low limit is 0 instead of -2^15
template <MX mx, MV v, TV cv = TV::Zero, SF sf = Shifted, LM lm = Unlimited>
static inline void mvmva() {
    // A selector that does not fit its bit field would silently spill into
    // the neighbouring fields and encode a different operation, so reject
    // such values at compile time.
    static_assert(uint32_t(mx) < 4, "mvmva: mx does not fit in its 2-bit opcode field");
    static_assert(uint32_t(v) < 4, "mvmva: v does not fit in its 2-bit opcode field");
    static_assert(uint32_t(cv) < 4, "mvmva: cv does not fit in its 2-bit opcode field");
    static_assert(uint32_t(sf) < 2, "mvmva: sf does not fit in its 1-bit opcode field");
    static_assert(uint32_t(lm) < 2, "mvmva: lm does not fit in its 1-bit opcode field");
    // All operands are widened to uint32_t so the opcode is built with
    // unsigned arithmetic only.
    constexpr uint32_t op = (uint32_t(4) << 20) | (uint32_t(sf) << 19) | (uint32_t(mx) << 17) |
                            (uint32_t(v) << 15) | (uint32_t(cv) << 13) | (uint32_t(lm) << 10) | uint32_t(18);
    // "i" passes the opcode as an immediate, which requires the compile-time
    // constant built above.
    asm volatile("cop2 %0" : : "i"(op));
}
// Coordinate Conversion, Light Source Calculations
// rt: ([rt]·[v0]) >> 12 + [tr] -> lv, sv
static inline void rt() {
    mvmva<MX::RT,    // rotation matrix
          MV::V0,    // applied to V0
          TV::TR>();  // translation vector added
}
// ll: limit(([ll]·[v0]) >> 12) -> lv, sv
static inline void ll() {
    mvmva<MX::LL,         // light source direction matrix
          MV::V0,         // applied to V0
          TV::Zero,       // nothing added
          SF::Shifted,    // product >> 12
          LM::Limited>();  // low limit of 0
}
// lc: limit(([lc]·[sv]) >> 12) + [bk] -> lv, sv
static inline void lc() {
    mvmva<MX::LC,         // light source color matrix
          MV::IR,         // applied to the IR vector ([sv])
          TV::BK,         // back color added
          SF::Shifted,    // product >> 12
          LM::Limited>();  // low limit of 0
}
// rtir_sf0: [rt]·[sv] -> lv
static inline void rtir_sf0() {
    mvmva<MX::RT,           // rotation matrix
          MV::IR,           // applied to the IR vector ([sv])
          TV::Zero,         // nothing added
          SF::Unshifted>();  // product left unshifted
}
// General Matrix Operations
// rtv0: rotation matrix applied to V0, nothing added.
//   ([rt]·[v0]) >> 12 -> lv, sv
static inline void rtv0() {
    mvmva<MX::RT, MV::V0>();  // cv defaults to TV::Zero
}
// rtv1: rotation matrix applied to V1, nothing added.
//   ([rt]·[v1]) >> 12 -> lv, sv
static inline void rtv1() {
    mvmva<MX::RT, MV::V1>();  // cv defaults to TV::Zero
}
// rtv2: rotation matrix applied to V2, nothing added.
//   ([rt]·[v2]) >> 12 -> lv, sv
static inline void rtv2() {
    mvmva<MX::RT, MV::V2>();  // cv defaults to TV::Zero
}
// rtir: rotation matrix applied to the IR vector ([sv]), nothing added.
//   ([rt]·[sv]) >> 12 -> lv, sv
static inline void rtir() {
    mvmva<MX::RT, MV::IR>();  // cv defaults to TV::Zero
}
// rtv0tr: rotation matrix applied to V0, translation vector added.
//   ([rt]·[v0]) >> 12 + [tr] -> lv, sv
static inline void rtv0tr() {
    mvmva<MX::RT, MV::V0, TV::TR>();
}
// rtv1tr: rotation matrix applied to V1, translation vector added.
//   ([rt]·[v1]) >> 12 + [tr] -> lv, sv
static inline void rtv1tr() {
    mvmva<MX::RT, MV::V1, TV::TR>();
}
// rtv2tr: rotation matrix applied to V2, translation vector added.
//   ([rt]·[v2]) >> 12 + [tr] -> lv, sv
static inline void rtv2tr() {
    mvmva<MX::RT, MV::V2, TV::TR>();
}
// rtirtr: rotation matrix applied to the IR vector ([sv]), translation
// vector added.
//   ([rt]·[sv]) >> 12 + [tr] -> lv, sv
static inline void rtirtr() {
    mvmva<MX::RT, MV::IR, TV::TR>();
}
// rtv0bk: rotation matrix applied to V0, back color added.
//   ([rt]·[v0]) >> 12 + [bk] -> lv, sv
static inline void rtv0bk() {
    mvmva<MX::RT, MV::V0, TV::BK>();
}
// rtv1bk: rotation matrix applied to V1, back color added.
//   ([rt]·[v1]) >> 12 + [bk] -> lv, sv
static inline void rtv1bk() {
    mvmva<MX::RT, MV::V1, TV::BK>();
}
// rtv2bk: rotation matrix applied to V2, back color added.
//   ([rt]·[v2]) >> 12 + [bk] -> lv, sv
static inline void rtv2bk() {
    mvmva<MX::RT, MV::V2, TV::BK>();
}
// rtirbk: rotation matrix applied to the IR vector ([sv]), back color added.
//   ([rt]·[sv]) >> 12 + [bk] -> lv, sv
static inline void rtirbk() {
    mvmva<MX::RT, MV::IR, TV::BK>();
}
// rtv0fc: rotation matrix applied to V0, [fc] added.
//   ([rt]·[v0]) >> 12 + [fc] -> lv, sv
static inline void rtv0fc() {
    mvmva<MX::RT, MV::V0, TV::FC>();
}
// rtv1fc: rotation matrix applied to V1, [fc] added.
//   ([rt]·[v1]) >> 12 + [fc] -> lv, sv
static inline void rtv1fc() {
    mvmva<MX::RT, MV::V1, TV::FC>();
}
// rtv2fc: rotation matrix applied to V2, [fc] added.
//   ([rt]·[v2]) >> 12 + [fc] -> lv, sv
static inline void rtv2fc() {
    mvmva<MX::RT, MV::V2, TV::FC>();
}
// rtirfc: rotation matrix applied to the IR vector ([sv]), [fc] added.
//   ([rt]·[sv]) >> 12 + [fc] -> lv, sv
static inline void rtirfc() {
    mvmva<MX::RT, MV::IR, TV::FC>();
}
// llv0: light source direction matrix applied to V0, nothing added.
//   ([ll]·[v0]) >> 12 -> lv, sv
static inline void llv0() {
    mvmva<MX::LL, MV::V0>();  // cv defaults to TV::Zero
}
// llv1: light source direction matrix applied to V1, nothing added.
//   ([ll]·[v1]) >> 12 -> lv, sv
static inline void llv1() {
    mvmva<MX::LL, MV::V1>();  // cv defaults to TV::Zero
}
// llv2: light source direction matrix applied to V2, nothing added.
//   ([ll]·[v2]) >> 12 -> lv, sv
static inline void llv2() {
    mvmva<MX::LL, MV::V2>();  // cv defaults to TV::Zero
}
// llir: light source direction matrix applied to the IR vector ([sv]),
// nothing added.
//   ([ll]·[sv]) >> 12 -> lv, sv
static inline void llir() {
    mvmva<MX::LL, MV::IR>();  // cv defaults to TV::Zero
}
// llv0tr: light source direction matrix applied to V0, translation vector
// added.
//   ([ll]·[v0]) >> 12 + [tr] -> lv, sv
static inline void llv0tr() {
    mvmva<MX::LL, MV::V0, TV::TR>();
}
// llv1tr: light source direction matrix applied to V1, translation vector
// added.
//   ([ll]·[v1]) >> 12 + [tr] -> lv, sv
static inline void llv1tr() {
    mvmva<MX::LL, MV::V1, TV::TR>();
}
// llv2tr: light source direction matrix applied to V2, translation vector
// added.
//   ([ll]·[v2]) >> 12 + [tr] -> lv, sv
static inline void llv2tr() {
    mvmva<MX::LL, MV::V2, TV::TR>();
}
// llirtr: light source direction matrix applied to the IR vector ([sv]),
// translation vector added.
//   ([ll]·[sv]) >> 12 + [tr] -> lv, sv
static inline void llirtr() {
    mvmva<MX::LL, MV::IR, TV::TR>();
}
// llv0bk: light source direction matrix applied to V0, back color added.
//   ([ll]·[v0]) >> 12 + [bk] -> lv, sv
static inline void llv0bk() {
    mvmva<MX::LL, MV::V0, TV::BK>();
}
// llv1bk: light source direction matrix applied to V1, back color added.
//   ([ll]·[v1]) >> 12 + [bk] -> lv, sv
static inline void llv1bk() {
    mvmva<MX::LL, MV::V1, TV::BK>();
}
// llv2bk: light source direction matrix applied to V2, back color added.
//   ([ll]·[v2]) >> 12 + [bk] -> lv, sv
static inline void llv2bk() {
    mvmva<MX::LL, MV::V2, TV::BK>();
}
// llirbk: light source direction matrix applied to the IR vector ([sv]),
// back color added.
//   ([ll]·[sv]) >> 12 + [bk] -> lv, sv
static inline void llirbk() {
    mvmva<MX::LL, MV::IR, TV::BK>();
}
// llv0fc: light source direction matrix applied to V0, [fc] added.
//   ([ll]·[v0]) >> 12 + [fc] -> lv, sv
static inline void llv0fc() {
    mvmva<MX::LL, MV::V0, TV::FC>();
}
// llv1fc: light source direction matrix applied to V1, [fc] added.
//   ([ll]·[v1]) >> 12 + [fc] -> lv, sv
static inline void llv1fc() {
    mvmva<MX::LL, MV::V1, TV::FC>();
}
// llv2fc: light source direction matrix applied to V2, [fc] added.
//   ([ll]·[v2]) >> 12 + [fc] -> lv, sv
static inline void llv2fc() {
    mvmva<MX::LL, MV::V2, TV::FC>();
}
// llirfc: light source direction matrix applied to the IR vector ([sv]),
// [fc] added.
//   ([ll]·[sv]) >> 12 + [fc] -> lv, sv
static inline void llirfc() {
    mvmva<MX::LL, MV::IR, TV::FC>();
}
// lcv0: light source color matrix applied to V0, nothing added.
//   ([lc]·[v0]) >> 12 -> lv, sv
static inline void lcv0() {
    mvmva<MX::LC, MV::V0>();  // cv defaults to TV::Zero
}
// lcv1: light source color matrix applied to V1, nothing added.
//   ([lc]·[v1]) >> 12 -> lv, sv
static inline void lcv1() {
    mvmva<MX::LC, MV::V1>();  // cv defaults to TV::Zero
}
// lcv2: light source color matrix applied to V2, nothing added.
//   ([lc]·[v2]) >> 12 -> lv, sv
static inline void lcv2() {
    mvmva<MX::LC, MV::V2>();  // cv defaults to TV::Zero
}
// lcir: light source color matrix applied to the IR vector ([sv]), nothing
// added.
//   ([lc]·[sv]) >> 12 -> lv, sv
static inline void lcir() {
    mvmva<MX::LC, MV::IR>();  // cv defaults to TV::Zero
}
// lcv0tr: light source color matrix applied to V0, translation vector added.
//   ([lc]·[v0]) >> 12 + [tr] -> lv, sv
static inline void lcv0tr() {
    mvmva<MX::LC, MV::V0, TV::TR>();
}
// lcv1tr: light source color matrix applied to V1, translation vector added.
//   ([lc]·[v1]) >> 12 + [tr] -> lv, sv
static inline void lcv1tr() {
    mvmva<MX::LC, MV::V1, TV::TR>();
}
// lcv2tr: light source color matrix applied to V2, translation vector added.
//   ([lc]·[v2]) >> 12 + [tr] -> lv, sv
static inline void lcv2tr() {
    mvmva<MX::LC, MV::V2, TV::TR>();
}
// lcirtr: light source color matrix applied to the IR vector ([sv]),
// translation vector added.
//   ([lc]·[sv]) >> 12 + [tr] -> lv, sv
static inline void lcirtr() {
    mvmva<MX::LC, MV::IR, TV::TR>();
}
// lcv0bk: light source color matrix applied to V0, back color added.
//   ([lc]·[v0]) >> 12 + [bk] -> lv, sv
static inline void lcv0bk() {
    mvmva<MX::LC, MV::V0, TV::BK>();
}
// lcv1bk: light source color matrix applied to V1, back color added.
//   ([lc]·[v1]) >> 12 + [bk] -> lv, sv
static inline void lcv1bk() {
    mvmva<MX::LC, MV::V1, TV::BK>();
}
// lcv2bk: light source color matrix applied to V2, back color added.
//   ([lc]·[v2]) >> 12 + [bk] -> lv, sv
static inline void lcv2bk() {
    mvmva<MX::LC, MV::V2, TV::BK>();
}
// lcirbk: light source color matrix applied to the IR vector ([sv]), back
// color added.
//   ([lc]·[sv]) >> 12 + [bk] -> lv, sv
static inline void lcirbk() {
    mvmva<MX::LC, MV::IR, TV::BK>();
}
// lcv0fc: light source color matrix applied to V0, [fc] added.
//   ([lc]·[v0]) >> 12 + [fc] -> lv, sv
static inline void lcv0fc() {
    mvmva<MX::LC, MV::V0, TV::FC>();
}
// lcv1fc: light source color matrix applied to V1, [fc] added.
//   ([lc]·[v1]) >> 12 + [fc] -> lv, sv
static inline void lcv1fc() {
    mvmva<MX::LC, MV::V1, TV::FC>();
}
// lcv2fc: light source color matrix applied to V2, [fc] added.
//   ([lc]·[v2]) >> 12 + [fc] -> lv, sv
static inline void lcv2fc() {
    mvmva<MX::LC, MV::V2, TV::FC>();
}
// lcirfc: light source color matrix applied to the IR vector ([sv]), [fc]
// added.
//   ([lc]·[sv]) >> 12 + [fc] -> lv, sv
static inline void lcirfc() {
    mvmva<MX::LC, MV::IR, TV::FC>();
}
} // namespace Kernels
} // namespace GTE
} // namespace psyqo