/* RiscOS.c
 * RiscOS veneer for fast parallel Mandelbrot set scanner
 * By Neil A Carson, RiscBSD kernel team, 1996 A.D.
 */

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include "hydra.h"
#include "kernel.h"
#include "MBrot.h"
#include "os.h"

/* Filenames, for 16 bit and 32 bit iterators respectively.
 * Both tables are indexed by the prec argument of veneer_start(), so
 * entry 0 is the 16 bit iterator and entry 1 is the 32 bit iterator.
 *
 * Host_Code_File names the images that veneer_start() reads into malloc'd
 * memory with fopen()/fread() for the main CPU.
 */
const char *Host_Code_File[] =
{
    "<ParaBrot$Dir>.HostRM",
    "<ParaBrot$Dir>.Host32"
};

/* Slave_Code_File names the images that veneer_start() hands to
 * hydra_load_chunk() and then maps onto the Hydra CPUs.
 */
const char *Slave_Code_File[] =
{
    "<ParaBrot$Dir>.SlaveRM",
    "<ParaBrot$Dir>.Slave32"
};

/* Base address of the Slave's screen memory---currently 16Mb up the memory map.
 * veneer_change_mode() aliases the screen dynamic area at this address, and
 * veneer_fork() writes the same value into the slave code as its screen
 * pointer.
 */
#define Slave_Screen_Base (16*1024*1024)
/* Base address requested for the result data chunk created by
 * veneer_get_result()---8Mb up the memory map.
 */
#define Slave_Data_Base (8*1024*1024)
#define App_Base 0x8000 /* Base of application workspace on the slave */

/* Exit with an error, from a _kernel_oserror *
 *
 * e is an expression yielding a pointer to an error block, or 0 for "no
 * error". It is evaluated exactly once, so a failing call passed as the
 * argument is not issued a second time merely to fetch its message.
 * If the pointer is non-null its errmess text is printed on stdout and the
 * program exits (with status 0, exactly as before).
 *
 * The do { } while (0) wrapper makes the expansion a single statement, so
 * the macro is safe as the unbraced body of an if that has an else.
 *
 * NOTE(review): the explicit cast accepts any error-block pointer type and
 * reads it as a _kernel_oserror; confirm that the error type returned by the
 * hydra.h / os.h calls has the same layout.
 */
#define Error_Handle(e) \
    do { \
        const _kernel_oserror *error_handle_e_ = \
            (const _kernel_oserror *) (e); \
        if (error_handle_e_ != 0) \
        { \
            printf("%s\n", error_handle_e_->errmess); \
            exit(0); \
        } \
    } while (0)

/* Screen memory
 * Defined in another file. veneer_fork() writes this pointer into the host
 * code image as the host's screen address.
 */
extern char *screen;

/* Dynamic area for screen
 * Passed to xos_read_dynamic_area() to find the screen base, and to
 * hydra_new_alias() as the area to alias into Hydra memory.
 */
#define Screen_Dynamic_Area 2

/* Chunks and associated spaces for the code, data and screen
 *
 * slave_code / slave_code_base: handle and base filled in by
 *     hydra_load_chunk() in veneer_start().
 * data / data_base: handle filled in by, and pointer returned from,
 *     hydra_new_chunk() in veneer_get_result().
 * video: handle filled in by hydra_new_alias() in veneer_change_mode().
 * host_code_base: malloc'd copy of the host code file, from veneer_start().
 *
 * All start as 0; veneer_close_down() uses non-zero values to decide what
 * needs releasing.
 */
hydra_chunk slave_code = 0, data = 0, video = 0;
int *host_code_base = 0, *slave_code_base = 0, *data_base = 0;

/* Call some machine code
 * Defined in another file. veneer_fork() calls it with the result pointer
 * (cast to int) as r0 and the address of word 3 of the host code as addr.
 */
extern void mc_call(int r0, void *addr);

/* Return a word of type hydra_cpu_mask, letting us know if a CPU number is
 * present or not.
 */
hydra_cpu_mask hydra_get_cpus(void)
{
    int m, cnt;
    hydra_cpu_id_block b;

    m = 0;
    Error_Handle(hydra_processors(&b));
    for (cnt = 0; cnt < hydra_Max_CPUs; cnt ++)
        if (b.cpu[cnt] != 0) m |= 1 << cnt;
    return (hydra_cpu_mask) m;
}

/* Locate the screen memory and make it visible to the Hydra CPUs, returning
 * the host-side address of the start of screen memory.
 *
 * Despite its name this function issues no mode-change call itself: it reads
 * the base of the screen dynamic area, creates an alias of that area at
 * Slave_Screen_Base (the handle is kept in the global video), and maps the
 * alias on every CPU reported by hydra_get_cpus().
 *
 * Any error returned by the OS or Hydra calls goes through Error_Handle,
 * which prints the message and exits.
 */
char *veneer_change_mode(void)
{
    int screen_size, screen_size_limit, cnt;
    hydra_cpu_mask m;
    char *screenstart = 0;

    /* Read screen base. The call's error return is checked so that a
     * failure can never hand back an unset pointer. The size and limit are
     * only received because the call takes them as outputs.
     */
    Error_Handle(xos_read_dynamic_area(Screen_Dynamic_Area,
                                       (byte **) &screenstart,
                                       &screen_size, &screen_size_limit));

    /* Map the screen into the Hydra memory, on the CPUs present, at the
     * appropriate address
     */
    Error_Handle(hydra_new_alias((void *) Slave_Screen_Base,
                                 Screen_Dynamic_Area,
                                 ACCESS_rw, &video));
    m = hydra_get_cpus();
    for (cnt = 0; cnt < hydra_Max_CPUs; cnt ++)
        if (m & (1<<cnt))
            Error_Handle(hydra_map_chunk(video, cnt, MAP_map));

    return screenstart;
}

/* Allocate the result buffer as a Hydra chunk of s bytes (initial size and
 * maximum size are both s), based at Slave_Data_Base with ACCESS_rw access,
 * and map it on every CPU reported by hydra_get_cpus().
 *
 * The chunk handle is stored in the global data and its address in the
 * global data_base; that address is also the return value. Failure to
 * create the chunk goes to Exit_Error; a mapping error goes to Error_Handle.
 */
result_data_t *veneer_get_result(int s)
{
    hydra_chunk_parms parms;
    hydra_cpu_mask present;
    int cpu;

    /* Describe the chunk we want
     */
    parms.mode = ACCESS_rw;
    parms.base_addr = (void *) Slave_Data_Base;
    parms.max_size = s;
    parms.init_size = parms.max_size;
    strcpy(parms.name, "ParaBrot Mandelbrot data chunk");

    data_base = hydra_new_chunk(&parms, &data);
    if (data_base == 0)
        Exit_Error("Can't grab memory for data chunk!");

    /* Make the chunk visible on each CPU that is fitted
     */
    present = hydra_get_cpus();
    cpu = 0;
    while (cpu < hydra_Max_CPUs)
    {
        if (present & (1 << cpu))
        {
            Error_Handle(hydra_map_chunk(data, cpu, MAP_map));
        }
        cpu ++;
    }

    return (result_data_t *) data_base;
}

/* Called on loading. If prec=1 then use high precision iteration,
 * otherwise use low precision.
 *
 * Loads the slave code file into a Hydra chunk (globals slave_code and
 * slave_code_base), reads the matching host code file into malloc'd memory
 * (global host_code_base), and maps the slave chunk on every CPU reported
 * by hydra_get_cpus().
 *
 * Hydra errors go through Error_Handle; file and memory failures go through
 * Exit_Error.
 * NOTE(review): as before, the code after each Exit_Error call relies on
 * Exit_Error not returning---confirm against its definition.
 */
void veneer_start(int prec)
{
    int slave_code_size, cnt, which;
    long host_file_size;
    hydra_cpu_mask cpus;
    FILE *host_fp;

    /* The filename tables have exactly two entries. Following the contract
     * above, anything other than 1 selects the low precision entry instead
     * of indexing off the end of the tables.
     */
    which = (prec == 1) ? 1 : 0;

    /* Load the code into Hydra memory
     */
    Error_Handle(hydra_load_chunk((char *) Slave_Code_File[which], &slave_code,
                                  &slave_code_base, &slave_code_size));

    /* And onto the main CPU!
     */
    if ((host_fp = fopen(Host_Code_File[which], "rb")) == 0)
        Exit_Error("Can't open code!");

    /* Find the file length. ftell() returns -1 on failure, which must not
     * reach malloc()/fread() as a size.
     */
    host_file_size = -1L;
    if (fseek(host_fp, 0L, SEEK_END) == 0)
        host_file_size = ftell(host_fp);
    if (host_file_size <= 0 || fseek(host_fp, 0L, SEEK_SET) != 0)
        Exit_Error("Can't find the size of the code file!");

    if ((host_code_base = malloc((size_t) host_file_size)) == 0)
        Exit_Error("No free memory to load code onto main CPU!");
    if (fread(host_code_base, (size_t) host_file_size, 1, host_fp) != 1)
        Exit_Error("Can't read sufficient members from file!");
    fclose(host_fp);

    /* Map it into all CPU's present
     */
    cpus = hydra_get_cpus();
    for (cnt = 0; cnt < hydra_Max_CPUs; cnt ++)
        if (cpus & (1 << cnt))
            Error_Handle(hydra_map_chunk(slave_code, cnt, MAP_map));
}

/* Called on exit
 */
void veneer_close_down(void)
{
    if (slave_code_base != 0) Error_Handle(hydra_free_chunk(slave_code));
    if (host_code_base != 0) free(host_code_base);
    if (data_base != 0) Error_Handle(hydra_free_chunk(data));
    if (video != 0) Error_Handle(hydra_free_chunk(video));
}

/* Calculate the Mandelbrot.
 *
 * Words 0, 1 and 2 of each code image hold the word index, within that
 * image, of the slot for the screen pointer, the number of squares and the
 * maximum iteration count. Those slots are filled in for both the host and
 * the slave image, then the code is started on every CPU reported by
 * hydra_get_cpus() and finally on the host.
 *
 * The host enters at word 3 of its image with r (cast to int) as the first
 * argument of mc_call(); the slaves are called at App_Base + 12.
 * NOTE(review): presumably the slave image sits at App_Base, making that
 * address its word 3 as well---confirm.
 */
void veneer_fork(int squares, int maxiter, result_data_t *r)
{
    enum
    {
        Word_Screen  = 0,   /* holds index of the screen pointer slot */
        Word_Squares = 1,   /* holds index of the squares slot */
        Word_MaxIter = 2,   /* holds index of the max iterations slot */
        Word_Entry   = 3    /* first word of executable host code */
    };
    int *host = host_code_base;
    int *slave = slave_code_base;
    hydra_cpu_mask present;
    int cpu;

    /* Screen memory pointers: the host's own address, and the alias address
     * used on the slaves
     */
    host[host[Word_Screen]] = (int) screen;
    slave[slave[Word_Screen]] = Slave_Screen_Base;

    /* Number of squares to recurse
     */
    host[host[Word_Squares]] = squares;
    slave[slave[Word_Squares]] = squares;

    /* Maximum iterations
     */
    host[host[Word_MaxIter]] = maxiter;
    slave[slave[Word_MaxIter]] = maxiter;

    /* Kick off every slave that is fitted
     */
    present = hydra_get_cpus();
    for (cpu = 0; cpu < hydra_Max_CPUs; cpu ++)
    {
        if ((present & (1 << cpu)) == 0) continue;
        hydra_call(cpu, (void *) (App_Base + 12));
    }

    /* Then run the host's share
     */
    mc_call((int) r, host + Word_Entry);
}

/* See if we have finished or not.
 *
 * Takes a register snapshot of every CPU reported by hydra_get_cpus() and
 * returns 0 if any snapshot's pc lies in the range 0x8000 to 0x10000
 * inclusive, otherwise 1.
 */
int veneer_finished(void)
{
    enum { Workspace_Lo = 0x8000, Workspace_Hi = 0x10000 };
    hydra_cpu_snapshot snap[hydra_Max_CPUs];
    hydra_cpu_mask present;
    int cpu;

    /* We know here we are pretty near done, as the host has returned...
     * Snapshot every fitted CPU first, before looking at any of them.
     */
    present = hydra_get_cpus();
    for (cpu = 0; cpu < hydra_Max_CPUs; cpu ++)
    {
        if (present & (1 << cpu))
        {
            hydra_snapshot_regs(cpu, &snap[cpu]);
        }
    }

    /* A PC still inside the application workspace means that CPU is still
     * busy, so we haven't finished!
     */
    for (cpu = 0; cpu < hydra_Max_CPUs; cpu ++)
    {
        if (!(present & (1 << cpu))) continue;
        if (snap[cpu].pc < Workspace_Lo) continue;
        if (snap[cpu].pc > Workspace_Hi) continue;
        return 0;
    }

    return 1;
}
