diff --git a/Cargo.toml b/Cargo.toml index e4a98e2b..9f90a8f9 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -34,7 +34,7 @@ cudarc = { path = "./resources/cudarc", features = [ metal-rs = { version = "0.27.0", package = "metal", optional = true, features = [ "mps", ] } -block = {version="0.1.6", optional = true} +block = { version = "0.1.6", optional = true } safetensors = "0.3.1" memmap2 = { version = "0.7.1", features = ["stable_deref_trait"] } half = { version = "2.3.1", features = ["num-traits", "rand_distr"] } @@ -48,3 +48,4 @@ gemm = "0.15.4" [dev-dependencies] dfdx = { version = "0.13", features = ["f16"] } rust_tokenizers = "8.1.0" +clap = { version = "4.4.18", features = ["derive"] } diff --git a/examples/mistral/main.rs b/examples/mistral/main.rs index 7503943d..a61719b7 100644 --- a/examples/mistral/main.rs +++ b/examples/mistral/main.rs @@ -1,5 +1,10 @@ -use std::{io::Write, marker::PhantomData, time::Instant}; +use std::{ + io::{self, Write}, + marker::PhantomData, + time::Instant, +}; +use clap::Parser; use colored::Colorize; use half::f16; use rust_tokenizers::tokenizer::{SentencePieceBpeTokenizer, Tokenizer, TruncationStrategy}; @@ -17,21 +22,23 @@ type DeviceCompiler = CudaFp16Compiler; #[cfg(all(not(feature = "cuda"), not(feature = "metal")))] type DeviceCompiler = CPUCompiler; -fn main() { - // let prompt = "[INST]Write me a python implementation of merge sort[/INST]\n"; - let prompt = " -# Three Laws of Robotics - -**The Three Laws of Robotics** (often shortened to **The Three Laws** or **Asimov's Laws**) are a set of rules devised by science fiction author Isaac Asimov, which were to be followed by robots in several of his stories. The rules were introduced in his 1942 short story \"Runaround\" (included in the 1950 collection I, Robot), although similar restrictions had been implied in earlier stories. 
- -## The Laws +// Command args parser +#[derive(Debug, Parser)] +#[command(author, version, about, long_about = None)] +pub struct CLIArgs { + /// Number of tokens to generate + #[clap(short = 't', long = "gen_tokens", default_value = "128")] + gen_tokens: i32, + + /// Prompt for the model + #[clap(short = 'p', long = "prompt", default_value = include_str!("prompts/shakespeare.txt"))] + prompt: String, +} -The Three Laws, presented to be from the fictional \"Handbook of Robotics, 56th Edition, 2058 A.D.\", are: - - The First Law: A robot may not injure a human being or, through inaction, allow a human being to come to harm. - - The Second Law: A robot must obey the orders given it by human beings except where such orders would conflict with the First Law. - - The Third Law: A robot must protect its own existence as long as such protection does not conflict with the First or Second Law. -"; - let tokens_to_generate = 256; +fn main() { + let cli_args = CLIArgs::parse(); + let prompt = cli_args.prompt.as_str(); + let tokens_to_generate = cli_args.gen_tokens; let tokenizer = SentencePieceBpeTokenizer::from_file( "./examples/mistral/setup/mistral-7b-hf/tokenizer.model", @@ -39,7 +46,9 @@ The Three Laws, presented to be from the fictional \"Handbook of Robotics, 56th ) .unwrap(); - println!("Creating graph..."); + print!("Defining Graphs"); + io::stdout().flush().unwrap(); + let now = Instant::now(); let mut cx1 = Graph::new(); let mut input = cx1.named_tensor::<(Const<1>, Dyn<'s'>)>("Input"); let model = model::MistralLM::initialize(&mut cx1); @@ -80,15 +89,22 @@ The Three Laws, presented to be from the fictional \"Handbook of Robotics, 56th )); decode_logits.retrieve(); cache_dest.keep(); + println!("\t - {}ms", now.elapsed().as_millis()); - println!("Compiling graph..."); + print!("Compiling Prompt Processing Graph"); + io::stdout().flush().unwrap(); + let now = Instant::now(); cx1.compile( <(GenericCompiler, DeviceCompiler)>::default(), (&mut input, &mut logits, 
&mut kv_cache), ); let model_weights = downstream(&state_set(&model), &cx1); cx1.no_delete.extend(model_weights.clone()); + println!("\t - {}ms", now.elapsed().as_millis()); + print!("Compiling Token Generation Graph"); + io::stdout().flush().unwrap(); + let now = Instant::now(); // Compile second graph cx2.compile( <(GenericCompiler, DeviceCompiler)>::default(), @@ -105,15 +121,17 @@ The Three Laws, presented to be from the fictional \"Handbook of Robotics, 56th let cache_dest_set = cache_dest.to_ids(); delete_inputs(&kv_model_weights, &mut cx2); delete_inputs(&cache_src_set, &mut cx2); + println!("\t - {}ms", now.elapsed().as_millis()); // Initial forward pass to load weights - println!("Loading model..."); + print!("Loading model"); + io::stdout().flush().unwrap(); let now = Instant::now(); input.set_dyn(vec![1.], vec![1, 1]); cx1.execute(); logits.drop(); kv_cache.drop(); - println!("Model loading took {}ms", now.elapsed().as_millis()); + println!("\t - {}ms", now.elapsed().as_millis()); // Now that weights are loaded, delete the loading nodes so they don't run again delete_inputs(&model_weights, &mut cx1); @@ -125,9 +143,14 @@ The Three Laws, presented to be from the fictional \"Handbook of Robotics, 56th input_ids.iter().map(|i| *i as f32).collect::<Vec<_>>(), vec![1, input_ids.len()], ); + print!("Processing Prompt"); + io::stdout().flush().unwrap(); let now = Instant::now(); cx1.execute(); - println!("Prompt processing took {}ms", now.elapsed().as_millis()); + let elapsed_ms = now.elapsed().as_millis(); + let n_prompt_tokens = input_ids.len(); + let pp_speed = 1000.0 * (n_prompt_tokens as f64) / (elapsed_ms as f64); + println!("\t - {}ms ({:.2} tok/s)", elapsed_ms, pp_speed); let output_id = sample_index(&logits.data()); input_ids.push(output_id); @@ -138,7 +161,7 @@ The Three Laws, presented to be from the fictional \"Handbook of Robotics, 56th prompt.white().bold(), decode(&tokenizer, &[output_id]).bright_green() ); - std::io::stdout().flush().unwrap(); +
io::stdout().flush().unwrap(); // Transfer weights and kv cache transfer_data(&model_weights, &mut cx1, &kv_model_weights, &mut cx2); @@ -161,7 +184,7 @@ The Three Laws, presented to be from the fictional \"Handbook of Robotics, 56th decode_logits.drop(); input_ids.push(output_id); print!("{}", decode(&tokenizer, &[output_id]).bright_green()); - std::io::stdout().flush().unwrap(); + io::stdout().flush().unwrap(); // Swap caches transfer_data_same_graph(&cache_dest_set, &cache_src_set, &mut cx2); diff --git a/examples/mistral/prompts/asimov.txt b/examples/mistral/prompts/asimov.txt new file mode 100644 index 00000000..36d49744 --- /dev/null +++ b/examples/mistral/prompts/asimov.txt @@ -0,0 +1,10 @@ +# Three Laws of Robotics + +**The Three Laws of Robotics** (often shortened to **The Three Laws** or **Asimov's Laws**) are a set of rules devised by science fiction author Isaac Asimov, which were to be followed by robots in several of his stories. The rules were introduced in his 1942 short story "Runaround" (included in the 1950 collection I, Robot), although similar restrictions had been implied in earlier stories. + +## The Laws + +The Three Laws, presented to be from the fictional "Handbook of Robotics, 56th Edition, 2058 A.D.", are: + - The First Law: A robot may not injure a human being or, through inaction, allow a human being to come to harm. + - The Second Law: A robot must obey the orders given it by human beings except where such orders would conflict with the First Law. + - The Third Law: A robot must protect its own existence as long as such protection does not conflict with the First or Second Law.
\ No newline at end of file diff --git a/examples/mistral/prompts/merge_sort.txt b/examples/mistral/prompts/merge_sort.txt new file mode 100644 index 00000000..f981c33f --- /dev/null +++ b/examples/mistral/prompts/merge_sort.txt @@ -0,0 +1 @@ +[INST]Write me a python implementation of merge sort[/INST] diff --git a/examples/mistral/prompts/shakespeare.txt b/examples/mistral/prompts/shakespeare.txt new file mode 100644 index 00000000..af0bca47 --- /dev/null +++ b/examples/mistral/prompts/shakespeare.txt @@ -0,0 +1,209 @@ +[INST] Complete the following + +## SCENE VII. The forest. +A table set out. Enter DUKE SENIOR, AMIENS, and Lords like outlaws + +### DUKE SENIOR +I think he be transform'd into a beast; +For I can no where find him like a man. + +### First Lord +My lord, he is but even now gone hence: +Here was he merry, hearing of a song. + +### DUKE SENIOR +If he, compact of jars, grow musical, +We shall have shortly discord in the spheres. +Go, seek him: tell him I would speak with him. +Enter JAQUES + +### First Lord +He saves my labour by his own approach. + +### DUKE SENIOR +Why, how now, monsieur! what a life is this, +That your poor friends must woo your company? +What, you look merrily! + +### JAQUES +A fool, a fool! I met a fool i' the forest, +A motley fool; a miserable world! +As I do live by food, I met a fool +Who laid him down and bask'd him in the sun, +And rail'd on Lady Fortune in good terms, +In good set terms and yet a motley fool. +'Good morrow, fool,' quoth I. 'No, sir,' quoth he, +'Call me not fool till heaven hath sent me fortune:' +And then he drew a dial from his poke, +And, looking on it with lack-lustre eye, +Says very wisely, 'It is ten o'clock: +Thus we may see,' quoth he, 'how the world wags: +'Tis but an hour ago since it was nine, +And after one hour more 'twill be eleven; +And so, from hour to hour, we ripe and ripe, +And then, from hour to hour, we rot and rot; +And thereby hangs a tale.' 
When I did hear +The motley fool thus moral on the time, +My lungs began to crow like chanticleer, +That fools should be so deep-contemplative, +And I did laugh sans intermission +An hour by his dial. O noble fool! +A worthy fool! Motley's the only wear. + +### DUKE SENIOR +What fool is this? + +### JAQUES +O worthy fool! One that hath been a courtier, +And says, if ladies be but young and fair, +They have the gift to know it: and in his brain, +Which is as dry as the remainder biscuit +After a voyage, he hath strange places cramm'd +With observation, the which he vents +In mangled forms. O that I were a fool! +I am ambitious for a motley coat. + +### DUKE SENIOR +Thou shalt have one. + +### JAQUES +It is my only suit; +Provided that you weed your better judgments +Of all opinion that grows rank in them +That I am wise. I must have liberty +Withal, as large a charter as the wind, +To blow on whom I please; for so fools have; +And they that are most galled with my folly, +They most must laugh. And why, sir, must they so? +The 'why' is plain as way to parish church: +He that a fool doth very wisely hit +Doth very foolishly, although he smart, +Not to seem senseless of the bob: if not, +The wise man's folly is anatomized +Even by the squandering glances of the fool. +Invest me in my motley; give me leave +To speak my mind, and I will through and through +Cleanse the foul body of the infected world, +If they will patiently receive my medicine. + +### DUKE SENIOR +Fie on thee! I can tell what thou wouldst do. + +### JAQUES +What, for a counter, would I do but good? + +### DUKE SENIOR +Most mischievous foul sin, in chiding sin: +For thou thyself hast been a libertine, +As sensual as the brutish sting itself; +And all the embossed sores and headed evils, +That thou with licence of free foot hast caught, +Wouldst thou disgorge into the general world. + +### JAQUES +Why, who cries out on pride, +That can therein tax any private party? 
+Doth it not flow as hugely as the sea, +Till that the weary very means do ebb? +What woman in the city do I name, +When that I say the city-woman bears +The cost of princes on unworthy shoulders? +Who can come in and say that I mean her, +When such a one as she such is her neighbour? +Or what is he of basest function +That says his bravery is not of my cost, +Thinking that I mean him, but therein suits +His folly to the mettle of my speech? +There then; how then? what then? Let me see wherein +My tongue hath wrong'd him: if it do him right, +Then he hath wrong'd himself; if he be free, +Why then my taxing like a wild-goose flies, +Unclaim'd of any man. But who comes here? +Enter ORLANDO, with his sword drawn + +### ORLANDO +Forbear, and eat no more. + +### JAQUES +Why, I have eat none yet. + +### ORLANDO +Nor shalt not, till necessity be served. + +### JAQUES +Of what kind should this cock come of? + +### DUKE SENIOR +Art thou thus bolden'd, man, by thy distress, +Or else a rude despiser of good manners, +That in civility thou seem'st so empty? + +### ORLANDO +You touch'd my vein at first: the thorny point +Of bare distress hath ta'en from me the show +Of smooth civility: yet am I inland bred +And know some nurture. But forbear, I say: +He dies that touches any of this fruit +Till I and my affairs are answered. + +### JAQUES +An you will not be answered with reason, I must die. + +### DUKE SENIOR +What would you have? Your gentleness shall force +More than your force move us to gentleness. + +### ORLANDO +I almost die for food; and let me have it. + +### DUKE SENIOR +Sit down and feed, and welcome to our table. + +### ORLANDO +Speak you so gently? Pardon me, I pray you: +I thought that all things had been savage here; +And therefore put I on the countenance +Of stern commandment. 
But whate'er you are +That in this desert inaccessible, +Under the shade of melancholy boughs, +Lose and neglect the creeping hours of time +If ever you have look'd on better days, +If ever been where bells have knoll'd to church, +If ever sat at any good man's feast, +If ever from your eyelids wiped a tear +And know what 'tis to pity and be pitied, +Let gentleness my strong enforcement be: +In the which hope I blush, and hide my sword. + +### DUKE SENIOR +True is it that we have seen better days, +And have with holy bell been knoll'd to church +And sat at good men's feasts and wiped our eyes +Of drops that sacred pity hath engender'd: +And therefore sit you down in gentleness +And take upon command what help we have +That to your wanting may be minister'd. + +### ORLANDO +Then but forbear your food a little while, +Whiles, like a doe, I go to find my fawn +And give it food. There is an old poor man, +Who after me hath many a weary step +Limp'd in pure love: till he be first sufficed, +Oppress'd with two weak evils, age and hunger, +I will not touch a bit. + +### DUKE SENIOR +Go find him out, +And we will nothing waste till you return. + +### ORLANDO +I thank ye; and be blest for your good comfort! +Exit + +### DUKE SENIOR +Thou seest we are not all alone unhappy: +This wide and universal theatre +Presents more woeful pageants than the scene +Wherein we play in. +[/INST]