
Inference speed #4

Open
Wuhuddy opened this issue Jan 17, 2025 · 4 comments

Wuhuddy commented Jan 17, 2025

Hi, at inference time a 768x1024 input image takes about a minute and a half to produce a result. In addition, the model produces repeated answers.

[Image attached]

How can this be fixed?

xujz18 (Member) commented Jan 17, 2025

Hi, this may be because the current inference code, as released in the open-source version, has a few mismatches. I have asked @Uranusxer to look into the problem. Thank you.

Uranusxer (Contributor) commented

Hi, could you provide the detailed code and command-line information? Regarding inference speed: deploying the model itself takes about 1 minute, while a single question only takes 1-2 s, so you can deploy the model once and then run questions in batch. Also, when asking multiple questions about the same image/video, the image/video embeddings can be accelerated with a KV cache at the transformer layers; we plan further updates on this in the future.
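A minimal sketch of the deploy-once-then-batch pattern described above, assuming the model, tokenizer and processors have already been initialized as in the inference scripts posted below; the chat() call mirrors those scripts, and nothing here is an official repository snippet:

import torch
from utils.utils import chat  # same helper used in the scripts below

def batch_answer(args, model, text_processor_infer, image_processor, image_path, questions):
    """Answer many questions about one image with a single loaded model.

    Deploying the model costs about a minute, but each question only costs
    roughly 1-2 s once the weights are resident, so the load time is
    amortized over the whole batch of questions.
    """
    answers = []
    with torch.no_grad():
        for ques in questions:
            response, _, _ = chat(
                image_path=image_path,
                image=None,
                model=model,
                text_processor=text_processor_infer,
                img_processor=image_processor,
                query=ques,
                max_length=args.max_length,
                top_p=args.top_p,
                temperature=args.temperature,
                top_k=args.top_k,
                invalid_slices=text_processor_infer.invalid_slices,
                args=args,
            )
            answers.append(response)
    return answers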

gsfsdv commented Jan 21, 2025

# -*- encoding: utf-8 -*-
import os, sys
import json
import argparse
import torch
from tqdm import tqdm  # Import tqdm for the progress bar
from sat.model.mixins import CachedAutoregressiveMixin
from sat.quantization.kernels import quantize
from sat.model import AutoModel
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from utils.utils import chat, llama2_tokenizer, llama2_text_processor_inference, get_image_processor
from utils.utils import llama3_tokenizer
from utils.models import CogVLMModel
from utils.models import VisualLlamaEVA
from io import BytesIO
import pandas as pd
from PIL import Image
import numpy as np
from VisionReward_Image.t2v_metrics.vqascore import VQAScore

MASK_INDICES = [0, 1, 2]      # Indices of mask features in original list
MASK_FEATURE_MAP = {
    0: [22, 23, 24, 28, 29],  # 'body(mask)' masks related features 'body correct' & 'harmfulness'
    1: [25, 26],              # 'face(mask)' masks related features 'face'
    2: [27],                  # 'hands(mask)' masks related features 'hands'
}

def cal_score(args, image_path, prompt, model, text_processor_infer, image_processor):
    with open(args.ques_file, 'r') as file:
        ques_data = [line.strip() for line in file]
    with open(args.weight_file, 'r') as file2:
        weight_data = json.load(file2)
    wegiht = weight_data['coef']
    intercept = weight_data['intercept']
    answer_list = []
    alignment_score = VQAScore(model='clip-flant5-xxl')  # our recommended scoring model
    alignment = alignment_score(images=[image_path], texts=[prompt])[0][0].cpu().item()
    for ques in tqdm(ques_data, f'scoring image:{image_path}'):
        try:
            response, _, _ = chat(
                image_path=image_path,
                image=None,
                model=model,
                text_processor=text_processor_infer,
                img_processor=image_processor,
                query=ques,
                max_length=args.max_length,
                top_p=args.top_p,
                temperature=args.temperature,
                top_k=args.top_k,
                invalid_slices=text_processor_infer.invalid_slices,
                args=args
            )
            answer_list.append(response)
        except Exception as e:
            answer_list.append(None)
            print(f"Error processing {ques}: {str(e)}")
    reward = [(1 if ans == 'yes<|end_of_text|>' else -1) for ans in answer_list]
    # add mask
    for mask_index, feature_indices in MASK_FEATURE_MAP.items():
        for feature_index in feature_indices:
            reward[feature_index] *= int(reward[mask_index] > 0)
    reward_filtered = [v for i, v in enumerate(reward) if i not in MASK_INDICES]
    final_reward = [alignment] + reward_filtered
    score = np.dot(final_reward, wegiht) + intercept
    return score[0]

def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("--max_length", type=int, default=3328, help='max length of the total sequence')
    parser.add_argument("--top_p", type=float, default=0.4, help='top p for nucleus sampling')
    parser.add_argument("--top_k", type=int, default=1, help='top k for top k sampling')
    parser.add_argument("--temperature", type=float, default=0.8, help='temperature for sampling')
    parser.add_argument("--version", type=str, default="vqa", choices=['chat', 'vqa', 'chat_old', 'base'], help='version of language process')
    parser.add_argument("--quant", choices=[8, 4], type=int, default=None, help='quantization bits')
    parser.add_argument("--from_pretrained", type=str, default="/mnt/dolphinfs/ssd_pool/docker/user/hadoop-aipnlp/HAL/hongshibo/models/VisionReward", help='pretrained ckpt')  # You need to first download the model from https://huggingface.co/THUDM/VisionReward-Image and then refer to its README to extract the checkpoint.
    parser.add_argument("--tokenizer_path", type=str, default="/mnt/dolphinfs/ssd_pool/docker/user/hadoop-aipnlp/HAL/hongshibo/models/Llama-3-8B-Instruct", help='tokenizer path')
    parser.add_argument("--fp16", action="store_true", help="Use fp16 precision")
    parser.add_argument("--bf16", action="store_true", help="Use bf16 precision")
    parser.add_argument("--stream_chat", action="store_true")
    parser.add_argument("--ques_file", type=str, default="VisionReward_Image/VisionReward_image_qa_select.txt", help="Path to the meta question file")
    parser.add_argument("--weight_file", type=str, default="VisionRewardImage/weight_select.json", help="Path to the weight file")
    parser.add_argument('--question', type=str, help='Question to be answered', default='Is the image clear?')
    parser.add_argument('--score', help='Whether to output the score', default=False, action='store_true')
    args = parser.parse_args()

    # Initialize model
    model, model_args = VisualLlamaEVA.from_pretrained(
        args.from_pretrained,
        args=argparse.Namespace(
            deepspeed=None,
            local_rank=0,
            rank=0,
            world_size=1,
            model_parallel_size=1,
            mode='inference',
            skip_init=True,
            use_gpu_initialization=not args.quant,
            device='cpu' if args.quant else 'cuda',
            **vars(args)
        )
    )
    model = model.eval()
    if args.quant:
        quantize(model, args.quant)
        if torch.cuda.is_available():
            model = model.cuda()
    model.add_mixin('auto-regressive', CachedAutoregressiveMixin())
    tokenizer = llama3_tokenizer(args.tokenizer_path, signal_type=args.version)
    image_processor = get_image_processor(model_args.eva_args["image_size"][0])
    text_processor_infer = llama2_text_processor_inference(tokenizer, args.max_length, model.image_length)

    # Set input
    image_path1 = "asset/test/test1.jpg"
    image_path2 = "asset/test/test2.jpg"
    prompt = "A child shouting on a chair with several plush toys."

    with torch.no_grad():
        if args.score:
            score = cal_score(args, image_path1, prompt, model, text_processor_infer, image_processor)
            print(f"score: {score}")
        else:
            ques = args.question
            response, _, _ = chat(
                image_path=image_path1,
                image=None,
                model=model,
                text_processor=text_processor_infer,
                img_processor=image_processor,
                query=ques,
                max_length=args.max_length,
                top_p=args.top_p,
                temperature=args.temperature,
                top_k=args.top_k,
                invalid_slices=text_processor_infer.invalid_slices,
                args=args
            )
            print(f"response:{response}")


if __name__ == "__main__":
    main()

I run into the same problem with the code above.

Wuhuddy (Author) commented Jan 21, 2025

Hi, could you provide the detailed code and command-line information? Regarding inference speed: deploying the model itself takes about 1 minute, while a single question only takes 1-2 s, so you can deploy the model once and then run questions in batch. Also, when asking multiple questions about the same image/video, the image/video embeddings can be accelerated with a KV cache at the transformer layers; we plan further updates on this in the future.

Here is the code:

# -*- encoding: utf-8 -*-
import os, sys
os.environ["CUDA_VISIBLE_DEVICES"] = "7"
import json
import argparse
import torch
from tqdm import tqdm  # Import tqdm for the progress bar
from sat.model.mixins import CachedAutoregressiveMixin
from sat.quantization.kernels import quantize
from sat.model import AutoModel
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from utils.utils import chat, llama2_tokenizer, llama2_text_processor_inference, get_image_processor
from utils.utils import llama3_tokenizer
from utils.models import CogVLMModel
from utils.models import VisualLlamaEVA
from io import BytesIO
import pandas as pd
from PIL import Image
import numpy as np
import VisionRewardImage.t2v_metrics

MASK_INDICES = [0, 1, 2]      # Indices of mask features in original list
MASK_FEATURE_MAP = {
    0: [22, 23, 24, 28, 29],      # 'body(mask)' masks related features 'body correct' & 'harmfulness'
    1: [25, 26],                  # 'face(mask)' masks related features 'face'
    2: [27],                      # 'hands(mask)' masks related features 'hands'
}

def cal_score(args,image_path,prompt,model,text_processor_infer,image_processor):
    with open(args.ques_file, 'r') as file:
        ques_data = [line.strip() for line in file]
    with open(args.weight_file, 'r') as file2:
        weight_data = json.load(file2)
    wegiht = weight_data['coef']
    intercept = weight_data['intercept']
    answer_list = []
    alignment_score = t2v_metrics.VQAScore(model='clip-flant5-xxl') # our recommended scoring model
    alignment = alignment_score(images=[image_path], texts=[prompt])[0][0].cpu().item() 
    for ques in tqdm(ques_data, f'scoring image:{image_path}'):
        try:
            response, _, _ = chat(
                image_path=image_path,
                image = None,
                model=model,
                text_processor=text_processor_infer,
                img_processor=image_processor,
                query=ques,
                max_length=args.max_length,
                top_p=args.top_p,
                temperature=args.temperature,
                top_k=args.top_k,
                invalid_slices=text_processor_infer.invalid_slices,
                args=args
            )
            answer_list.append(response)
        except Exception as e:
            answer_list.append(None)
            print(f"Error processing {ques}: {str(e)}")
    reward = [(1 if ans =='yes<|end_of_text|>' else -1 ) for ans in answer_list]
    # add mask
    for mask_index, feature_indices in MASK_FEATURE_MAP.items():
        for feature_index in feature_indices:
            reward[feature_index] *= (int)(reward[mask_index] > 0)
    reward_filtered = [v for i, v in enumerate(reward) if i not in MASK_INDICES]
    final_reward = [alignment] + reward_filtered
    score = np.dot(final_reward, wegiht) + intercept
    return score[0]

def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("--max_length", type=int, default=3328, help='max length of the total sequence')
    parser.add_argument("--top_p", type=float, default=0.4, help='top p for nucleus sampling')
    parser.add_argument("--top_k", type=int, default=1, help='top k for top k sampling')
    parser.add_argument("--temperature", type=float, default=0.8, help='temperature for sampling')
    parser.add_argument("--version", type=str, default="vqa", choices=['chat', 'vqa', 'chat_old', 'base'], help='version of language process')
    parser.add_argument("--quant", choices=[8, 4], type=int, default=None, help='quantization bits')
    parser.add_argument("--from_pretrained", type=str, default="/home/aigc_worker/aigc/data_hub/tiezhu/THUDM/VisionRewardImage/ckpts", help='pretrained ckpt')  # You need to first download the model from https://huggingface.co/THUDM/VisionReward-Image and then refer to its README to extract the checkpoint.
    parser.add_argument("--tokenizer_path", type=str, default="/home/aigc_worker/aigc/data_hub/tiezhu/THUDM/VisionRewardImage/ckpts", help='tokenizer path')
    parser.add_argument("--fp16", action="store_true", help="Use fp16 precision")
    parser.add_argument("--bf16", action="store_true", help="Use bf16 precision")
    parser.add_argument("--stream_chat", action="store_true")
    parser.add_argument("--ques_file", type=str, default="VisionRewardImage/VisionReward_image_qa_select.txt", help="Path to the meta question file")
    parser.add_argument("--weight_file", type=str, default="VisionRewardImage/weight_select.json", help="Path to the weight file")
    parser.add_argument('--question', type=str, help='Question to be answered', default='Is the human body in the image completely correct?')
    parser.add_argument('--score', help='Whether to output the score', default=False, action='store_true')
    args = parser.parse_args()

    # Initialize model
    model, model_args = VisualLlamaEVA.from_pretrained(
        args.from_pretrained,
        args=argparse.Namespace(
            deepspeed=None,
            local_rank=0,
            rank=0,
            world_size=1,
            model_parallel_size=1,
            mode='inference',
            skip_init=True,
            use_gpu_initialization=not args.quant,
            device='cpu' if args.quant else 'cuda',
            **vars(args)
        )
    )
    model = model.eval()
    if args.quant:
        quantize(model, args.quant)
        if torch.cuda.is_available():
            model = model.cuda()
    model.add_mixin('auto-regressive', CachedAutoregressiveMixin())
    tokenizer = llama3_tokenizer(args.tokenizer_path, signal_type=args.version)
    image_processor = get_image_processor(model_args.eva_args["image_size"][0])
    text_processor_infer = llama2_text_processor_inference(tokenizer, args.max_length, model.image_length)
    
    # Set input
    image_path1 = "/home/aigc_worker/aigc/data_hub/tiezhu/image_quality/dataset_normal/train/bad_case/product_650715_1731976093_2157116_0_EYGgW.jpg"
    # image_path2 = "asset/test/test2.jpg"
    prompt = "This is an AI-generated image.I need you to determine whether there are any limb abnormalities in the model in this photo."
    
    with torch.no_grad():
        if args.score:
            score = cal_score(args,image_path1,prompt,model,text_processor_infer,image_processor)
            print(f"score: {score}")
        else:
            ques = args.question
            response, _, _ = chat(
                image_path=image_path1,
                image = None,
                model=model,
                text_processor=text_processor_infer,
                img_processor=image_processor,
                query=ques,
                max_length=args.max_length,
                top_p=args.top_p,
                temperature=args.temperature,
                top_k=args.top_k,
                invalid_slices=text_processor_infer.invalid_slices,
                args=args
            )
            print(f"response:{response}")
               

if __name__ == "__main__":
    main()

Here is the command-line output:

(visionreward_image) root@xflux-dev:/home/aigc_worker/aigc/data_hub/tiezhu/THUDM# CUDA_VISIBLE_DEVICES=7 python inference-image.py --bf16
/root/env/miniconda3/envs/visionreward_image/lib/python3.11/site-packages/torch/cuda/__init__.py:611: UserWarning: Can't initialize NVML
  warnings.warn("Can't initialize NVML")
[2025-01-21 16:55:42,272] [INFO] [real_accelerator.py:222:get_accelerator] Setting ds_accelerator to cuda (auto detect)
[2025-01-21 16:55:46,792] [INFO] building VisualLlamaEVA model ...
[2025-01-21 16:55:46,795] [INFO] [RANK 0] > initializing model parallel with size 1
[2025-01-21 16:55:46,795] [INFO] [RANK 0] You didn't pass in LOCAL_WORLD_SIZE environment variable. We use the guessed LOCAL_WORLD_SIZE=1. If this is wrong, please pass the LOCAL_WORLD_SIZE manually.
[2025-01-21 16:55:46,795] [INFO] [RANK 0] You are using model-only mode.
For torch.distributed users or loading model parallel models, set environment variables RANK, WORLD_SIZE and LOCAL_RANK.
[2025-01-21 16:56:01,770] [INFO] [RANK 0]  > number of parameters on model parallel rank 0: 19503105280
[2025-01-21 16:56:11,331] [INFO] [RANK 0] global rank 0 is loading checkpoint /home/aigc_worker/aigc/data_hub/tiezhu/THUDM/VisionRewardImage/ckpts/1/mp_rank_00_model_states.pt
[2025-01-21 16:57:02,067] [INFO] [RANK 0] > successfully loaded /home/aigc_worker/aigc/data_hub/tiezhu/THUDM/VisionRewardImage/ckpts/1/mp_rank_00_model_states.pt
response:yes<|end_of_text|><|end_of_text|><|end_of_text|>person<|end_of_text|>footwear<|end_of_text|>sand<|end_of_text|>tree<|end_of_text|>sky<|end_of_text|>water<|end_of_text|>beach<|end_of_text|>rock<|end_of_text|>cloud<|end_of_text|>shadow<|end_of_text|>leaf<|end_of_text|>palm<|end_of_text|>sun<|end_of_text|>light<|end_of_text|>wind<|end_of_text|>wave<|end_of_text|>ocean<|end_of_text|>surf<|end_of_text|>tide<|end_of_text|>rocky<|end_of_text|>coast<|end_of_text|>seascape<|end_of_text|>beachscape<|end_of_text|>oceanfront<|end_of_text|>oceanview<|end_of_text|>oceanfrontview<|end_of_text|>oceanview<|end_of_text|>oceanfrontview<|end_of_text|>oceanfrontview<|end_of_text|>oceanfrontview<|end_of_text|>oceanfrontview<|end_of_text|>oceanfrontview<|end_of_text|>oceanfrontview<|end_of_text|>oceanfrontview<|end_of_text|>oceanfrontview<|end_of_text|>oceanfrontview<|end_of_text|>oceanfrontview<|end_of_text|>oceanfrontview<|end_of_text|>oceanfrontview<|end_of_text|>oceanfrontview
oceanfrontview
oceanfrontview
oceanfrontview
[the line "oceanfrontview" repeats for roughly 170 more lines; the output finally ends with "oceanfront"]
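The repeated tail above shows the model decoding past the first <|end_of_text|> marker, the same marker the yes/no check in cal_score keys off. As a client-side stop-gap only, and not something proposed by the maintainers in this thread, the response string can be truncated at the first marker before it is printed or compared:

EOT = "<|end_of_text|>"

def truncate_at_eot(response: str) -> str:
    # Hypothetical helper, not part of the repository: e.g.
    # "yes<|end_of_text|>oceanfrontview..." -> "yes"
    return response.split(EOT, 1)[0].strip()

This only hides the symptom; the mismatch in the open-source inference code that xujz18 mentioned still needs a proper fix.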
