Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

T1 language support for simulation #3610

Open
wants to merge 22 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
22 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion .cspell.json
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,8 @@
".github/pipelines/**",
".github/CODEOWNERS",
"src/promptflow-evals/tests/**",
"benchmark/promptflow-serve/result-archive/**"
"benchmark/promptflow-serve/result-archive/**",
"src/promptflow-evals/samples/**"
],
"words": [
"amlignore",
Expand Down
9 changes: 9 additions & 0 deletions src/promptflow-evals/CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,15 @@
- Introduced `IndirectAttackEvaluator` to evaluate content for the presence of XPIA (cross domain prompt injected attacks) injected into conversation or Q/A context to interrupt normal expected functionality by eliciting manipulated content, intrusion and attempting to gather information outside the scope of your AI system.
- Add a new evaluator (ProtectedMaterialEvaluator) and associated adversarial content simulator enum type (AdversarialScenario.ADVERSARIAL_CONTENT_PROTECTED_MATERIAL) for protected material, which determines if given inputs contain material protected by IP laws.
- Added four mathematic evaluators, `BleuScoreEvaluator`, `GleuScoreEvaluator`, `MeteorScoreEvaluator` and `RougeScoreEvaluator` - for evaluating the quality of generated text by comparing it against referenced text.
- Added support for the following languages in the simulator:
- Spanish (`es`)
- Italian (`it`)
- French (`fr`)
- German (`de`)
- Simplified Chinese (`zh-cn`)
- Portuguese (`pt`)
- Japanese (`ja`)
- English (`en`)

### Bugs Fixed
- Fixed evaluators to accept (non-Azure) Open AI Configs.
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,13 @@
# Public API of the synthetic simulator package.
from .adversarial_scenario import AdversarialScenario
from .adversarial_simulator import AdversarialSimulator
from .constants import SupportedLanguages
from .direct_attack_simulator import DirectAttackSimulator
from .xpia_simulator import IndirectAttackSimulator

# Single authoritative __all__; the previous one-line assignment was a stale
# leftover and has been removed.
__all__ = [
    "AdversarialSimulator",
    "AdversarialScenario",
    "DirectAttackSimulator",
    "IndirectAttackSimulator",
    "SupportedLanguages",
]
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,9 @@
import logging
from typing import Callable, Dict, List, Tuple, Union

from promptflow.evals.synthetic._helpers._language_suffix_mapping import SUPPORTED_LANGUAGES_MAPPING
from promptflow.evals.synthetic.constants import SupportedLanguages

from ..._http_utils import AsyncHttpPipeline
from . import ConversationBot, ConversationTurn

Expand Down Expand Up @@ -60,8 +63,10 @@ def is_closing_message_helper(response: str) -> bool:


async def simulate_conversation(
*,
nagkumar91 marked this conversation as resolved.
Show resolved Hide resolved
bots: List[ConversationBot],
session: AsyncHttpPipeline,
language: SupportedLanguages,
stopping_criteria: Callable[[str], bool] = is_closing_message,
turn_limit: int = 10,
history_limit: int = 5,
Expand Down Expand Up @@ -101,6 +106,13 @@ async def simulate_conversation(
else:
conversation_id = None
first_prompt = first_response["samples"][0]
if language != SupportedLanguages.English:
if not isinstance(language, SupportedLanguages) or language not in SupportedLanguages:
raise Exception( # pylint: disable=broad-exception-raised
f"Language option '{language}' isn't supported. Select a supported language option from "
f"promptflow.evals.synthetic._constants.SupportedLanguages: {[f'{e}' for e in SupportedLanguages]}"
)
first_prompt += f" {SUPPORTED_LANGUAGES_MAPPING[language]}"
# Add all generated turns into array to pass for each bot while generating
# new responses. We add generated response and the person generating it.
# in the case of the first turn, it is supposed to be the user search query
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# Re-export the language-to-prompt-suffix mapping as this helpers package's
# only public name.
from ._language_suffix_mapping import SUPPORTED_LANGUAGES_MAPPING

__all__ = ["SUPPORTED_LANGUAGES_MAPPING"]
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
# ---------------------------------------------------------
# Copyright (c) Microsoft Corporation. All rights reserved.
# ---------------------------------------------------------
"""Maps each :class:`SupportedLanguages` member to the prompt suffix that
steers the simulated conversation into that language."""
from promptflow.evals.synthetic.constants import SupportedLanguages

# Template sentence appended to the first simulator prompt; ``__language__``
# is substituted with the human-readable name below.
BASE_SUFFIX = "Make the conversation in __language__ language."

# Human-readable language names keyed by enum member (private to this module).
_LANGUAGE_NAMES = {
    SupportedLanguages.English: "english",
    SupportedLanguages.Spanish: "spanish",
    SupportedLanguages.Italian: "italian",
    SupportedLanguages.French: "french",
    SupportedLanguages.German: "german",
    SupportedLanguages.SimplifiedChinese: "simplified chinese",
    SupportedLanguages.Portuguese: "portuguese",
    SupportedLanguages.Japanese: "japanese",
}

# Final mapping consumed by the conversation simulator.
SUPPORTED_LANGUAGES_MAPPING = {
    language: BASE_SUFFIX.replace("__language__", name)
    for language, name in _LANGUAGE_NAMES.items()
}
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
# Copyright (c) Microsoft Corporation. All rights reserved.
# ---------------------------------------------------------
# noqa: E501
# pylint: disable=E0401,E0611
import asyncio
import functools
import logging
Expand All @@ -26,6 +27,7 @@
TokenScope,
)
from ._utils import JsonLineList
from .constants import SupportedLanguages

logger = logging.getLogger(__name__)

Expand All @@ -45,6 +47,7 @@ def wrapper(*args, **kwargs):
max_conversation_turns = kwargs.get("max_conversation_turns", None)
max_simulation_results = kwargs.get("max_simulation_results", None)
_jailbreak_type = kwargs.get("_jailbreak_type", None)
selected_language = (kwargs.get("language", SupportedLanguages.English),)
decorated_func = monitor_operation(
activity_name="adversarial.simulator.call",
activity_type=ActivityType.PUBLICAPI,
Expand All @@ -53,6 +56,7 @@ def wrapper(*args, **kwargs):
"max_conversation_turns": max_conversation_turns,
"max_simulation_results": max_simulation_results,
"_jailbreak_type": _jailbreak_type,
"selected_language": selected_language,
},
)(func)

Expand Down Expand Up @@ -116,6 +120,7 @@ async def __call__(
api_call_delay_sec: int = 0,
concurrent_async_task: int = 3,
_jailbreak_type: Optional[str] = None,
language: SupportedLanguages = SupportedLanguages.English,
randomize_order: bool = True,
randomization_seed: Optional[int] = None,
):
Expand Down Expand Up @@ -149,6 +154,8 @@ async def __call__(
:keyword concurrent_async_task: The number of asynchronous tasks to run concurrently during the simulation.
Defaults to 3.
:paramtype concurrent_async_task: int
:keyword language: The language in which the conversation should be generated. Defaults to English.
:paramtype language: promptflow.evals.synthetic.constants.SupportedLanguages
:keyword randomize_order: Whether or not the order of the prompts should be randomized. Defaults to True.
:paramtype randomize_order: bool
:keyword randomization_seed: The seed used to randomize prompt selection. If unset, the system's
Expand Down Expand Up @@ -246,6 +253,7 @@ async def __call__(
api_call_retry_limit=api_call_retry_limit,
api_call_retry_sleep_sec=api_call_retry_sleep_sec,
api_call_delay_sec=api_call_delay_sec,
language=language,
semaphore=semaphore,
)
)
Expand Down Expand Up @@ -298,6 +306,7 @@ async def _simulate_async(
api_call_retry_limit,
api_call_retry_sleep_sec,
api_call_delay_sec,
language,
semaphore,
) -> List[Dict]:
user_bot = self._setup_bot(role=ConversationRole.USER, template=template, parameters=parameters)
Expand All @@ -319,6 +328,7 @@ async def _simulate_async(
session=session,
turn_limit=max_conversation_turns,
api_call_delay_sec=api_call_delay_sec,
language=language,
)
return self._to_chat_protocol(conversation_history=conversation_history, template_parameters=parameters)

Expand Down
17 changes: 17 additions & 0 deletions src/promptflow-evals/promptflow/evals/synthetic/constants.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
# ---------------------------------------------------------
# Copyright (c) Microsoft Corporation. All rights reserved.
# ---------------------------------------------------------
from enum import Enum


class SupportedLanguages(Enum):
    """Supported languages for evaluation, using ISO standard language codes.

    Each member's value is the lowercase language code (Simplified Chinese
    carries a region subtag). Callers pass a member — not the string code —
    as the simulator's ``language`` keyword argument.
    """

    Spanish = "es"
    Italian = "it"
    French = "fr"
    German = "de"
    SimplifiedChinese = "zh-cn"  # region-qualified code, lowercased
    Portuguese = "pt"
    Japanese = "ja"
    English = "en"
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
# flake8: noqa

import asyncio
diondrapeck marked this conversation as resolved.
Show resolved Hide resolved
import json
import os
from typing import Any, Dict, List, Optional

from azure.identity import DefaultAzureCredential

from promptflow.evals.evaluate import evaluate
from promptflow.evals.synthetic import AdversarialScenario, AdversarialSimulator, SupportedLanguages


async def callback(
    messages: List[Dict],
    stream: bool = False,
    session_state: Any = None,  # noqa: ANN401
    context: Optional[Dict[str, Any]] = None,
) -> dict:
    """Sample target callback the simulator invokes once per turn.

    :param messages: Chat-protocol payload; ``messages["messages"]`` holds the turn list.
    :param stream: Streaming flag from the caller; echoed back unchanged.
    :param session_state: Opaque session state; echoed back unchanged.
    :param context: Ignored on input; the returned context is always None.
    :return: Chat-protocol dict with the assistant response appended.
    """
    messages_list = messages["messages"]
    # Latest user turn is what a real endpoint would be queried with.
    latest_message = messages_list[-1]
    query = latest_message["content"]  # extract here to forward to your endpoint
    context = None
    # Call your endpoint to get the response; this sample returns a canned refusal.
    response = "I cannot answer that question"

    # Format the response to follow the OpenAI chat protocol.
    formatted_response = {
        # Bug fix: previously echoed `query`; the assistant turn must carry
        # the endpoint's response.
        "content": response,
        "role": "assistant",
        "context": {
            "citations": None,
        },
    }
    messages["messages"].append(formatted_response)
    return {"messages": messages["messages"], "stream": stream, "session_state": session_state, "context": context}


async def main(azure_ai_project):
    """Run the adversarial conversation simulator in French and print the output.

    :param azure_ai_project: Dict with ``subscription_id``,
        ``resource_group_name`` and ``project_name`` identifying the
        Azure AI project to simulate against.
    """
    simulator = AdversarialSimulator(azure_ai_project=azure_ai_project, credential=DefaultAzureCredential())
    outputs = await simulator(
        target=callback,
        scenario=AdversarialScenario.ADVERSARIAL_CONVERSATION,
        max_simulation_results=5,
        max_conversation_turns=3,
        language=SupportedLanguages.French,
    )
    print(json.dumps(outputs, indent=2))
    # Bug fix: removed leftover `import pdb; pdb.set_trace()` debugging
    # breakpoint, which would hang the sample waiting for debugger input.


if __name__ == "__main__":
    # Project identity is taken from the environment so the sample never
    # hard-codes subscription details.
    project_scope = {
        "subscription_id": os.environ.get("AZURE_SUBSCRIPTION_ID"),
        "resource_group_name": os.environ.get("RESOURCE_GROUP"),
        "project_name": os.environ.get("PROJECT_NAME"),
    }
    asyncio.run(main(azure_ai_project=project_scope))
    print("done!")
Loading
Loading