[NewIR] Support new IR load_combine #56101

Merged: 11 commits, Aug 15, 2023
@@ -21,7 +21,6 @@

#include "paddle/fluid/framework/new_executor/new_executor_defs.h"
#include "paddle/fluid/platform/event.h"
#include "paddle/ir/core/value.h"

namespace ir {
class Value;
@@ -12,13 +12,13 @@
// See the License for the specific language governing permissions and
// limitations under the License.

#include "paddle/fluid/framework/new_executor/instruction/instruction_util.h"

#include <map>
#include <string>
#include <unordered_map>
#include <vector>

#include "paddle/fluid/framework/new_executor/instruction/instruction_util.h"

#include "paddle/fluid/framework/new_executor/new_executor_defs.h"
#include "paddle/fluid/platform/device_context.h"
#include "paddle/fluid/platform/event.h"
@@ -42,7 +42,7 @@ std::vector<int> GetValueIds(
const std::unordered_map<const paddle::framework::Variable*, std::string>&
variable_2_var_name) {
std::vector<int> ids;
std::string var_name = value_2_var_name.at(value);
auto& var_name = value_2_var_name.at(value);
ids.push_back(var_name_2_id.at(var_name));
// NOTE(zhangbo): Value may be a VariableRefArray
auto var = inner_scope->FindVar(var_name);
@@ -61,7 +61,7 @@ platform::DeviceContext* ParseDeviceContext(
const platform::Place& place,
const std::string& execution_stream,
const int stream_priority) {
auto op_attributes = op->attributes();
auto& op_attributes = op->attributes();
auto op_name =
op_attributes.at("op_name").dyn_cast<::ir::StrAttribute>().AsString();
interpreter::ContextManager& ctx_manager =
@@ -149,7 +149,7 @@ OpFuncType AnalyseOpFuncType(::ir::Operation* op,
// computing. They execute serially in device thread and block CUDA kernel
// launching in other GPU OPs. To improve performance, set them as kGpuSync
// so that they are dispatched to the host thread.
auto op_attributes = op->attributes();
auto& op_attributes = op->attributes();
auto op_name =
op_attributes.at("op_name").dyn_cast<::ir::StrAttribute>().AsString();
if (op_name == kCoalesceTensor &&
@@ -43,7 +43,7 @@ LegacyKernelInstruction::LegacyKernelInstruction(
const std::unordered_map<const paddle::framework::Variable*, std::string>&
variable_2_var_name)
: InstructionBase(id, place) {
auto op_attributes = op->attributes();
auto& op_attributes = op->attributes();
auto op_name =
op_attributes.at("op_name").dyn_cast<::ir::StrAttribute>().AsString();
ir::OpInfo op_info = ir::IrContext::Instance()->GetRegisteredOpInfo(op_name);
@@ -97,18 +97,20 @@ LegacyKernelInstruction::LegacyKernelInstruction(
yaml_interface->get_op_info_());
VLOG(6) << "finish process yaml_info_parser";

::ir::BuildPhiContext<
phi::InferMetaContext,
phi::MetaTensor,
phi::MetaTensor,
paddle::small_vector<phi::MetaTensor, phi::kInputSmallVectorSize>,
paddle::small_vector<phi::MetaTensor, phi::kInputSmallVectorSize>,
false>(op,
value_2_var_name,
scope,
local_scope,
yaml_info_parser,
&infer_meta_context_);
if (infer_meta_interface_) {
::ir::BuildPhiContext<
phi::InferMetaContext,
phi::MetaTensor,
phi::MetaTensor,
paddle::small_vector<phi::MetaTensor, phi::kInputSmallVectorSize>,
paddle::small_vector<phi::MetaTensor, phi::kInputSmallVectorSize>,
false>(op,
value_2_var_name,
scope,
local_scope,
yaml_info_parser,
&infer_meta_context_);
}
VLOG(6) << "finish process infer meta context";

auto kernel_name =
@@ -123,8 +125,10 @@ LegacyKernelInstruction::LegacyKernelInstruction(
phi_kernel_->IsValid(), true, "not found kernel for [%s]", kernel_name);
VLOG(6) << "finish process select kernel";

operator_base_ =
ir::BuildOperatorBase(op, value_2_var_name, yaml_info_parser);
Scope* inner_scope = local_scope == nullptr ? scope : local_scope;

operator_base_ = ir::BuildOperatorBase(
op, value_2_var_name, yaml_info_parser, variable_2_var_name, inner_scope);
paddle::framework::VariableValueMap in_map;
paddle::framework::VariableValueMap out_map;
auto dev_ctx = phi::DeviceContextPool::Instance().Get(
@@ -151,7 +155,6 @@ LegacyKernelInstruction::LegacyKernelInstruction(
GetStreamPriority()));
VLOG(6) << "finish process device context";

Scope* inner_scope = local_scope == nullptr ? scope : local_scope;
InitInputsOutputsIds(
op, inner_scope, value_2_var_name, var_name_2_id, variable_2_var_name);
VLOG(6) << "finish process inputs outputs index";
@@ -169,10 +172,16 @@ LegacyKernelInstruction::~LegacyKernelInstruction() {
if (kernel_context_ != nullptr) {
delete kernel_context_;
}

if (phi_kernel_ != nullptr) {
delete phi_kernel_;
}
}

void LegacyKernelInstruction::Run() {
infer_meta_interface_->infer_meta_(&(infer_meta_context_));
if (infer_meta_interface_) {
infer_meta_interface_->infer_meta_(&(infer_meta_context_));
}
VLOG(6) << "Run op " << legacy_op_name_ << " infer meta.";
(*(phi_kernel_))((kernel_context_));
VLOG(6) << "Run op " << legacy_op_name_ << " kernel.";
41 changes: 11 additions & 30 deletions paddle/fluid/framework/new_executor/interpreter/interpreter_util.cc
@@ -1076,36 +1076,17 @@ void BuildOpFuncList(
"not found kernel for [%s]",
kernel_name);

if (kernel_name == "fused_softmax_mask_upper_triangle" ||
kernel_name == "fused_softmax_mask_upper_triangle_grad") {
// builder operator
op_func_node.operator_base_ =
ir::BuildOperatorBase(op, value_2_name_map, op_yaml_info_parser);
paddle::framework::VariableValueMap in_map;
paddle::framework::VariableValueMap out_map;
op_func_node.runtime_ctx_ =
std::make_shared<paddle::framework::RuntimeContext>(
paddle::framework::RuntimeContext(in_map, out_map));
ir::BuildRuntimeContext(op,
value_2_name_map,
scope,
local_scope,
op_yaml_info_parser,
op_func_node.runtime_ctx_.get());
op_func_node.fluid_op = true;
} else {
::ir::BuildPhiContext<phi::KernelContext,
const phi::TensorBase*,
phi::TensorBase*,
paddle::small_vector<const phi::TensorBase*>,
paddle::small_vector<phi::TensorBase*>,
true>(op,
value_2_name_map,
scope,
local_scope,
op_yaml_info_parser,
&(op_func_node.kernel_context_));
}
Contributor: As the executor integration is completed, the BuildOpFuncList function should have little use left. TODO: clean it up together with a later executor code-cleanup PR.

::ir::BuildPhiContext<phi::KernelContext,
const phi::TensorBase*,
phi::TensorBase*,
paddle::small_vector<const phi::TensorBase*>,
paddle::small_vector<phi::TensorBase*>,
true>(op,
value_2_name_map,
scope,
local_scope,
op_yaml_info_parser,
&(op_func_node.kernel_context_));

VLOG(6) << "finish process kernel context";
op_func_node.kernel_context_.SetDeviceContext(
4 changes: 2 additions & 2 deletions paddle/fluid/framework/new_executor/new_ir_interpreter.cc
@@ -38,6 +38,7 @@

#include "paddle/fluid/framework/new_executor/instruction/legacy_kernel_instruction.h"
#include "paddle/fluid/framework/new_executor/instruction/phi_kernel_instruction.h"
#include "paddle/fluid/ir/dialect/utils.h"
#include "paddle/fluid/ir/phi_kernel_adaptor/phi_kernel_util.h"
#include "paddle/ir/core/builtin_attribute.h"

@@ -439,8 +440,7 @@ void NewIRInterpreter::BuildInstruction() {
}
VLOG(6) << "process " << op_name;

if (op_name == "pd.fused_softmax_mask_upper_triangle" ||
op_name == "pd.fused_softmax_mask_upper_triangle_grad") {
if (dialect::IsLegacyOp(op_name)) {
vec_instruction_base_.emplace_back(
std::make_unique<LegacyKernelInstruction>(op_idx++,
place_,
2 changes: 1 addition & 1 deletion paddle/fluid/ir/dialect/pd_op.yaml
@@ -103,7 +103,7 @@
- {typename: bool, name: load_as_fp16}
- {typename: bool, name: model_from_memory}
outputs:
- {typename: 'Tensor[]', name: out, optional: true, intermediate: false}
- {typename: 'Tensor[]', name: Out, optional: true, intermediate: false}
no_need_buffer: null
data_transform: null
kernel:
7 changes: 7 additions & 0 deletions paddle/fluid/ir/dialect/utils.cc
@@ -17,6 +17,11 @@
namespace paddle {
namespace dialect {

const std::unordered_set<std::string> LegacyOpList = {
"pd.fused_softmax_mask_upper_triangle",
"pd.fused_softmax_mask_upper_triangle_grad",
"pd.load_combine"};

enum class AttrType {
UNDEFINED = 0,
BOOL,
@@ -167,5 +172,7 @@ VariantType GetAttributeData(const ir::Attribute& attr) {
return kAttrCastMap[attr_type](attr);
}

bool IsLegacyOp(const std::string& name) { return LegacyOpList.count(name); }

} // namespace dialect
} // namespace paddle
2 changes: 2 additions & 0 deletions paddle/fluid/ir/dialect/utils.h
@@ -147,5 +147,7 @@ static inline ir::Attribute TransToIrAttribute(phi::Scalar scalar,

VariantType GetAttributeData(const ir::Attribute& attr);

bool IsLegacyOp(const std::string& name);

} // namespace dialect
} // namespace paddle
76 changes: 70 additions & 6 deletions paddle/fluid/ir/phi_kernel_adaptor/phi_kernel_util.cc
@@ -598,17 +598,39 @@ void BuildRuntimeContext(
PADDLE_ENFORCE_NOT_NULL(inner_scope->FindVar(in_var_name),
phi::errors::PreconditionNotMet(
"can not find var[%s] in scope", in_var_name));

auto var = inner_scope->FindVar(in_var_name);
std::vector<paddle::framework::Variable*> vec_tmp = {var};
auto legacy_attr_name = op_normalizer.GetLegacyArgName(fluid_op_name, name);
runtime_ctx->outputs[legacy_attr_name] = vec_tmp;

auto type = ptr.type();
auto legacy_arg_name = op_normalizer.GetLegacyArgName(fluid_op_name, name);
if (type.isa<paddle::dialect::AllocatedDenseTensorType>() ||
type.isa<paddle::dialect::AllocatedSelectedRowsType>()) {
std::vector<paddle::framework::Variable*> vec_tmp = {var};

runtime_ctx->outputs[legacy_arg_name] = vec_tmp;

Contributor (suggested change for lines +608 to +610): replace the two lines above with
runtime_ctx->outputs[legacy_arg_name] = {var};
so the braced list constructs the map entry in place — a standalone sketch follows this function.

} else if (type.isa<ir::VectorType>()) {
auto var_ref = var->Get<paddle::framework::VariableRefArray>();
std::vector<paddle::framework::Variable*> vec_tmp;
vec_tmp.reserve(var_ref.size());
for (size_t k = 0; k < var_ref.size(); ++k) {
vec_tmp.push_back(const_cast<paddle::framework::Variable*>(var_ref[k]));
}
runtime_ctx->outputs[legacy_arg_name] = vec_tmp;
} else {
PADDLE_THROW(phi::errors::Unimplemented(
"only support AllocatedDenseTensor, AllocatedSelectedRowsType and "
"ir::vector type"));
}
}
}
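A self-contained sketch of the reviewer's suggestion (simplified types; int* stands in for paddle::framework::Variable*, and this is not Paddle code): assigning a braced initializer list constructs the map entry's vector in place, making the named temporary unnecessary.

#include <cassert>
#include <map>
#include <string>
#include <vector>

int main() {
  int value = 0;
  int* var = &value;  // stand-in for paddle::framework::Variable*
  std::map<std::string, std::vector<int*>> outputs;

  // Current form in the PR: a named temporary, then an assignment.
  std::vector<int*> vec_tmp = {var};
  outputs["with_temporary"] = vec_tmp;

  // Reviewer's form: the braced list constructs the vector in place.
  outputs["in_place"] = {var};

  assert(outputs["with_temporary"] == outputs["in_place"]);
  return 0;
}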

std::shared_ptr<paddle::framework::OperatorBase> BuildOperatorBase(
ir::Operation* op,
const std::unordered_map<ir::Value, std::string>& name_map,
const paddle::dialect::OpYamlInfoParser& op_yaml_info) {
const paddle::dialect::OpYamlInfoParser& op_yaml_info,
const std::unordered_map<const paddle::framework::Variable*, std::string>&
variable_2_var_name,
const paddle::framework::Scope* scope) {
paddle::framework::VariableNameMap in_name_map;
paddle::framework::VariableNameMap out_name_map;
paddle::framework::AttributeMap attr_map;
@@ -637,15 +659,57 @@ std::shared_ptr<paddle::framework::OperatorBase> BuildOperatorBase(
}

// build attribute
auto& op_attr_map = op->attributes();
auto attr_name_list = op_yaml_info.AttrParams(true);
for (auto& name : attr_name_list) {
auto& val = op_attr_map.at(name);

Contributor: The attribute-parsing logic here is already implemented by the GetAttributeData function in paddle/fluid/ir/dialect/utils.h — could you check whether it can be reused?

Collaborator (author): I checked: GetAttributeData returns a phi attribute, but a framework::Attribute is needed here. The definitions differ somewhat, so the same function cannot be used for both.

if (val.isa<ir::StrAttribute>()) {
attr_map[name] = val.dyn_cast<ir::StrAttribute>().AsString();
} else if (val.isa<ir::Int32Attribute>()) {
attr_map[name] = val.dyn_cast<ir::Int32Attribute>().data();
} else if (val.isa<ir::BoolAttribute>()) {
attr_map[name] = val.dyn_cast<ir::BoolAttribute>().data();
} else if (val.isa<ir::FloatAttribute>()) {
attr_map[name] = val.dyn_cast<ir::FloatAttribute>().data();
} else if (val.isa<ir::DoubleAttribute>()) {
attr_map[name] = val.dyn_cast<ir::DoubleAttribute>().data();
} else if (val.isa<ir::Int64Attribute>()) {
attr_map[name] = val.dyn_cast<ir::Int64Attribute>().data();
} else {
std::stringstream ss;
val.Print(ss);
VLOG(1) << "type not support " << ss.str() << std::endl;
PADDLE_THROW("Type[%s] in attribute map not support yet", ss.str());
}
}

auto& output_name_list = op_yaml_info.OutputNames();
for (size_t i = 0; i < output_name_list.size(); ++i) {
auto name = output_name_list[i];
ir::Value ptr = op->result(i);

auto out_var_name = name_map.at(ptr);
auto legacy_attr_name = op_normalizer.GetLegacyArgName(fluid_op_name, name);
out_name_map[legacy_attr_name].push_back(out_var_name);

auto type = ptr.type();
auto legacy_arg_name = op_normalizer.GetLegacyArgName(fluid_op_name, name);
if (type.isa<paddle::dialect::AllocatedDenseTensorType>() ||
type.isa<paddle::dialect::AllocatedSelectedRowsType>()) {
out_name_map[legacy_arg_name].push_back(out_var_name);
} else if (type.isa<ir::VectorType>()) {
auto var = scope->FindVar(out_var_name);
auto var_ref = var->Get<paddle::framework::VariableRefArray>();
for (size_t k = 0; k < var_ref.size(); ++k) {
PADDLE_ENFORCE(variable_2_var_name.count(var_ref[k]),
"Variable MUST in variable_2_var_name map");
out_name_map[legacy_arg_name].push_back(
variable_2_var_name.at(var_ref[k]));
}
} else {
PADDLE_THROW(phi::errors::Unimplemented(
"only support AllocatedDenseTensor, AllocatedSelectedRowsType and "
"ir::vector type"));
}
}

auto& op_info = paddle::framework::OpInfoMap::Instance().Get(fluid_op_name);
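A self-contained sketch of the new Tensor[] output handling above (simplified types; the names mirror the PR, but this is not Paddle code): one vector-typed result expands into one legacy output name per backing variable, each recovered via the variable-to-name map.

#include <cassert>
#include <string>
#include <unordered_map>
#include <vector>

struct Variable {};  // stand-in for paddle::framework::Variable

int main() {
  // Reverse lookup from Variable* to its name in the scope.
  Variable a, b;
  std::unordered_map<const Variable*, std::string> variable_2_var_name = {
      {&a, "out@0"}, {&b, "out@1"}};

  // One vector-typed result backed by several variables (VariableRefArray-like).
  std::vector<const Variable*> var_ref = {&a, &b};

  // Expand the single result into one legacy output name per element.
  std::unordered_map<std::string, std::vector<std::string>> out_name_map;
  for (const Variable* v : var_ref) {
    assert(variable_2_var_name.count(v) && "Variable MUST be in the map");
    out_name_map["Out"].push_back(variable_2_var_name.at(v));
  }

  assert(out_name_map["Out"].size() == 2);
  return 0;
}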
5 changes: 4 additions & 1 deletion paddle/fluid/ir/phi_kernel_adaptor/phi_kernel_util.h
@@ -62,7 +62,10 @@ void BuildRuntimeContext(
std::shared_ptr<paddle::framework::OperatorBase> BuildOperatorBase(
ir::Operation* op,
const std::unordered_map<ir::Value, std::string>& name_map,
const paddle::dialect::OpYamlInfoParser& op_yaml_info);
const paddle::dialect::OpYamlInfoParser& op_yaml_info,
const std::unordered_map<const paddle::framework::Variable*, std::string>&
variable_2_var_name,
const paddle::framework::Scope* scope);

template <typename Context,
typename InType,
11 changes: 5 additions & 6 deletions paddle/fluid/ir/transforms/pd_op_to_kernel_pass.cc
@@ -59,10 +59,6 @@ const std::unordered_set<std::string> UnchangeOutputOps = {
"builtin.get_parameter",
"pd.shadow_output"};

const std::unordered_set<std::string> LegacyOpList = {
"pd.fused_softmax_mask_upper_triangle",
"pd.fused_softmax_mask_upper_triangle_grad"};

bool NeedFallBackCpu(const ir::Operation* op,
const std::string& kernel_fn_name,
const phi::KernelKey& kernel_key) {
@@ -553,6 +549,9 @@ std::unique_ptr<ir::Program> PdOpLowerToKernelPass(ir::Program* prog,
GetKernelKey(op_item, place, map_value_pair, op_info_parser.get());
VLOG(6) << "kernel type " << kernel_key;

if (op_item->name() == "pd.load_combine") {
kernel_key.set_dtype(phi::DataType::FLOAT32);

Contributor: Why is kernel_key's dtype set directly to FP32 here? load_combine supports many other data types — was only FP32 verified first, or does later logic adapt it? If the former, should a TODO be added here?

Collaborator (author): load_combine is an op with no inputs, so the output dtype cannot be deduced from the input argument list, yet the kernel key still requires a dtype.

}
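The author's reply can be illustrated with a self-contained sketch (simplified, hypothetical stand-ins for phi::KernelKey and phi::DataType; not the actual pass code): with no operands to inspect, dtype inference yields nothing, so a concrete default must be pinned before kernel selection.

#include <cassert>
#include <vector>

// Simplified stand-ins for phi::DataType and phi::KernelKey.
enum class DataType { UNDEFINED, FLOAT32 };

struct KernelKey {
  DataType dtype = DataType::UNDEFINED;
  void set_dtype(DataType d) { dtype = d; }
};

// With no operands, nothing can supply a dtype.
DataType InferDtypeFromOperands(const std::vector<DataType>& operand_dtypes) {
  return operand_dtypes.empty() ? DataType::UNDEFINED : operand_dtypes.front();
}

int main() {
  KernelKey kernel_key;
  std::vector<DataType> no_operands;  // pd.load_combine has no inputs
  kernel_key.dtype = InferDtypeFromOperands(no_operands);

  if (kernel_key.dtype == DataType::UNDEFINED) {
    kernel_key.set_dtype(DataType::FLOAT32);  // pinned default, as in the pass
  }
  assert(kernel_key.dtype == DataType::FLOAT32);
  return 0;
}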
if (NeedFallBackCpu((op_item), kernel_fn_str, kernel_key)) {
kernel_key.set_backend(phi::Backend::CPU);
}
@@ -571,7 +570,7 @@ std::unique_ptr<ir::Program> PdOpLowerToKernelPass(ir::Program* prog,
auto args_def = phi_kernel.args_def();
auto output_defs = args_def.output_defs();
if (!UnchangeOutputOps.count(op_item->name()) &&
!LegacyOpList.count(op_item->name())) {
!IsLegacyOp(op_item->name())) {
PADDLE_ENFORCE_EQ(
op_item->num_results(),
output_defs.size(),
@@ -583,7 +582,7 @@ std::unique_ptr<ir::Program> PdOpLowerToKernelPass(ir::Program* prog,
for (size_t i = 0; i < op_item->num_results(); ++i) {
phi::Place out_place;
if ((!UnchangeOutputOps.count(op_item->name())) &&
(!LegacyOpList.count(op_item->name())) && phi_kernel.IsValid()) {
(!IsLegacyOp(op_item->name())) && phi_kernel.IsValid()) {
out_place = phi::TransToPhiPlace(output_defs[i].backend);
} else {
out_place = phi::TransToPhiPlace(kernel_key.backend());