Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion scripts/fix_header_guards.py
Original file line number Diff line number Diff line change
Expand Up @@ -116,7 +116,7 @@ def get_file_guard(path):
if path.startswith("src/gpu/intel/gemm/jit/"):
base = os.path.basename(path)
if path != "src/gpu/intel/gemm/jit/" + base:
path = "src/gemmstone_guard/" + os.path.basename(path)
path = path.replace("gpu/intel/gemm/jit", "gemmstone")
elif path.startswith("src/gpu/intel/microkernels"):
path = path.replace("intel/", "")
guard = path
Expand Down
4 changes: 3 additions & 1 deletion src/gpu/intel/binary/multi_po_reorder.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@

#include "common/c_types_map.hpp"
#include "common/reorder.hpp"
#include "gpu/intel/binary/config.hpp"
#include "gpu/gpu_binary_pd.hpp"
#include "gpu/intel/primitive.hpp"

namespace dnnl {
Expand All @@ -28,6 +28,8 @@ namespace gpu {
namespace intel {
namespace binary {

using pd_t = gpu_binary_pd_t;

struct multi_po_reorder_t : public primitive_t {
using primitive_t::primitive_t;
struct pd_t : public binary::pd_t {
Expand Down
17 changes: 17 additions & 0 deletions src/gpu/intel/block_structure.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,23 @@ block_layout_t::block_layout_t(
if (do_normalize) *this = normalized();
}

// Builds a layout with one entry per dimension index in [0, MAX_NDIMS),
// describing the inner blocking of `md`.
//
// Every slot starts out as a size-1 placeholder block for its dimension;
// slots whose dimension appears in the inner-only layout of `md` are then
// replaced by that block. If several inner blocks share the same dim_idx,
// the one visited last by the iteration is the one that is kept.
block_layout_t get_inner_layout(const memory_desc_wrapper &md) {
    block_layout_t result;

    // Seed each dimension slot with a size-1 block so the returned layout
    // can always be indexed by dimension.
    for (int dim = 0; dim != MAX_NDIMS; ++dim)
        result.append(block_t(dim, 1, 0));

    // Replace the placeholders with the actual inner blocks of `md`.
    block_layout_t inner_blocks(md, /* inner_only */ true);
    for (const auto &blk : inner_blocks)
        result[blk.dim_idx] = blk;

    return result;
}

} // namespace intel
} // namespace gpu
} // namespace impl
Expand Down
2 changes: 2 additions & 0 deletions src/gpu/intel/block_structure.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -188,6 +188,8 @@ struct block_layout_t {
std::vector<block_t> normalize_blocks(
const std::vector<block_t> &blocks, bool remove_size_1_blocks = true);

block_layout_t get_inner_layout(const memory_desc_wrapper &md);

} // namespace intel
} // namespace gpu
} // namespace impl
Expand Down
5 changes: 5 additions & 0 deletions src/gpu/intel/compute/device_info.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,11 @@ uint64_t get_future_extensions(
return extensions;
}

// Returns the nGEN core (ngen::HW) for this device, derived from the
// family of the nGEN product that jit::get_ngen_product() reports for it.
ngen::HW device_info_t::ngen_hw() const {
    const ngen::Product product = jit::get_ngen_product(*this);
    const auto core = ngen::getCore(product.family);
    return core;
}

int device_info_t::stepping_id() const {
ngen::Product p = jit::get_ngen_product(*this);
return p.stepping;
Expand Down
6 changes: 6 additions & 0 deletions src/gpu/intel/compute/device_info.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,11 @@

#include "oneapi/dnnl/dnnl_config.h"

// Forward declarations of the nGEN core enumeration and its HW alias, so
// this header can name ngen::HW in declarations (e.g. as a return type).
// NOTE(review): presumably this avoids including the nGEN headers here;
// the opaque declaration must stay in sync with the real definition of
// ngen::Core (same underlying type) — confirm against the nGEN sources.
namespace ngen {
enum class Core;
using HW = Core;
} // namespace ngen

namespace dnnl {
namespace impl {
namespace gpu {
Expand Down Expand Up @@ -190,6 +195,7 @@ struct device_info_t {
bool has_native(native_ext_t ext) const { return native_extensions_ & (uint64_t)ext; }
gpu_arch_t gpu_arch() const { return gpu_arch_; }
const gpu_product_t &gpu_product() const {return gpu_product_;}
ngen::HW ngen_hw() const;
int stepping_id() const;
uint64_t native_extensions() const { return native_extensions_; }
bool is_integrated() const;
Expand Down
10 changes: 5 additions & 5 deletions src/gpu/intel/conv/jit.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -365,9 +365,9 @@ class gen_t {
if (zp.get(key).is_host_scalar()) switch (key) {
case DNNL_ARG_SRC:
return (zp.has_default_values(DNNL_ARG_WEIGHTS))
? type_t::f32()
: type_t::s32();
case DNNL_ARG_DST: return type_t::f32();
? dsl::type_t::f32()
: dsl::type_t::s32();
case DNNL_ARG_DST: return dsl::type_t::f32();
default: return to_ir(zp.get(key).get_data_type());
}
} else if (key & DNNL_ARG_ATTR_SCALES) {
Expand All @@ -376,7 +376,7 @@ class gen_t {
if (sc.get(key).is_host_scalar())
return to_ir(sc.get(key).get_data_type());
}
return type_t::byte(type::attr_t::ptr);
return dsl::type_t::byte(dsl::type::attr_t::ptr);
};
const bool wei_reorder_precalc = (t.name == "wei")
&& cfg.zp_cfg().needs_src_reorder_precalc;
Expand Down Expand Up @@ -412,7 +412,7 @@ class gen_t {
auto create_zero_out_info = [&]() -> kernel_info_t & {
auto &zero_out_info
= create_kernel_info(pd, kernel_id_t::zero_out);
auto size_var = var_t::make(type_t::u32(), "size");
auto size_var = var_t::make(dsl::type_t::u32(), "size");
zero_out_info.register_immediate_arg(
size_var, into<uint32_t>(compute_size));
zero_out_info.set_nd_range(zero_out_kernel_desc_t::nd_range(
Expand Down
40 changes: 21 additions & 19 deletions src/gpu/intel/conv/jit/config.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,7 @@ std::string prepend_groups_to_tag(const std::string &tag) {
return "a" + ret;
}

int get_default_mad_block(const type_t &type) {
int get_default_mad_block(const dsl::type_t &type) {
switch (type.size()) {
// fp4 gets upconverted to f16 for mad.
case 1: return (type.is_fp4() ? 16 : 32);
Expand All @@ -109,7 +109,7 @@ int get_default_mad_block(const type_t &type) {
return 1;
}

bool is_small(const type_t &type, dim_t elems) {
bool is_small(const dsl::type_t &type, dim_t elems) {
int block = get_default_mad_block(type);
return elems <= block / 2;
}
Expand Down Expand Up @@ -195,7 +195,7 @@ status_t problem_t::init(
isp = id * ih * iw;
osp = od * oh * ow;

hw_t hw(make_ir_hw(engine));
dsl::hw_t hw(make_ir_hw(engine));
init_transpose(hw);
CHECK(init_abc_data_types(hw));
CHECK(init_acc_data_type());
Expand Down Expand Up @@ -317,7 +317,7 @@ int pick_block(dim_t dim, int b0, int b1 = 0, int b2 = 0) {
return pick_block_impl(false, dim, b0, b1, b2);
}

int get_default_block(fma_kind_t fma, const type_t &type, dim_t elems) {
int get_default_block(fma_kind_t fma, const dsl::type_t &type, dim_t elems) {
if (is_dp_fma(fma)) {
if (is_small(type, elems)) {
int packed_dword_elems = 32 / type.bitsize();
Expand All @@ -330,7 +330,7 @@ int get_default_block(fma_kind_t fma, const type_t &type, dim_t elems) {
return get_default_mad_block(type);
}

fma_kind_t get_default_fma(const hw_t &hw, const type_t &type) {
fma_kind_t get_default_fma(const dsl::hw_t &hw, const dsl::type_t &type) {
switch (type.size()) {
case 1:
if (hw >= ngen::HW::XeHP) return fma_kind_t::dpas;
Expand All @@ -356,8 +356,8 @@ struct nc_block_t {

// Ideally, this should only depend on data type, direction, mb, c, and g to
// enable the same src/dst formats and avoid reorders between convolutions
static nc_block_t get_default_blocking(const hw_t &hw, fma_kind_t fma,
type_t type, bool is_dw, dim_t n, dim_t c, dim_t g,
static nc_block_t get_default_blocking(const dsl::hw_t &hw, fma_kind_t fma,
dsl::type_t type, bool is_dw, dim_t n, dim_t c, dim_t g,
bool is_output = false) {
// Select dst layout to align with fma kind of following conv
// for non-depthwise cases.
Expand Down Expand Up @@ -417,7 +417,7 @@ struct goi_block_t {
{1, o_block_outer_, i_block_outer_}, wei_letters, wei_idxs);
}

static goi_block_t get_default_blocking(type_t type, int vec_size,
static goi_block_t get_default_blocking(dsl::type_t type, int vec_size,
fma_kind_t fma_kind, bool is_fwd, bool is_bwd_d, dim_t g, dim_t o,
dim_t i, bool ab_transpose) {
dim_t x = o;
Expand All @@ -443,7 +443,7 @@ struct goi_block_t {
i_block, o_block_outer, i_block_outer);
}

static void get_default_blocking(type_t type, int vec_size,
static void get_default_blocking(dsl::type_t type, int vec_size,
fma_kind_t fma_kind, bool is_fwd, bool is_bwd_d, dim_t g, dim_t x,
dim_t y, int &g_block, int &x_block, int &y_block,
int &y_block_outer, bool ab_transpose = false) {
Expand Down Expand Up @@ -791,10 +791,10 @@ status_t init_tensor_layouts(
if (prb.is_bwd_w) {
if (utils::one_of(prb.wei_data_type, data_type::bf16, data_type::f16,
data_type::f8_e5m2, data_type::f8_e4m3))
wei_layout = wei_layout.with(type_t::f32());
wei_layout = wei_layout.with(dsl::type_t::f32());
if (utils::one_of(prb.bia_data_type, data_type::bf16, data_type::f16,
data_type::f8_e5m2, data_type::f8_e4m3))
bia_layout = bia_layout.with(type_t::f32());
bia_layout = bia_layout.with(dsl::type_t::f32());
}

src.set_compute_unnormalized(src_layout, src_tag);
Expand Down Expand Up @@ -875,13 +875,13 @@ status_t init_tensor_layouts(
return status::success;
}

bool hw_ok(const hw_t &hw) {
bool hw_ok(const dsl::hw_t &hw) {
if (hw < ngen::HW::XeLP) return false;
return true;
}

bool data_types_ok(
const problem_t &prb, const hw_t &hw, impl::engine_t *engine) {
const problem_t &prb, const dsl::hw_t &hw, impl::engine_t *engine) {
auto src = prb.src_data_type;
auto wei = prb.wei_data_type;
auto dst = prb.dst_data_type;
Expand Down Expand Up @@ -960,7 +960,7 @@ bool zero_points_ok(const problem_t &prb) {
return true;
}

bool post_ops_ok(const problem_t &prb, const hw_t &hw) {
bool post_ops_ok(const problem_t &prb, const dsl::hw_t &hw) {
auto *pd = prb.conv_pd;
auto *attr = prb.attr;

Expand Down Expand Up @@ -1135,7 +1135,7 @@ void init_bwd_d_optimize(config_t &cfg) {

status_t init_pd_time_cfg(const problem_t &prb, config_t &cfg,
impl::engine_t *engine, convolution_pd_t *pd, primitive_attr_t *attr) {
hw_t hw(make_ir_hw(engine));
dsl::hw_t hw(make_ir_hw(engine));

VDISPATCH_CHECK(pd, engine, hw_ok(hw), VERBOSE_UNSUPPORTED_ISA);
VDISPATCH_CHECK(
Expand All @@ -1148,7 +1148,7 @@ status_t init_pd_time_cfg(const problem_t &prb, config_t &cfg,
zero_points_config_t zp_cfg(pd);
cfg.set_zp_cfg(zp_cfg);
cfg.set_prb(prb);
cfg.set_options(kernel::options_t(hw));
cfg.set_options(dsl::kernel::options_t(hw));
cfg.maybe_override_from_env();

CHECK(init_fma_kind(cfg, pd, engine));
Expand All @@ -1165,7 +1165,7 @@ status_t init_pd_time_cfg(const problem_t &prb, config_t &cfg,
}

bool pipeline_unroll_hint(const problem_t &prb, fma_kind_t fma_kind,
const kernel::options_t &options,
const dsl::kernel::options_t &options,
bwd_d_optimize_kind_t bwd_d_optimize_kind,
bool allow_global_reduction) {
bool do_unroll = true;
Expand Down Expand Up @@ -1853,8 +1853,10 @@ void validate_config_and_plan(config_t &cfg) {
b_load_pattern = validate_blocking(
cfg, stride_layout_t::input_tensor_t::dst, b_2d);
}
auto dummy_mem(var_t::make(type_t::byte(type::attr_t::ptr), "mem"));
auto dummy_reg(var_t::make(type_t::byte(type::attr_t::ptr), "reg"));
auto dummy_mem(
var_t::make(dsl::type_t::byte(dsl::type::attr_t::ptr), "mem"));
auto dummy_reg(
var_t::make(dsl::type_t::byte(dsl::type::attr_t::ptr), "reg"));
if (!a_load_pattern.matches(
plan.x2r.a_load.create_stmt(dummy_mem, dummy_reg))) {
gpu_warning() << "Generated load for tensor A does not match "
Expand Down
4 changes: 2 additions & 2 deletions src/gpu/intel/conv/jit/config.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -543,7 +543,7 @@ class config_t : public prim_config_t {

int reserved_regs() const;

const hw_t &hw() const { return options().hw(); }
const dsl::hw_t &hw() const { return options().hw(); }

bool is_ge_xe_hpc() const { return hw() >= ngen::HW::XeHPC; }

Expand Down Expand Up @@ -691,7 +691,7 @@ int slm_bufs_hint(const problem_t &prb, dim_t m_tg, dim_t n_tg,
bool do_unroll);
tensor_config_t get_tensor_config(
const config_t &cfg, const memory_desc_t *zp_src);
bool is_small(const type_t &type, dim_t elems);
bool is_small(const dsl::type_t &type, dim_t elems);
int estimate_register_count(const config_t &cfg);
int default_regs(const config_t &cfg);
void init_kernel_grid(config_t &cfg);
Expand Down
Loading
Loading