@@ -1041,6 +1041,322 @@ TEST(Loop, IterationCountAsOutput) {
10411041 test.Run (OpTester::ExpectResult::kExpectSuccess , " " , {kTensorrtExecutionProvider , kOpenVINOExecutionProvider });
10421042}
10431043
1044+ // Verify that Loop correctly handles tensor(string) scan outputs.
1045+ // Strings are not trivially copyable so the concatenation path must use proper copy semantics.
1046+ // Uses strings exceeding the small-string-optimization threshold to exercise heap-allocated payloads.
1047+ TEST (Loop, StringScanOutput) {
1048+ auto create_subgraph = []() {
1049+ Model model (" String scan output subgraph" , false , DefaultLoggingManager ().DefaultLogger ());
1050+ auto & graph = model.MainGraph ();
1051+
1052+ std::vector<NodeArg*> inputs;
1053+ std::vector<NodeArg*> outputs;
1054+
1055+ /* Subgraph produces a constant string tensor as a scan output each iteration.
1056+
1057+ iter_num_in cond_in
1058+ (unused) |
1059+ [Identity]
1060+ |
1061+ cond_out
1062+
1063+ [Constant] -> scan_output (string tensor, shape {1})
1064+ */
1065+
1066+ // graph input types
1067+ TypeProto int64_scalar;
1068+ int64_scalar.mutable_tensor_type ()->set_elem_type (TensorProto_DataType_INT64);
1069+ int64_scalar.mutable_tensor_type ()->mutable_shape ()->add_dim ()->set_dim_value (1 );
1070+
1071+ TypeProto bool_scalar;
1072+ bool_scalar.mutable_tensor_type ()->set_elem_type (TensorProto_DataType_BOOL);
1073+ bool_scalar.mutable_tensor_type ()->mutable_shape ()->add_dim ()->set_dim_value (1 );
1074+
1075+ TypeProto string_tensor;
1076+ string_tensor.mutable_tensor_type ()->set_elem_type (TensorProto_DataType_STRING);
1077+ string_tensor.mutable_tensor_type ()->mutable_shape ()->add_dim ()->set_dim_value (1 );
1078+
1079+ // graph inputs
1080+ auto & iter_num_in = graph.GetOrCreateNodeArg (" iter_num_in" , &int64_scalar);
1081+ auto & cond_in = graph.GetOrCreateNodeArg (" cond_in" , &bool_scalar);
1082+
1083+ // graph outputs
1084+ auto & cond_out = graph.GetOrCreateNodeArg (" cond_out" , &bool_scalar);
1085+ auto & scan_out = graph.GetOrCreateNodeArg (" scan_out" , &string_tensor);
1086+
1087+ // cond_in -> cond_out
1088+ {
1089+ inputs = {&cond_in};
1090+ outputs = {&cond_out};
1091+ graph.AddNode (" cond_identity" , " Identity" , " Forward cond" , inputs, outputs);
1092+ }
1093+
1094+ // Constant -> scan_out (string long enough to exceed SSO)
1095+ {
1096+ TensorProto value_tensor;
1097+ value_tensor.set_name (" string_const" );
1098+ value_tensor.add_dims (1 );
1099+ value_tensor.set_data_type (TensorProto_DataType_STRING);
1100+ // Use a string longer than typical SSO buffer (>22 chars) to ensure heap allocation
1101+ value_tensor.add_string_data (" this_string_exceeds_sso_threshold_and_uses_heap_allocation" );
1102+
1103+ auto & constant_node = graph.AddNode (" string_constant" , " Constant" , " String constant" ,
1104+ {}, {&scan_out});
1105+ constant_node.AddAttribute (" value" , value_tensor);
1106+ }
1107+
1108+ graph.SetInputs ({&iter_num_in, &cond_in});
1109+ graph.SetOutputs ({&cond_out, &scan_out});
1110+
1111+ auto status = graph.Resolve ();
1112+ EXPECT_EQ (status, Status::OK ());
1113+
1114+ return graph.ToGraphProto ();
1115+ };
1116+
1117+ OpTester test (" Loop" , 11 );
1118+ auto body = create_subgraph ();
1119+ test.AddAttribute <GraphProto>(" body" , body);
1120+ test.AddInput <int64_t >(" M" , {1 }, {3 });
1121+ test.AddInput <bool >(" cond" , {1 }, {true });
1122+
1123+ // scan output: 3 iterations, each producing a {1} string tensor -> final shape {3, 1}
1124+ test.AddOutput <std::string>(" scan_out_final" , {3 , 1 },
1125+ {" this_string_exceeds_sso_threshold_and_uses_heap_allocation" ,
1126+ " this_string_exceeds_sso_threshold_and_uses_heap_allocation" ,
1127+ " this_string_exceeds_sso_threshold_and_uses_heap_allocation" });
1128+
1129+ // Only CPU EP supports string tensors.
1130+ std::vector<std::unique_ptr<IExecutionProvider>> eps;
1131+ eps.push_back (DefaultCpuExecutionProvider ());
1132+ test.Run (OpTester::ExpectResult::kExpectSuccess , " " , {}, nullptr , &eps);
1133+ }
1134+
1135+ // Verify multi-element string scan output (shape {2} per iteration).
1136+ TEST (Loop, StringScanOutputMultiElement) {
1137+ auto create_subgraph = []() {
1138+ Model model (" Multi-element string scan output" , false , DefaultLoggingManager ().DefaultLogger ());
1139+ auto & graph = model.MainGraph ();
1140+
1141+ std::vector<NodeArg*> inputs;
1142+ std::vector<NodeArg*> outputs;
1143+
1144+ // graph input types
1145+ TypeProto int64_scalar;
1146+ int64_scalar.mutable_tensor_type ()->set_elem_type (TensorProto_DataType_INT64);
1147+ int64_scalar.mutable_tensor_type ()->mutable_shape ()->add_dim ()->set_dim_value (1 );
1148+
1149+ TypeProto bool_scalar;
1150+ bool_scalar.mutable_tensor_type ()->set_elem_type (TensorProto_DataType_BOOL);
1151+ bool_scalar.mutable_tensor_type ()->mutable_shape ()->add_dim ()->set_dim_value (1 );
1152+
1153+ TypeProto string_tensor;
1154+ string_tensor.mutable_tensor_type ()->set_elem_type (TensorProto_DataType_STRING);
1155+ string_tensor.mutable_tensor_type ()->mutable_shape ()->add_dim ()->set_dim_value (2 );
1156+
1157+ // graph inputs
1158+ auto & iter_num_in = graph.GetOrCreateNodeArg (" iter_num_in" , &int64_scalar);
1159+ auto & cond_in = graph.GetOrCreateNodeArg (" cond_in" , &bool_scalar);
1160+
1161+ // graph outputs
1162+ auto & cond_out = graph.GetOrCreateNodeArg (" cond_out" , &bool_scalar);
1163+ auto & scan_out = graph.GetOrCreateNodeArg (" scan_out" , &string_tensor);
1164+
1165+ // cond_in -> cond_out
1166+ {
1167+ inputs = {&cond_in};
1168+ outputs = {&cond_out};
1169+ graph.AddNode (" cond_identity" , " Identity" , " Forward cond" , inputs, outputs);
1170+ }
1171+
1172+ // Constant -> scan_out with 2 elements
1173+ {
1174+ TensorProto value_tensor;
1175+ value_tensor.set_name (" string_const" );
1176+ value_tensor.add_dims (2 );
1177+ value_tensor.set_data_type (TensorProto_DataType_STRING);
1178+ value_tensor.add_string_data (" first_heap_allocated_string_that_exceeds_sso_buffer_size" );
1179+ value_tensor.add_string_data (" second_heap_allocated_string_that_exceeds_sso_buffer_size" );
1180+
1181+ auto & constant_node = graph.AddNode (" string_constant" , " Constant" , " String constant" ,
1182+ {}, {&scan_out});
1183+ constant_node.AddAttribute (" value" , value_tensor);
1184+ }
1185+
1186+ graph.SetInputs ({&iter_num_in, &cond_in});
1187+ graph.SetOutputs ({&cond_out, &scan_out});
1188+
1189+ auto status = graph.Resolve ();
1190+ EXPECT_EQ (status, Status::OK ());
1191+
1192+ return graph.ToGraphProto ();
1193+ };
1194+
1195+ OpTester test (" Loop" , 11 );
1196+ auto body = create_subgraph ();
1197+ test.AddAttribute <GraphProto>(" body" , body);
1198+ test.AddInput <int64_t >(" M" , {1 }, {2 });
1199+ test.AddInput <bool >(" cond" , {1 }, {true });
1200+
1201+ // scan output: 2 iterations x {2} elements -> {2, 2}
1202+ test.AddOutput <std::string>(" scan_out_final" , {2 , 2 },
1203+ {" first_heap_allocated_string_that_exceeds_sso_buffer_size" ,
1204+ " second_heap_allocated_string_that_exceeds_sso_buffer_size" ,
1205+ " first_heap_allocated_string_that_exceeds_sso_buffer_size" ,
1206+ " second_heap_allocated_string_that_exceeds_sso_buffer_size" });
1207+
1208+ // Only CPU EP supports string tensors.
1209+ std::vector<std::unique_ptr<IExecutionProvider>> eps;
1210+ eps.push_back (DefaultCpuExecutionProvider ());
1211+ test.Run (OpTester::ExpectResult::kExpectSuccess , " " , {}, nullptr , &eps);
1212+ }
1213+
1214+ // Verify Loop with a string loop-carried variable (uses IDataTransfer::CopyTensor path).
1215+ TEST (Loop, StringLoopCarriedVar) {
1216+ auto create_subgraph = []() {
1217+ Model model (" String loop-carried var subgraph" , false , DefaultLoggingManager ().DefaultLogger ());
1218+ auto & graph = model.MainGraph ();
1219+
1220+ std::vector<NodeArg*> inputs;
1221+ std::vector<NodeArg*> outputs;
1222+
1223+ /* Subgraph passes through a string loop-carried variable unchanged.
1224+
1225+ iter_num_in cond_in loop_var_in (string)
1226+ (unused) | |
1227+ [Identity] [Identity]
1228+ | |
1229+ cond_out loop_var_out
1230+ */
1231+
1232+ TypeProto int64_scalar;
1233+ int64_scalar.mutable_tensor_type ()->set_elem_type (TensorProto_DataType_INT64);
1234+ int64_scalar.mutable_tensor_type ()->mutable_shape ()->add_dim ()->set_dim_value (1 );
1235+
1236+ TypeProto bool_scalar;
1237+ bool_scalar.mutable_tensor_type ()->set_elem_type (TensorProto_DataType_BOOL);
1238+ bool_scalar.mutable_tensor_type ()->mutable_shape ()->add_dim ()->set_dim_value (1 );
1239+
1240+ TypeProto string_tensor;
1241+ string_tensor.mutable_tensor_type ()->set_elem_type (TensorProto_DataType_STRING);
1242+ string_tensor.mutable_tensor_type ()->mutable_shape ()->add_dim ()->set_dim_value (1 );
1243+
1244+ auto & iter_num_in = graph.GetOrCreateNodeArg (" iter_num_in" , &int64_scalar);
1245+ auto & cond_in = graph.GetOrCreateNodeArg (" cond_in" , &bool_scalar);
1246+ auto & loop_var_in = graph.GetOrCreateNodeArg (" loop_var_in" , &string_tensor);
1247+
1248+ auto & cond_out = graph.GetOrCreateNodeArg (" cond_out" , &bool_scalar);
1249+ auto & loop_var_out = graph.GetOrCreateNodeArg (" loop_var_out" , &string_tensor);
1250+
1251+ // cond_in -> cond_out
1252+ {
1253+ inputs = {&cond_in};
1254+ outputs = {&cond_out};
1255+ graph.AddNode (" cond_identity" , " Identity" , " Forward cond" , inputs, outputs);
1256+ }
1257+
1258+ // loop_var_in -> loop_var_out
1259+ {
1260+ inputs = {&loop_var_in};
1261+ outputs = {&loop_var_out};
1262+ graph.AddNode (" var_identity" , " Identity" , " Forward loop var" , inputs, outputs);
1263+ }
1264+
1265+ graph.SetInputs ({&iter_num_in, &cond_in, &loop_var_in});
1266+ graph.SetOutputs ({&cond_out, &loop_var_out});
1267+
1268+ auto status = graph.Resolve ();
1269+ EXPECT_EQ (status, Status::OK ());
1270+
1271+ return graph.ToGraphProto ();
1272+ };
1273+
1274+ OpTester test (" Loop" , 11 );
1275+ auto body = create_subgraph ();
1276+ test.AddAttribute <GraphProto>(" body" , body);
1277+ test.AddInput <int64_t >(" M" , {1 }, {3 });
1278+ test.AddInput <bool >(" cond" , {1 }, {true });
1279+ test.AddInput <std::string>(" loop_var_init" , {1 },
1280+ {" a_long_string_value_that_definitely_exceeds_the_sso_threshold" });
1281+
1282+ test.AddOutput <std::string>(" loop_var_final" , {1 },
1283+ {" a_long_string_value_that_definitely_exceeds_the_sso_threshold" });
1284+
1285+ // Only CPU EP supports string tensors.
1286+ std::vector<std::unique_ptr<IExecutionProvider>> eps;
1287+ eps.push_back (DefaultCpuExecutionProvider ());
1288+ test.Run (OpTester::ExpectResult::kExpectSuccess , " " , {}, nullptr , &eps);
1289+ }
1290+
1291+ // Verify Loop with zero trip count produces empty scan output for strings.
1292+ TEST (Loop, StringScanOutputZeroIterations) {
1293+ auto create_subgraph = []() {
1294+ Model model (" String scan output zero iter" , false , DefaultLoggingManager ().DefaultLogger ());
1295+ auto & graph = model.MainGraph ();
1296+
1297+ std::vector<NodeArg*> inputs;
1298+ std::vector<NodeArg*> outputs;
1299+
1300+ TypeProto int64_scalar;
1301+ int64_scalar.mutable_tensor_type ()->set_elem_type (TensorProto_DataType_INT64);
1302+ int64_scalar.mutable_tensor_type ()->mutable_shape ()->add_dim ()->set_dim_value (1 );
1303+
1304+ TypeProto bool_scalar;
1305+ bool_scalar.mutable_tensor_type ()->set_elem_type (TensorProto_DataType_BOOL);
1306+ bool_scalar.mutable_tensor_type ()->mutable_shape ()->add_dim ()->set_dim_value (1 );
1307+
1308+ TypeProto string_tensor;
1309+ string_tensor.mutable_tensor_type ()->set_elem_type (TensorProto_DataType_STRING);
1310+ string_tensor.mutable_tensor_type ()->mutable_shape ()->add_dim ()->set_dim_value (1 );
1311+
1312+ auto & iter_num_in = graph.GetOrCreateNodeArg (" iter_num_in" , &int64_scalar);
1313+ auto & cond_in = graph.GetOrCreateNodeArg (" cond_in" , &bool_scalar);
1314+
1315+ auto & cond_out = graph.GetOrCreateNodeArg (" cond_out" , &bool_scalar);
1316+ auto & scan_out = graph.GetOrCreateNodeArg (" scan_out" , &string_tensor);
1317+
1318+ {
1319+ inputs = {&cond_in};
1320+ outputs = {&cond_out};
1321+ graph.AddNode (" cond_identity" , " Identity" , " Forward cond" , inputs, outputs);
1322+ }
1323+
1324+ {
1325+ TensorProto value_tensor;
1326+ value_tensor.set_name (" string_const" );
1327+ value_tensor.add_dims (1 );
1328+ value_tensor.set_data_type (TensorProto_DataType_STRING);
1329+ value_tensor.add_string_data (" never_produced_because_zero_iterations" );
1330+
1331+ auto & constant_node = graph.AddNode (" string_constant" , " Constant" , " String constant" ,
1332+ {}, {&scan_out});
1333+ constant_node.AddAttribute (" value" , value_tensor);
1334+ }
1335+
1336+ graph.SetInputs ({&iter_num_in, &cond_in});
1337+ graph.SetOutputs ({&cond_out, &scan_out});
1338+
1339+ auto status = graph.Resolve ();
1340+ EXPECT_EQ (status, Status::OK ());
1341+
1342+ return graph.ToGraphProto ();
1343+ };
1344+
1345+ OpTester test (" Loop" , 11 );
1346+ auto body = create_subgraph ();
1347+ test.AddAttribute <GraphProto>(" body" , body);
1348+ test.AddInput <int64_t >(" M" , {1 }, {0 });
1349+ test.AddInput <bool >(" cond" , {1 }, {true });
1350+
1351+ // Zero iterations -> scan output shape {0, 1} with no elements
1352+ test.AddOutput <std::string>(" scan_out_final" , {0 , 1 }, {});
1353+
1354+ // Only CPU EP supports string tensors.
1355+ std::vector<std::unique_ptr<IExecutionProvider>> eps;
1356+ eps.push_back (DefaultCpuExecutionProvider ());
1357+ test.Run (OpTester::ExpectResult::kExpectSuccess , " " , {}, nullptr , &eps);
1358+ }
1359+
10441360#if defined(USE_CUDA)
10451361// test that when part of the subgraph run on CUDA it executes successfully
10461362TEST (Loop, MixedExecutionProviders) {
0 commit comments