I have tried what was suggested by moving the innermost or lowest data element, in this case the element called 'FIELD", to the top of my output and mapping it first which resulted in all 400,000 rows getting the same parent elements assigned.
Take the following data sample
Code: Select all
<BULK_DATA_EXPORTS>
<BULK_DATA_EXPORT>
<BULK_DATA_EXPORT_ID>548499</BULK_DATA_EXPORT_ID>
<PATIENT>
<PATIENT_NAME>BOSS HOGG</PATIENT_NAME>
<PATIENT_AGENCY_CODE>1</PATIENT_AGENCY_CODE>
<PATIENT_CODE>40</PATIENT_CODE>
<PATIENT_INSURANCE_1>MEDIC</PATIENT_INSURANCE_1>
<PATIENT_INSURANCE_2></PATIENT_INSURANCE_2>
<PATIENT_INSURANCE_3></PATIENT_INSURANCE_3>
<PATIENT_INSURANCE_4></PATIENT_INSURANCE_4>
<PATIENT_INSURANCE_5></PATIENT_INSURANCE_5>
</PATIENT>
<BULK_DATA_FIELDS>
<FORM_NAME>OTHER Nurse Start of Care</FORM_NAME>
<FORM_CODE>99999SOC</FORM_CODE>
<FORM_DATE>20120615</FORM_DATE>
<FIELD NAME='M1020cVE'></FIELD>
<FIELD NAME='M1020c'>DMII WO CMP NT ST UNCNTR</FIELD>
<FIELD NAME='M1030_THH_ENT_NUTRITION'></FIELD>
<FIELD NAME='M1020b'>OTHER POSTOP INFECTION</FIELD>
<FIELD NAME='M1020e'>HYPERTENSION NOS</FIELD>
<FIELD NAME='M1020d'>PARAPLEGIA NOS</FIELD>
<FIELD NAME='M1020g'>ATTEN TO CYSTOSTOMY</FIELD>
<FIELD NAME='M1020iOE'></FIELD>
<FIELD NAME='M1020f'>ANXIETY STATE NOS</FIELD>
<FIELD NAME='M1020i'></FIELD>
<FIELD NAME='M1020h'>LONG-TERM (CURRENT) USE OF ASP</FIELD>
<FIELD NAME='M0150_CPAY_MCAID_HMO'></FIELD>
<FIELD NAME='M1020jOE'></FIELD>
<FIELD NAME='M0032_ROC_DT'></FIELD>
<FIELD NAME='M1023_OTH_DIAG1_SEVERITY'></FIELD>
<FIELD NAME='M1021gDate'></FIELD>
<FIELD NAME='M1011_14_DAY_INP5_EICD'></FIELD>
<FIELD NAME='M1020a'>Cellulitis and abscess of butt</FIELD>
<FIELD NAME='M1025_OPT_DIAG_ICD_F3'></FIELD>
<FIELD NAME='M1023_OTH_DIAG4_SEVERITY'></FIELD>
<FIELD NAME='M1025_OPT_DIAG_ICD_F4'></FIELD>
<FIELD NAME='M0150_CPAY_MCARE_HMO'></FIELD>
<FIELD NAME='M1016_CHGREG_ICD_NA'></FIELD>
<FIELD NAME='M0040City'>DAYTON</FIELD>
<FIELD NAME='M1033_HOSP_RISK_HSTRY_FALLS'></FIELD>
<FIELD NAME='M1016_CHGREG_EICD5'></FIELD>
<FIELD NAME='M1308_NSTG_CVRG'></FIELD>
<FIELD NAME='M0032_ROC_DT_NA'>on</FIELD>
<FIELD NAME='M1016_CHGREG_ICD3'></FIELD>
<FIELD NAME='M0072UK'></FIELD>
<FIELD NAME='M1016_CHGREG_ICD4'></FIELD>
<FIELD NAME='M0150_CPAY_MCARE_FFS'>on</FIELD>
<FIELD NAME='M1016_CHGREG_ICD5'></FIELD>
<FIELD NAME='M1016_CHGREG_ICD6'></FIELD>
<FIELD NAME='M1016_CHGREG_ICD1'>998.59</FIELD>
<FIELD NAME='M1016_CHGREG_ICD2'>682.5</FIELD>
<FIELD NAME='M1010_14_DAY_INP5_ICD'></FIELD>
<FIELD NAME='M1322_NBR_PRSULC_STG1'>0</FIELD>
<FIELD NAME='M1011_14_DAY_INP_NA'></FIELD>
<FIELD NAME='M1025_OPT_DIAG_ICD_E3'></FIELD>
<FIELD NAME='M1022_OTH_DIAG9_ICD'></FIELD>
<FIELD NAME='M1018_PRIOR_UR_INCON'></FIELD>
<FIELD NAME='M1025_OPT_DIAG_ICD_E4'></FIELD>
<FIELD NAME='M1023_OTH_DIAG7_SEVERITY'></FIELD>
<FIELD NAME='M1022_OTH_DIAG10_ICD'></FIELD>
<FIELD NAME='M1000_DC_NONE_14_DA'>on</FIELD>
<FIELD NAME='M1024_PMT_DIAG_ICD_L4'></FIELD>
<FIELD NAME='M1022_OTH_DIAG12_ICD'></FIELD>
<FIELD NAME='M1410_RESPTX_OXYGEN'></FIELD>
<FIELD NAME='M1024_PMT_DIAG_ICD_L3'></FIELD>
<FIELD NAME='M1025_OPT_DIAG_ICD_D3'></FIELD>
<FIELD NAME='M1021bVE2'></FIELD>
<FIELD NAME='M1025_OPT_DIAG_ICD_D4'></FIELD>
<FIELD NAME='M1016_CHGREG_CICD3'></FIELD>
<FIELD NAME='M1023_OTH_DIAG8_SEVERITY'></FIELD>
<FIELD NAME='M1020fDate'>01/01/2012</FIELD>
<FIELD NAME='M0069_PAT_GENDER'></FIELD>
<FIELD NAME='M1610_UR_INCONT'>2</FIELD>
</BULK_DATA_FIELDS>
</BULK_DATA_EXPORT>
</BULK_DATA_EXPORTS>
Code: Select all
<?xml version="1.0" encoding="UTF-8"?>
<xs:schema attributeFormDefault="unqualified" elementFormDefault="qualified" xmlns:xs="http://www.w3.org/2001/XMLSchema">
<xs:element name="BULK_DATA_EXPORTS">
<xs:complexType>
<xs:sequence>
<xs:element name="BULK_DATA_EXPORT">
<xs:complexType>
<xs:sequence>
<xs:element name="BULK_DATA_TYPE" type="xs:string"></xs:element>
<xs:element name="BULK_DATA_EXPORT_ID" type="xs:int"></xs:element>
<xs:element name="PATIENT">
<xs:complexType>
<xs:sequence>
<xs:element name="PATIENT_NAME" type="xs:string"></xs:element>
<xs:element name="PATIENT_AGENCY_CODE" type="xs:int"></xs:element>
<xs:element name="PATIENT_CODE" type="xs:int"></xs:element>
<xs:element name="PATIENT_INSURANCE_1" type="xs:string"></xs:element>
<xs:element name="PATIENT_INSURANCE_2"></xs:element>
<xs:element name="PATIENT_INSURANCE_3"></xs:element>
<xs:element name="PATIENT_INSURANCE_4"></xs:element>
<xs:element name="PATIENT_INSURANCE_5"></xs:element>
</xs:sequence>
</xs:complexType>
</xs:element>
<xs:element name="BULK_DATA_FIELDS">
<xs:complexType>
<xs:sequence>
<xs:element name="FORM_NAME" type="xs:string"></xs:element>
<xs:element name="FORM_CODE" type="xs:string"></xs:element>
<xs:element name="FORM_DATE" type="xs:int"></xs:element>
<xs:element name="FIELD" maxOccurs="unbounded">
<xs:complexType>
<xs:attribute name="NAME" type="xs:string"></xs:attribute>
</xs:complexType>
</xs:element>
</xs:sequence>
</xs:complexType>
</xs:element>
</xs:sequence>
</xs:complexType>
</xs:element>
</xs:sequence>
</xs:complexType>
</xs:element>
</xs:schema>
<FIELD NAME='M1016_CHGREG_ICD1'>998.59</FIELD>
So far I've found 14 distinct values for FORM_NAME with each of those having a different number of 'FIELD' elements.
How can I correctly parse these varying number of 'FIELD' elements into columns for each row. My file is around 380,000 rows which should result in 1000 output rows.
Any help is greatly appreciated.