Monday, February 20, 2012

loading XML from SQL Server -- faster

I am using a text column to store XML data on a sql server table.
The data contained in these XML documents maps to a few different
schemas but they share a subset of tags which I use to query with
sp_xml_preparedocument and openxml.
The problem I'm having is that this process is very slow: for ~2500
xml documents, it takes 12 seconds to run.
Is there a way to query these XML documents faster using SQL Server?
Or I will have to reduce my working set of records?
Here is the code I've written:
CREATE PROCEDURE dbo.QueryDocuments
AS
SET CONCAT_NULL_YIELDS_NULL OFF
declare @.namespacedef varchar(512)
declare @.rowpattern varchar(128)
declare @.id uniqueidentifier
declare @.doctype varchar(50)
declare @.docinfo varchar(128)
declare @.docversion varchar(3)
declare @.tstamp datetime
declare @.lastchange varchar(50)
IF OBJECT_ID('tempdb..#rs') IS NOT NULL
DROP TABLE #rs
create table #rs (id uniqueidentifier, doctype
varchar(50), docinfo varchar(128), docversion
varchar(10), tstamp datetime, lastchange varchar(50),
cliente varchar(128), estacao varchar(128), tecnico
varchar(256), inicio datetime)
declare crs cursor for
select Documentos.id,
Documentos.doctype,
DocumentTypes.docinfo,
Documentos.docversion,
Documentos.tstamp,
Documentos.lastchange
from
Documentos
inner join
DocumentTypes on Documentos.doctype = DocumentTypes.doctype and
Documentos.docversion = DocumentTypes.docversion
declare @.cmd varchar(8000)
declare @.xml_0 varchar( 8000 ), @.xml_1 varchar( 8000
)
open crs
fetch next from crs into @.id, @.doctype, @.docinfo, @.docversion,
@.tstamp, @.lastchange
while @.@.fetch_status = 0
begin
declare @.idoc int
set @.namespacedef = (select
substring(convert(varchar(8000), xmldata),
patindex('%<my:%', xmldata), patindex('%>%',
substring(xmldata, patindex('%<my:%', xmldata),
500))-1) from Documentos where [id] = @.id) +
'/>'
set @.rowpattern = '/' + substring(@.namespacedef, 2,
charindex(' ', @.namespacedef)-1)
select @.xml_0 = replace(substring( xmldata, ( 0*7000
) + 1, 7000 ), char(39),
char(39)+char(39) ),
@.xml_1 = replace(substring( xmldata, ( 1*7000
) + 1, 7000 ), char(39),
char(39)+char(39) )
from Documentos where [id] = @.id
exec ('declare @.idoc int;exec sp_xml_preparedocument @.idoc
OUTPUT, ''' + @.xml_0 + @.xml_1 /*+ @.xml_2 + @.xml_3 + @.xml_4 + @.xml_5*/
+ ''', ''' + @.namespacedef + ''';declare he_cur cursor forselect
@.idoc')
open he_cur
fetch he_cur into @.idoc
deallocate he_cur
insert into #rs
select
@.id,
@.doctype,
@.docinfo,
@.docversion,
@.tstamp,
@.lastchange,
*
from openxml(@.idoc, @.rowpattern, 2)
with (
cliente varchar(128) 'my:Cliente',
estacao varchar(128) 'my:Estacao',
tecnico varchar(256) 'my:Tecnico',
inicio datetime 'my:Inicio'
)
exec sp_xml_removedocument @.idoc
fetch next from crs into @.id, @.doctype, @.docinfo, @.docversion,
@.tstamp, @.lastchange
end
close crs
deallocate crs
select *
from #rs
order by
year(inicio) asc,
month(inicio) asc,
day(inicio) asc
drop table #rs
GO
Thanks for your help.Moving to SQL Server 2005 could help, since you would not need to do the
string manipulations.
Do you know where you use the time? In the string ops, XML parser or OpenXML
calls?
Also, could you just pass the @.xml_* for the namespace declarations? Or use
a constant namespace declaration?
Best regards
Michael
"mgcm" <miltonmoura@.gmail-dot-com.no-spam.invalid> wrote in message
news:CsWdneQdj-phirveRVn_vA@.giganews.com...
>I am using a text column to store XML data on a sql server table.
> The data contained in these XML documents maps to a few different
> schemas but they share a subset of tags which I use to query with
> sp_xml_preparedocument and openxml.
> The problem I'm having is that this process is very slow: for ~2500
> xml documents, it takes 12 seconds to run.
> Is there a way to query these XML documents faster using SQL Server?
> Or I will have to reduce my working set of records?
> Here is the code I've written:
>
> CREATE PROCEDURE dbo.QueryDocuments
> AS
> SET CONCAT_NULL_YIELDS_NULL OFF
> declare @.namespacedef varchar(512)
> declare @.rowpattern varchar(128)
> declare @.id uniqueidentifier
> declare @.doctype varchar(50)
> declare @.docinfo varchar(128)
> declare @.docversion varchar(3)
> declare @.tstamp datetime
> declare @.lastchange varchar(50)
> IF OBJECT_ID('tempdb..#rs') IS NOT NULL
> DROP TABLE #rs
> create table #rs (id uniqueidentifier, doctype
> varchar(50), docinfo varchar(128), docversion
> varchar(10), tstamp datetime, lastchange varchar(50),
> cliente varchar(128), estacao varchar(128), tecnico
> varchar(256), inicio datetime)
> declare crs cursor for
> select Documentos.id,
> Documentos.doctype,
> DocumentTypes.docinfo,
> Documentos.docversion,
> Documentos.tstamp,
> Documentos.lastchange
> from
> Documentos
> inner join
> DocumentTypes on Documentos.doctype = DocumentTypes.doctype and
> Documentos.docversion = DocumentTypes.docversion
> declare @.cmd varchar(8000)
> declare @.xml_0 varchar( 8000 ), @.xml_1 varchar( 8000
> )
> open crs
> fetch next from crs into @.id, @.doctype, @.docinfo, @.docversion,
> @.tstamp, @.lastchange
> while @.@.fetch_status = 0
> begin
> declare @.idoc int
> set @.namespacedef = (select
> substring(convert(varchar(8000), xmldata),
> patindex('%<my:%', xmldata), patindex('%>%',
> substring(xmldata, patindex('%<my:%', xmldata),
> 500))-1) from Documentos where [id] = @.id) +
> '/>'
> set @.rowpattern = '/' + substring(@.namespacedef, 2,
> charindex(' ', @.namespacedef)-1)
> select @.xml_0 = replace(substring( xmldata, ( 0*7000
> ) + 1, 7000 ), char(39),
> char(39)+char(39) ),
> @.xml_1 = replace(substring( xmldata, ( 1*7000
> ) + 1, 7000 ), char(39),
> char(39)+char(39) )
> from Documentos where [id] = @.id
> exec ('declare @.idoc int;exec sp_xml_preparedocument @.idoc
> OUTPUT, ''' + @.xml_0 + @.xml_1 /*+ @.xml_2 + @.xml_3 + @.xml_4 + @.xml_5*/
> + ''', ''' + @.namespacedef + ''';declare he_cur cursor forselect
> @.idoc')
> open he_cur
> fetch he_cur into @.idoc
> deallocate he_cur
> insert into #rs
> select
> @.id,
> @.doctype,
> @.docinfo,
> @.docversion,
> @.tstamp,
> @.lastchange,
> *
> from openxml(@.idoc, @.rowpattern, 2)
> with (
> cliente varchar(128) 'my:Cliente',
> estacao varchar(128) 'my:Estacao',
> tecnico varchar(256) 'my:Tecnico',
> inicio datetime 'my:Inicio'
> )
> exec sp_xml_removedocument @.idoc
> fetch next from crs into @.id, @.doctype, @.docinfo, @.docversion,
> @.tstamp, @.lastchange
> end
> close crs
> deallocate crs
> select *
> from #rs
> order by
> year(inicio) asc,
> month(inicio) asc,
> day(inicio) asc
> drop table #rs
> GO
> Thanks for your help.
>

No comments:

Post a Comment